In [1]:
import { Ollama } from "npm:langchain/llms/ollama";
import { AgentType } from "npm:langchain/agents";
import { RetrievalQAChain } from "npm:langchain/chains";
import { MemoryVectorStore } from "npm:langchain/vectorstores/memory";
import { OllamaEmbeddings } from "npm:langchain/embeddings/ollama";
import { RecursiveCharacterTextSplitter } from "npm:langchain/text_splitter";
import "npm:d3-dsv";

[Module: null prototype] {
  autoType: [36m[Function: autoType][39m,
  csvFormat: [36m[Function: format][39m,
  csvFormatBody: [36m[Function: formatBody][39m,
  csvFormatRow: [36m[Function: formatRow][39m,
  csvFormatRows: [36m[Function: formatRows][39m,
  csvFormatValue: [36m[Function: formatValue][39m,
  csvParse: [36m[Function: parse][39m,
  csvParseRows: [36m[Function: parseRows][39m,
  dsvFormat: [36m[Function: default][39m,
  tsvFormat: [36m[Function: format][39m,
  tsvFormatBody: [36m[Function: formatBody][39m,
  tsvFormatRow: [36m[Function: formatRow][39m,
  tsvFormatRows: [36m[Function: formatRows][39m,
  tsvFormatValue: [36m[Function: formatValue][39m,
  tsvParse: [36m[Function: parse][39m,
  tsvParseRows: [36m[Function: parseRows][39m
}

In [2]:
// loaders
import { DirectoryLoader } from "npm:langchain/document_loaders/fs/directory"
import { JSONLoader } from "npm:langchain/document_loaders/fs/json"
import { TextLoader } from "npm:langchain/document_loaders/fs/text"
import { CSVLoader } from "npm:langchain/document_loaders/fs/csv"
import { PDFLoader } from "npm:langchain/document_loaders/fs/pdf"
import { Document } from "npm:langchain/document"

In [3]:
// Factories keyed by file extension; each one builds the loader for a single file path.
const loadersByExtension = {
    ".json": (filePath: string) => new JSONLoader(filePath),
    ".txt": (filePath: string) => new TextLoader(filePath),
    // CSV files are parsed with an explicit comma separator.
    ".csv": (filePath: string) => new CSVLoader(filePath, { separator: "," }),
    ".pdf": (filePath: string) => new PDFLoader(filePath),
};

// Directory loader rooted at ./docs that picks a factory by extension.
const loader = new DirectoryLoader("./docs", loadersByExtension);

In [4]:
// Run the directory loader and print the documents it returns.
const docs = await loader.load();
console.log(docs)

[
  Document {
    pageContent: [32m"id: 1\n"[39m +
      [32m"first_name: Oli\n"[39m +
      [32m"last_name: Mound\n"[39m +
      [32m"email: omound@test.com\n"[39m +
      [32m"gender: Female\n"[39m +
      [32m"review: 3"[39m,
    metadata: { source: [32m"/Users/jorishermans/docs/MOCK_DATA.csv"[39m, line: [33m1[39m }
  },
  Document {
    pageContent: [32m"id: 2\n"[39m +
      [32m"first_name: Prince\n"[39m +
      [32m"last_name: Y\n"[39m +
      [32m"email: prince.y@abc.net.au\n"[39m +
      [32m"gender: Male\n"[39m +
      [32m"review: 8"[39m,
    metadata: { source: [32m"/Users/jorishermans/docs/MOCK_DATA.csv"[39m, line: [33m2[39m }
  },
  Document {
    pageContent: [32m"id: 3\n"[39m +
      [32m"first_name: Ricky\n"[39m +
      [32m"last_name: Martin\n"[39m +
      [32m"email: ricky.martin@glu.com\n"[39m +
      [32m"gender: Male\n"[39m +
      [32m"review: 3"[39m,
    metadata: { source: [32m"/Users/jorishermans/docs/MOCK_DATA.csv"

In [5]:
// Keep only the text of each loaded document (metadata is dropped) and print it.
const csvContent = docs.map(({ pageContent }: Document) => pageContent);
console.log(csvContent);

[
  [32m"id: 1\n"[39m +
    [32m"first_name: Oli\n"[39m +
    [32m"last_name: Mound\n"[39m +
    [32m"email: omound@test.com\n"[39m +
    [32m"gender: Female\n"[39m +
    [32m"review: 3"[39m,
  [32m"id: 2\n"[39m +
    [32m"first_name: Prince\n"[39m +
    [32m"last_name: Y\n"[39m +
    [32m"email: prince.y@abc.net.au\n"[39m +
    [32m"gender: Male\n"[39m +
    [32m"review: 8"[39m,
  [32m"id: 3\n"[39m +
    [32m"first_name: Ricky\n"[39m +
    [32m"last_name: Martin\n"[39m +
    [32m"email: ricky.martin@glu.com\n"[39m +
    [32m"gender: Male\n"[39m +
    [32m"review: 3"[39m,
  [32m"id: 4\n"[39m +
    [32m"first_name: Melanie\n"[39m +
    [32m"last_name: Pidgeon\n"[39m +
    [32m"email: mpidgeon3@test.com\n"[39m +
    [32m"gender: Female\n"[39m +
    [32m"review: 1"[39m,
  [32m"id: 5\n"[39m +
    [32m"first_name: Matthew\n"[39m +
    [32m"last_name: McWire\n"[39m +
    [32m"email: mmcwire@d.org\n"[39m +
    [32m"gender: Male\n"[39m 

In [6]:
/**
 * Asks a question about the texts in `csvContent` using a retrieval QA chain
 * backed by the local Ollama "mixtral" model.
 *
 * On every call the texts are split, embedded into a new in-memory vector
 * store, and queried through a RetrievalQAChain. Progress and intermediate
 * results are logged to the console.
 *
 * @param question - Text passed to the chain as its `query` input.
 * @returns The value resolved by `chain.call`; the cells in this notebook read
 *   the answer from its `text` property.
 */
const askModel = async (question: string) => {
    const model = new Ollama({ model: "mixtral" });

    // NOTE(review): overlap is 90% of the chunk size; the rows printed in this
    // notebook come back as one chunk each, so it appears to have no effect here.
    const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 500,
        chunkOverlap: 450,
    });

    console.log("Text Splitting......");
    console.log(`Chunk size  ----> ${textSplitter.chunkSize}`);
    console.log(`Chunk Overlap  ----> ${textSplitter.chunkOverlap}`);

    const splitDocs = await textSplitter.createDocuments(csvContent);
    console.log(splitDocs);

    // The store is rebuilt on each call, so every question re-embeds all chunks.
    const vectorStore = await MemoryVectorStore.fromDocuments(
        splitDocs,
        new OllamaEmbeddings({ model: "mixtral" }),
    );

    // RetrievalQAChain: retrieve matching chunks, then let the model answer.
    const chain = RetrievalQAChain.fromLLM(model, vectorStore.asRetriever());
    console.log("Querying...");
    const res = await chain.call({ query: question });
    console.log(res);
    return res;
};

In [None]:
// Ask the model to count the reviews and explain its reasoning, then print the answer text.
// The template literal's line breaks and indentation are part of the prompt that is sent.
const res = await askModel(`We provided you with data around reviews. 
                           Your task is to determine how many reviews we have knowing that the maximum score is 10, 
                           starting from 0 up to number 10 ?
                           Give me your detailed reasoning why you came to that conclusion.`);
console.log(res.text);

Text Splitting......
Chunk size  ----> 500
Chunk Overlap  ----> 450
[
  Document {
    pageContent: [32m"id: 1\n"[39m +
      [32m"first_name: Oli\n"[39m +
      [32m"last_name: Mound\n"[39m +
      [32m"email: omound@test.com\n"[39m +
      [32m"gender: Female\n"[39m +
      [32m"review: 3"[39m,
    metadata: { loc: { lines: { from: [33m1[39m, to: [33m6[39m } } }
  },
  Document {
    pageContent: [32m"id: 2\n"[39m +
      [32m"first_name: Prince\n"[39m +
      [32m"last_name: Y\n"[39m +
      [32m"email: prince.y@abc.net.au\n"[39m +
      [32m"gender: Male\n"[39m +
      [32m"review: 8"[39m,
    metadata: { loc: { lines: { from: [33m1[39m, to: [33m6[39m } } }
  },
  Document {
    pageContent: [32m"id: 3\n"[39m +
      [32m"first_name: Ricky\n"[39m +
      [32m"last_name: Martin\n"[39m +
      [32m"email: ricky.martin@glu.com\n"[39m +
      [32m"gender: Male\n"[39m +
      [32m"review: 3"[39m,
    metadata: { loc: { lines: { from: [33m1[3

In [None]:
// Ask the model to reproduce the dataset it was given in column form, then print the answer text.
const res = await askModel("Can you give me the exact dataset that you have printed out in columns?");
console.log(res.text);

In [None]:
// Ask which people gave a review of 3, requesting a short answer, then print the answer text.
// NOTE(review): the prompt spells "answer" as "anwser" — confirm whether this should be corrected.
const res = await askModel("What are the different persons that provided a review of 3? Provide the anwser in short format.");
console.log(res.text);

In [None]:
// Ask which people gave a review score of 3 and why they were picked, then print the answer text.
const res = await askModel("What are the different persons that provided a review score of 3 in the column review? Give me the reason why you pick these persons?");
console.log(res.text);

In [None]:
// Ask about the genders that appear among the reviews, then print the answer text.
// NOTE(review): the prompt wording ("in that filled in") looks garbled — confirm the intended question.
const res = await askModel("What are the genders in that filled in the reviews ?");
console.log(res.text);

In [None]:
// Ask which person gave the highest review score, then print the answer text.
// The prompt previously began with "How", which made the question ill-formed.
const res = await askModel("Who provided the highest score in the column review?");
console.log(res.text);

In [None]:
// Ask which person gave the highest review score, telling the model the maximum is 10,
// then print the answer text. The prompt previously began with "How" instead of "Who".
const res = await askModel("Who provided the highest score in the column review? Know that the highest score is 10.");
console.log(res.text);