In [None]:
from dotenv import load_dotenv
import os
import csv
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

# Environment setup must run before the key lookup below.
# load_dotenv() presumably copies variables from a local .env file into
# os.environ — confirm against the python-dotenv documentation.
load_dotenv()
# Subscript access (not .get) so a missing OPENAI_API_KEY raises KeyError here,
# at the top of the notebook, instead of surfacing later.
openai_api_key = os.environ['OPENAI_API_KEY']

# NOTE(review): neither `openai_api_key` nor `llm` is passed to anything in the
# visible cells; the QA cell constructs its own OpenAI(temperature=1).
# Confirm whether these two names are still needed.
llm = OpenAI(temperature=0.9)

In [None]:

csv_file_path = './docs/articles-pages-test.csv'

# Read every CSV row into memory as a dict keyed by the header row.
# newline='' is required by the csv module: without it, line breaks embedded
# inside quoted fields (likely in the 'Body HTML' column) are not interpreted
# correctly by the reader.
with open(csv_file_path, 'r', encoding='utf-8', newline='') as csvfile:
    csv_reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
    data = list(csv_reader)

# Split each page body into chunks and record, for every chunk, the URL of the
# page it came from. `texts` and `sources` are kept index-aligned.
texts = []
sources = []
text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
for row in data:
    text = row['Body HTML']
    if not text:
        # DictReader fills missing trailing columns with None; such a row (or
        # one with an empty body) has nothing to index, so skip it rather than
        # passing None to the splitter.
        continue
    source = "https://tameson.com/pages/" + row['Handle']
    splitted_texts = text_splitter.split_text(text)
    texts.extend(splitted_texts)
    # One source entry per chunk so metadata lines up with `texts`.
    sources.extend([source] * len(splitted_texts))

# Embed the chunks and build the Chroma store, attaching each chunk's source
# URL as metadata under the "source" key.
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": src} for src in sources])

In [None]:
# Question to run against the indexed pages.
query = "What is a solenoid valve?"

# Look up the stored chunks that the vector store ranks as similar to the question.
docs = docsearch.similarity_search(query)

# Build the question-answering-with-sources chain using the "stuff" chain type
# and a dedicated OpenAI model instance.
qa_llm = OpenAI(temperature=1)
chain = load_qa_with_sources_chain(qa_llm, chain_type="stuff")

# The chain takes the retrieved documents and the question as named inputs.
chain_inputs = {"input_documents": docs, "question": query}

# Bare final expression so the notebook displays the chain's output.
chain(chain_inputs, return_only_outputs=True)