In [23]:
from dotenv import load_dotenv
import os

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [24]:
from openai import OpenAI

client = OpenAI(api_key=OPENAI_API_KEY)

In [25]:
# Required imports
import wikipedia
import pandas as pd
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import chromadb

# Step 1: Fetch Wikipedia content
search_keyword = wikipedia.search("MoE")[0]  # Getting the first search result
data = wikipedia.page(search_keyword).content

# Step 2: Prepare the DataFrame for embedding
embedded_df = pd.DataFrame({
    'title': [search_keyword],
    'text': [data]
})

# Step 3: Generate embeddings
embedding_func = OpenAIEmbeddings(model='text-embedding-ada-002')
embedded_df['text_embedding'] = embedded_df['text'].apply(lambda x: embedding_func.embed_query(x))

# Step 4: Set up Chroma client and add data
chroma_client = chromadb.Client("./database")
collection = chroma_client.create_collection(name="Testing")

# Create unique IDs for documents
idx = [str(i) for i in range(len(embedded_df))]
documents = list(embedded_df['title'].values)
metadatas = embedded_df[['text']].to_dict(orient='records')
embedding_list = list(embedded_df['text_embedding'].values)

collection.add(
    documents=documents,
    embeddings=embedding_list,
    metadatas=metadatas,
    ids=idx
)

# Step 5: Set up the LangChain Chroma retriever and LLM
langchainChroma = Chroma(client=chroma_client, collection_name='Testing', embedding_function=embedding_func)
chroma_client.close()

llm = ChatOpenAI()
retriever = langchainChroma.as_retriever(search_kwargs={"k": 2})

# Step 6: Create the QA chain
qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

# Step 7: Run the query
query = "Explain about MoE please. With python example code."
result = qa.run(query)

print(result)

ValueError: [91mYou are using a deprecated configuration of Chroma.

[94mIf you do not have data you wish to migrate, you only need to change how you construct
your Chroma client. Please see the "New Clients" section of https://docs.trychroma.com/deployment/migration.
________________________________________________________________________________________________

If you do have data you wish to migrate, we have a migration tool you can use in order to
migrate your data to the new Chroma architecture.
Please `pip install chroma-migrate` and run `chroma-migrate` to migrate your data and then
change how you construct your Chroma client.

See https://docs.trychroma.com/deployment/migration for more information or join our discord at https://discord.gg/8g5FESbj for help![0m