In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process
# environment, then read the OpenAI key from there.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast with an actionable message instead of letting a missing key
# surface later as an authentication error in a downstream cell.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [None]:
from openai import OpenAI

# Raw OpenAI SDK client, authenticated with the key read from the environment.
# NOTE(review): the LangChain cells visible in this notebook build their own
# clients and do not reference this object — confirm it is still needed.
client = OpenAI(
    api_key=OPENAI_API_KEY,
)

In [None]:
# Required imports
import wikipedia
import pandas as pd
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Step 1: Fetch Wikipedia content
search_results = wikipedia.search("MoE")
if not search_results:
    # Indexing [0] on an empty result list would raise a bare IndexError.
    raise ValueError("Wikipedia search for 'MoE' returned no results")
search_keyword = search_results[0]  # Getting the first search result
# auto_suggest=False: the title is an exact search hit, so do not let the
# library substitute a "suggested" title for it.
data = wikipedia.page(search_keyword, auto_suggest=False).content

# Step 2: Collect the article(s) to index in a DataFrame (one row per article)
embedded_df = pd.DataFrame({
    'title': [search_keyword],
    'text': [data]
})

# Step 3: Configure the embedding model.
# Embeddings are computed by Chroma.from_texts below; embedding the full
# article here as well would be a second, unused API call.
embedding_func = OpenAIEmbeddings(model='text-embedding-ada-002')

# Step 4: Split the articles into chunks and add them to the vector store.
# Indexing a whole article as one document would make the 'stuff' chain paste
# the entire page into a single prompt, which can exceed the model's context
# window, and would leave the retriever with fewer documents than k.
persist_dir = "./chroma_db"
chroma_collection_name = "Testing"
chunk_size = 1000     # characters per chunk
chunk_overlap = 100   # characters shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

texts = []
metadatas = []
for article in embedded_df.itertuples(index=False):
    for chunk in text_splitter.split_text(article.text):
        texts.append(chunk)
        # Keep the source article title alongside every chunk.
        metadatas.append({'title': article.title})

if not texts:
    raise ValueError(f"Wikipedia page {search_keyword!r} has no text content to index")

# NOTE(review): re-running this cell presumably appends the same chunks again
# to the persisted collection — confirm and clear ./chroma_db if duplicates matter.
chroma_client = Chroma.from_texts(
    texts=texts,
    embedding=embedding_func,
    metadatas=metadatas,
    collection_name=chroma_collection_name,
    persist_directory=persist_dir
)
chroma_client.persist()  # Save data to database so it can be reloaded later

# Step 5: Set up LangChain retriever and LLM
llm = ChatOpenAI()  # OpenAI API key should be set in environment
retriever = chroma_client.as_retriever(search_kwargs={"k": 2})

# Step 6: Create the QA chain
qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

# Step 7: Run the query
# NOTE(review): the indexed article comes from the "MoE" search above, so this
# query looks unrelated to the corpus — confirm the intended question.
query = "Explain about python."
result = qa.run(query)

print(result)

In [None]:
import wikipedia
import pandas as pd
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Define Chroma database directory and collection name
persist_dir = "./chroma_db"
chroma_collection_name = "Testing"
chunk_size = 1000     # characters per chunk
chunk_overlap = 100   # characters shared between neighbouring chunks

# Step 1: Set up Chroma client with persistent storage
embedding_func = OpenAIEmbeddings(model='text-embedding-ada-002')

if os.path.exists(persist_dir):
    # Load existing Chroma database if it exists.
    # The embedding function must be supplied here: the store uses it to embed
    # incoming queries, and it has to match the model the collection was built with.
    chroma_client = Chroma(
        collection_name=chroma_collection_name,
        embedding_function=embedding_func,
        persist_directory=persist_dir
    )
else:
    # If Chroma database doesn't exist, create a new one with Wikipedia content
    search_results = wikipedia.search("MoE")
    if not search_results:
        # Indexing [0] on an empty result list would raise a bare IndexError.
        raise ValueError("Wikipedia search for 'MoE' returned no results")
    search_keyword = search_results[0]  # Getting the first search result
    # auto_suggest=False: the title is an exact search hit, so do not let the
    # library substitute a "suggested" title for it.
    data = wikipedia.page(search_keyword, auto_suggest=False).content

    # Collect the article(s) to index in a DataFrame (one row per article)
    embedded_df = pd.DataFrame({
        'title': [search_keyword],
        'text': [data]
    })

    # Split the articles into chunks. Indexing a whole article as one document
    # would make the 'stuff' chain paste the entire page into a single prompt,
    # which can exceed the model's context window.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    texts = []
    metadatas = []
    for article in embedded_df.itertuples(index=False):
        for chunk in text_splitter.split_text(article.text):
            texts.append(chunk)
            # Keep the source article title alongside every chunk.
            metadatas.append({'title': article.title})

    if not texts:
        raise ValueError(f"Wikipedia page {search_keyword!r} has no text content to index")

    # Add data to Chroma. Embeddings are computed here by from_texts, so no
    # separate embedding pass over the DataFrame is needed.
    chroma_client = Chroma.from_texts(
        texts=texts,
        embedding=embedding_func,
        metadatas=metadatas,
        collection_name=chroma_collection_name,
        persist_directory=persist_dir
    )
    chroma_client.persist()  # Save data to database

# Step 2: Set up LangChain retriever and LLM
llm = ChatOpenAI()  # OpenAI API key should be set in environment
# The embedding function is configured on the vector store itself, not on the
# retriever; only search options belong here.
retriever = chroma_client.as_retriever(search_kwargs={"k": 2})

# Step 3: Create the QA chain
qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

# Step 4: Run the query
# NOTE(review): the indexed article comes from a "MoE" search, so this query
# looks unrelated to the corpus — confirm the intended question.
query = "Explain about python."
result = qa.run(query)

print(result)
