In [None]:
import wikipediaapi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA
from langgraph.graph import Graph
from langgraph.graph import StateGraph
from langgraph.graph import START, END
from langgraph.checkpoint.memory import MemorySaver
import numpy as np
from typing import TypedDict


In [30]:
user_agent = "Wikipedia-API Example (merlin@example.com)"
wiki = wikipediaapi.Wikipedia(user_agent, "en")

python_page = wiki.page("Python (programming language)")
ww2_page = wiki.page("World War II")


In [31]:

if not python_page.exists() or not ww2_page.exists():
    print("One of the pages does not exist.")
else:
    python_article = python_page.text
    ww2_article = ww2_page.text

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    python_chunks = text_splitter.split_text(python_article)
    ww2_chunks = text_splitter.split_text(ww2_article)

    embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    python_vectordb = Chroma.from_texts(
        python_chunks, 
        embedding=embedding, 
        persist_directory="python_vectordb"
    )
    ww2_vectordb = Chroma.from_texts(
        ww2_chunks, 
        embedding=embedding, 
        persist_directory="ww2_vectordb"
    )

    print("Vector stores created successfully.")


Vector stores created successfully.


In [None]:
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.utils.math import cosine_similarity
from langgraph.graph import StateGraph
from langgraph.graph import START, END
from langgraph.checkpoint.memory import MemorySaver
import numpy as np
from typing import TypedDict

class CustomConfig(TypedDict):
    threshold: float  

embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
python_topic_embedding = embedding.embed_query("Python programming language origins")
ww2_topic_embedding = embedding.embed_query("World War II history")

In [80]:
import os
import dotenv
dotenv.load_dotenv()
import google.generativeai as genai
google_key = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=google_key)
gemini_llm = genai.GenerativeModel("gemini-1.5-flash")

In [81]:

python_vectordb = Chroma(persist_directory="python_vectordb", embedding_function=embedding)
ww2_vectordb = Chroma(persist_directory="ww2_vectordb", embedding_function=embedding)

In [88]:
def cosine_similarity(vec1, vec2):
    vec1 = np.array(vec1).reshape(1, -1)
    vec2 = np.array(vec2).reshape(1, -1)
    if vec1.shape[1] != vec2.shape[1]:
        raise ValueError(f"Shapes not aligned: vec1 has shape {vec1.shape}, vec2 has shape {vec2.shape}")
    return np.dot(vec1, vec2.T) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

def embed_query(state):
    query = state['query']
    query_embedding = embedding.embed_query(query)
    state['query_embedding'] = np.array(query_embedding).reshape(1, -1)
    return state

def route_query(state, config):
    query_embedding = state['query_embedding']
    
    similarity_to_python = cosine_similarity(query_embedding, np.array(python_topic_embedding).reshape(1, -1))
    similarity_to_ww2 = cosine_similarity(query_embedding, np.array(ww2_topic_embedding).reshape(1, -1))

    if similarity_to_python > similarity_to_ww2 and similarity_to_python >= config['configurable']['threshold']:
        state['route'] = 'python'
    elif similarity_to_ww2 > similarity_to_python and similarity_to_ww2 >= config['configurable']['threshold']:
        state['route'] = 'ww2'
    else:
        state['route'] = 'none'
    return state


In [89]:
builder = StateGraph(dict, CustomConfig)
builder.add_node("embed_query", embed_query)
builder.add_node("route_query", route_query)

builder.add_edge(START, "embed_query")
builder.add_edge("embed_query", "route_query")
builder.add_edge("route_query", END)

memory = MemorySaver()
config = {"configurable": {"thread_id":"AAA", "threshold": 0.7}} 
workflow = builder.compile(checkpointer=memory)

In [90]:

# Sample query
initial_state = {"query": "Tell me about the origins of Python programming language."}
workflow.invoke(initial_state, config=config)


TypeError: Object of type ndarray is not serializable