In [3]:
import getpass
import os

import pandas as pd
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI

# 🔐 Gemini API key
# SECURITY: a literal key used to be hardcoded on this line. A key that has been saved
# in a notebook must be treated as leaked: revoke it and issue a new one.
# The key is now read from the GOOGLE_API_KEY environment variable; only when that is
# unset or empty is it requested interactively (the input is not echoed, and the value
# is never written into the notebook).
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")

# 📄 Read the review CSV and keep only the rows whose company is Google
# (the comparison is case-insensitive; .copy() detaches the result from the full frame)
df = (
    pd.read_csv("employee_reviews.csv", encoding='ISO-8859-1')
    .loc[lambda frame: frame['company'].str.lower() == 'google']
    .copy()
)

# 🧹 Format text
def format_review(row):
    """Render one review row as three labelled lines.

    Args:
        row: Mapping-like object (a DataFrame row or a dict) providing the
            'overall-ratings', 'pros' and 'cons' entries.

    Returns:
        A string of the form "Rating: ...\\nPros: ...\\nCons: ...\\n".
    """
    labelled_fields = (
        ("Rating", row['overall-ratings']),
        ("Pros", row['pros']),
        ("Cons", row['cons']),
    )
    # One "<label>: <value>" line per field, each terminated by a newline.
    return "".join(f"{label}: {value}\n" for label, value in labelled_fields)
# Render every row with format_review and store the result in a 'text' column
df['text'] = df.apply(format_review, axis=1)

# 📘 Wrap each rendered review in a LangChain Document
review_texts = df['text'].tolist()
documents = [Document(page_content=review_text) for review_text in review_texts]

# ✂️ Chunking: split the documents using chunk_size=800 and chunk_overlap=100
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=800,
)
split_docs = splitter.split_documents(documents)

# 🔍 Embeddings: model used to embed the chunks (and, later, the queries)
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
)

# 🧠 Chroma Vectorstore
# Chroma.from_documents() adds to whatever is already stored in persist_directory, so
# calling it unconditionally on every run indexes the same chunks again and again, and
# the top-k retriever then returns duplicate excerpts. Build the index only when no
# persisted store exists yet; otherwise reopen the one already on disk.
# Delete the "chroma_local" directory to force a rebuild (e.g. after the CSV changes).
persist_dir = "chroma_local"
if os.path.isdir(persist_dir) and os.listdir(persist_dir):
    # 🔁 Reload Vectorstore
    db = Chroma(persist_directory=persist_dir, embedding_function=embedding)
else:
    db = Chroma.from_documents(split_docs, embedding, persist_directory=persist_dir)

# Both names are kept so existing references keep working; they point at the same store.
vectorstore = db

# 🧠 Gemini chat model, accessed through the LangChain wrapper
llm = ChatGoogleGenerativeAI(
    temperature=0.3,
    model="gemini-1.5-flash",
)

# 🧾 Custom Prompt: instructions, then the {context} excerpts, then the user's {question}
prompt_text = """
You are an AI assistant analyzing Google employee reviews.

Use the following review excerpts to answer the user's question.
Only use the information provided — do not assume anything.

--------------------
{context}
--------------------

Question: {question}

Answer:"""

custom_prompt = PromptTemplate(
    template=prompt_text,
    input_variables=["context", "question"],
)

# 🔗 RetrievalQA Chain: retriever configured with k=5, "stuff" chain type, custom prompt
retriever = db.as_retriever(search_kwargs={"k": 5})
qa = RetrievalQA.from_chain_type(
    chain_type="stuff",
    chain_type_kwargs={"prompt": custom_prompt},
    llm=llm,
    retriever=retriever,
)

# 💬 Chat Loop
# Reads questions until the user types 'exit' (any casing, surrounding spaces ignored)
# and prints the chain's answer for each one.
print("\n🤖 Google Employee Review Chatbot (Gemini Powered)")
print("Type 'exit' to quit\n")

while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted kernel: leave the loop without a traceback.
        break
    if query.lower() == "exit":
        break
    if not query:
        # Nothing was asked; don't spend a retrieval + LLM call on an empty prompt.
        continue
    # Chain.run() is deprecated. invoke() takes the question under the "query" key
    # and returns a dict whose "result" entry holds the answer text.
    answer = qa.invoke({"query": query})["result"]
    print("🤖", answer)



🤖 Google Employee Review Chatbot (Gemini Powered)
Type 'exit' to quit

You: how is the work environment in google
🤖 Google's work environment is described as incredibly friendly, with helpful colleagues and managers who care about employees' career goals.  The environment is also catered to employee health, happiness, and wellness, with Google actively seeking employee input and implementing changes to improve their lives.  While some find the work can become boring for those not aiming for advancement, opportunities for mobility are considered accessible and well-defined.  One review mentions a dynamic and exciting environment, although another notes that Google can be inflexible in certain areas.
You: hi
🤖 hi
You: how is the food in google
🤖 Google provides free food during lunch, snacks, and drinks.  Multiple reviews mention the abundance of free food positively, although one review notes it's difficult to resist.
You: how are my managers
🤖 Reviews are mixed.  Some describe manager

In [2]:
%pip install -U langchain-community langchain-google-genai chromadb

Collecting langchain-community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.5-py3-none-any.whl.metadata (5.2 kB)
Collecting chromadb
  Downloading chromadb-1.0.13-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting langchain-core<1.0.0,>=0.3.66 (from langchain-community)
  Downloading langchain_core-0.3.66-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain<1.0.0,>=0.3.26 (from langchain-community)
  Downloading langchain-0.3.26-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.0-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.w

In [1]:
%pip install --upgrade langchain langchain-google-genai


