In [None]:
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

# 1. Fake "Amazon Electronics" reviews: a tiny in-memory sample, so nothing large
#    has to be downloaded just to try the pipeline.
# Each row is (text, category, sentiment); the rows are expanded into dicts below.
_REVIEW_FIELDS = ("text", "category", "sentiment")
_REVIEW_ROWS = [
    ("The battery life on this phone is terrible. Drains in 4 hours.", "Battery", "Negative"),
    ("Amazing camera quality, especially in low light. Best purchase.", "Camera", "Positive"),
    ("Screen cracked within a week. Gorilla glass is a lie.", "Quality", "Negative"),
    ("Delivery was fast, arrived in 2 days. Packaging was good.", "Delivery", "Positive"),
    ("Overpriced for what you get. The processor is slow.", "Price", "Negative"),
]
data = [dict(zip(_REVIEW_FIELDS, row)) for row in _REVIEW_ROWS]

# The emoji in this message was stored mis-encoded (mojibake); restored to the intended character.
print("🔄 Processing data...")

# 2. Convert to LangChain Documents
# The review text becomes the searchable page content; category and sentiment
# ride along as metadata so results can be filtered on them later if needed.
documents = [
    Document(
        page_content=item["text"],
        metadata={"category": item["category"], "sentiment": item["sentiment"]},
    )
    for item in data
]

# 3. Initialize Embedding Model (runs locally - free & private)
# This turns text into numbers (vectors)
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 4. Create and Save the Vector Database (ChromaDB)
# This creates (or reuses) a folder named "chroma_db" in the current directory.
#
# Stable, position-based IDs keep this cell idempotent: without explicit ids,
# every re-run stores the same reviews again under fresh random ids, so the
# persisted collection fills up with duplicates and similarity search starts
# returning the same review several times. With fixed ids a re-run overwrites
# the existing entries instead.
# NOTE: entries written by earlier runs (random ids) are not removed; delete the
# "./chroma_db" folder once if it already contains duplicates.
document_ids = [f"review-{index}" for index in range(len(documents))]

vector_db = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    ids=document_ids,
    persist_directory="./chroma_db",
)

# The emoji in this message was stored mis-encoded (mojibake); restored to the intended character.
print("✅ Success! Database created in './chroma_db' folder.")

In [None]:
# Installs the packages used by this notebook and by the generated app.py into the
# environment of the running kernel (%pip targets the kernel's environment).
# On a fresh environment, run this cell BEFORE the import cell at the top of the
# notebook; those imports fail until the packages are installed.
# NOTE(review): versions are not pinned, so results may differ between installs.
%pip install langchain langchain-community langchain-core langchain-groq langchain-huggingface langchain-chroma streamlit chromadb pandas sentence-transformers

In [None]:
# Optional: upgrade (-U) the LangChain / Chroma packages listed below to their
# latest releases in the current kernel environment.
# NOTE(review): this largely repeats the install cell above, and it does not list
# langchain-huggingface, streamlit or pandas (which that cell installs) - consider
# merging the two cells into one.
%pip install -U langchain langchain-community langchain-core langchain-groq langchain-chroma chromadb sentence-transformers

In [None]:
%%writefile app.py

import os

import pandas as pd
import streamlit as st
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

# --- CONFIGURATION ---
# ⚠️ Never commit or share a real key. Export GROQ_API_KEY in the shell that
# launches Streamlit and it will be picked up here. The placeholder below is only
# used when the variable is unset; it does not contain "gsk_", so the key check
# in the UI will ask for a real key instead of calling the API with it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "ENTER_YOUR_GROQ_API_KEY_HERE").strip()

# --- SETUP PAGE ---
# set_page_config is called before any other Streamlit command in this script.
st.set_page_config(page_title="ReviewSense Dashboard", layout="wide")
# The title's leading emoji was stored mis-encoded (mojibake); restored to the robot-face character.
st.title("🤖 ReviewSense: GenAI Customer Insights")

# --- LOAD DATABASE ---
@st.cache_resource
def load_resources(
    persist_directory: str = "./chroma_db",
    embedding_model_name: str = "all-MiniLM-L6-v2",
    llm_model_name: str = "llama-3.1-8b-instant",
):
    """Build the vector store and chat model used by the app.

    Decorated with ``st.cache_resource`` so the objects are created once per
    distinct argument combination and reused across Streamlit reruns.

    Args:
        persist_directory: Folder holding the persisted Chroma collection.
        embedding_model_name: Sentence-transformers model used to embed queries.
            It should match the model that was used when the collection was built.
        llm_model_name: Groq-hosted chat model to query. The previous hard-coded
            value, "llama3-8b-8192", is believed to have been retired by Groq,
            so the default now points at its documented replacement; confirm
            against Groq's model list. Pass another name to override.

    Returns:
        A ``(vector_db, llm)`` tuple: the Chroma vector store and the ChatGroq client.
    """
    # 1. Load Embedding Model
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # 2. Connect to Database
    vector_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

    # 3. Connect to LLM (the key comes from the module-level GROQ_API_KEY setting)
    llm = ChatGroq(model_name=llm_model_name, groq_api_key=GROQ_API_KEY)

    return vector_db, llm

# --- UI LAYOUT ---
# Two equal-width, side-by-side columns: search/chat on the left, charts on the right.
col1, col2 = st.columns(2)

# LEFT COLUMN: RAG Chatbot (Manual Logic - No Chains Required)
with col1:
    # The header emoji was stored mis-encoded (mojibake); restored to the speech-balloon character.
    st.header("💬 Semantic Search")
    user_query = st.text_input("Ask a question about the reviews:")

    # The button is evaluated first so it is always rendered. The query is
    # stripped for the emptiness test so whitespace-only input is ignored
    # instead of being sent to the LLM.
    if st.button("Analyze") and user_query.strip():
        if "gsk_" not in GROQ_API_KEY:
            st.error("Please paste your valid Groq API Key in the code!")
        else:
            with st.spinner("Thinking..."):
                vector_db, llm = load_resources()

                # STEP 1: RETRIEVE (Search the database manually)
                # We ask the DB for the 2 most similar documents
                results = vector_db.similarity_search(user_query, k=2)

                # STEP 2: CONTEXT (Combine the text)
                context_text = "\n\n".join(doc.page_content for doc in results)

                # STEP 3: GENERATE (Send to LLM manually)
                # The prompt text is kept exactly as before, indentation included.
                prompt = f"""
                You are a helpful assistant. Answer the user's question using ONLY the context below.
                
                Context:
                {context_text}
                
                Question: 
                {user_query}
                """

                if results:
                    response = llm.invoke(prompt)

                    # STEP 4: DISPLAY
                    st.success(response.content)

                    with st.expander("See Retrieved Context"):
                        for doc in results:
                            st.write(f"- {doc.page_content}")
                else:
                    # Nothing retrieved (e.g. the database folder is missing or empty):
                    # say so rather than asking the LLM to answer from an empty context.
                    st.warning(
                        "No reviews were found in './chroma_db'. "
                        "Build the database from the notebook first."
                    )

# RIGHT COLUMN: Dashboard Analytics
with col2:
    # The header emoji was stored mis-encoded (mojibake); restored to the bar-chart character.
    st.header("📊 Defect Trends")
    # NOTE: these counts are hard-coded placeholder values; they are not computed
    # from the review database.
    chart_data = pd.DataFrame({
        "Category": ["Battery", "Camera", "Quality", "Delivery", "Price"],
        "Complaints": [45, 12, 30, 5, 20],
    })
    # Index by category so each bar is labelled with its category name.
    st.bar_chart(chart_data.set_index("Category"))
    st.caption("Automated categorization of 10,000+ reviews.")

In [None]:
import os

# Build the two terminal commands first so the exact text shown to the user is
# easy to inspect. The working directory is wrapped in double quotes so a path
# containing spaces is still passed to `cd` as a single argument.
cd_command = f'cd "{os.getcwd()}"'
run_command = "streamlit run app.py"

print("1. COPY THIS COMMAND AND RUN IT IN YOUR TERMINAL:")
print(cd_command)
print("\n2. THEN RUN THIS:")
print(run_command)