In [3]:
# Core RAG dependencies. %pip (instead of !pip) installs into the environment
# of the running kernel; -q keeps the resolver log out of the notebook.
%pip install -q -U langchain langchain-community openai faiss-cpu sentence-transformers


Collecting langchain
  Downloading langchain-1.0.3-py3-none-any.whl.metadata (4.7 kB)
Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting openai
  Downloading openai-2.7.1-py3-none-any.whl.metadata (29 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting langchain-core<2.0.0,>=1.0.0 (from langchain)
  Downloading langchain_core-1.0.3-py3-none-any.whl.metadata (3.5 kB)
Collecting langgraph<1.1.0,>=1.0.2 (from langchain)
  Downloading langgraph-1.0.2-py3-none-any.whl.metadata (7.4 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading da

In [4]:
# UI, model and tunnelling dependencies. %pip (instead of !pip) installs into
# the environment of the running kernel; -q keeps the log out of the notebook.
%pip install -q streamlit transformers sentence-transformers faiss-cpu langchain-community langchain-text-splitters pyngrok


Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.4.1 streamlit-1.51.0


In [5]:
# Model runtime dependencies. %pip (instead of !pip) installs into the
# environment of the running kernel; -q keeps the log out of the notebook.
%pip install -q transformers accelerate torch sentence-transformers




In [6]:
# Create the project layout used by the cells below: src/ receives the
# generated Python modules and data/ receives the business CSV.
# -p makes this safe to re-run when the directories already exist.
!mkdir -p src data


In [7]:
%%writefile src/data_loader.py
import pandas as pd

def load_business_data(path="data/business_data.csv"):
    """Read the business CSV and blank out missing cells.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A DataFrame with the file's contents in which every missing value
        has been replaced by an empty string.
    """
    raw_frame = pd.read_csv(path)
    return raw_frame.fillna("")


Writing src/data_loader.py


In [8]:
%%writefile src/rag_agent.py
import pandas as pd
from transformers import pipeline
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.docstore.document import Document


# Load the text-generation model once, at import time, as a module-level
# global so every query reuses the same pipeline.
# NOTE(review): going by its name, "falcon-7b-instruct" is a 7B-parameter
# model, i.e. not a lightweight one — confirm the runtime has enough memory,
# or swap in a smaller instruct model.
# device_map="auto" leaves device placement to the library.
generator = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    device_map="auto"
)


def build_agent(data_path="data/business_data.csv"):
    """Build a retrieval-augmented question-answering callable over a business CSV.

    The CSV is flattened to one text line per row, the lines are grouped into
    chunks, embedded with a sentence-transformer model and indexed in FAISS.
    The returned function retrieves the chunks most similar to a question and
    asks the module-level ``generator`` pipeline to answer from them.

    Args:
        data_path: Path to a CSV containing the columns ``Month``,
            ``Sales (INR)``, ``Expenses (INR)``, ``Customers``,
            ``Inventory Cost (INR)`` and ``Marketing Spend (INR)``.

    Returns:
        A function ``answer_question(query: str)`` that returns the generated
        answer as a stripped string.
    """
    # Load CSV
    df = pd.read_csv(data_path)

    # Convert rows to text for context: one line per month.
    text = "\n".join(
        f"Month: {r['Month']}, Sales: {r['Sales (INR)']}, Expenses: {r['Expenses (INR)']}, "
        f"Customers: {r['Customers']}, Inventory Cost: {r['Inventory Cost (INR)']}, "
        f"Marketing Spend: {r['Marketing Spend (INR)']}"
        for _, r in df.iterrows()
    )
    docs = [Document(page_content=text)]

    # Rows are joined with single newlines, so the splitter has to break on
    # "\n". With the splitter's default separator (a blank line) the text is
    # never split: the whole table becomes one oversized chunk and the
    # retriever has nothing to choose between.
    splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    # Create embeddings & FAISS retriever (top 3 most similar chunks per query)
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)
    retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    def answer_question(query: str):
        """Answer ``query`` from the most relevant rows of the indexed CSV."""
        # Retrieve relevant chunks
        retrieved_docs = retriever.invoke(query)
        context = "\n".join(d.page_content for d in retrieved_docs)

        # Create structured prompt
        prompt = (
            f"You are a business analyst. Use the given context to answer clearly and concisely.\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}\n\n"
            f"Answer:"
        )

        response = generator(
            prompt,
            max_new_tokens=150,
            # temperature/top_p only apply to sampling-based decoding, so
            # enable it explicitly instead of relying on library defaults.
            do_sample=True,
            temperature=0.6,
            top_p=0.85,
            repetition_penalty=1.5,
            # Return only the continuation. Splitting the full text on
            # "Answer:" would cut the reply short whenever the model itself
            # emits that marker.
            return_full_text=False,
        )[0]["generated_text"]

        return response.strip()

    return answer_question


# Streamlit wrapper
# The agent is built once, when this module is imported, and shared by every
# call to query_agent below.
_agent = build_agent()


def query_agent(query: str):
    """Answer a user question with the module-level agent (entry point for the Streamlit app)."""
    answer = _agent(query)
    return answer


Writing src/rag_agent.py


In [9]:
%%writefile src/app.py
import streamlit as st
from rag_agent import query_agent

# Page heading and a short hint about what can be asked.
st.title("GenAI Agent for SME/MSME Business Insights")
st.write("Ask about your sales, profit or cost-saving strategies!")

query = st.text_input("Enter your question:")
if st.button("Get Insights"):
    question = query.strip()
    if question:
        with st.spinner("Analyzing..."):
            st.success(query_agent(question))
    else:
        # Don't spend a model call on an empty prompt; tell the user instead.
        st.warning("Please enter a question first.")


Writing src/app.py


In [10]:
from google.colab import files
# Open Colab's file picker; the chosen file is saved into the current
# working directory under its own name.
uploaded = files.upload()
# Move the CSV to where the loaders look for it (data/business_data.csv).
# NOTE(review): this assumes the uploaded file is named exactly
# business_data.csv — any other name makes the mv fail.
!mv business_data.csv data/


Saving business_data.csv to business_data.csv


In [50]:
from getpass import getpass
import os
from pyngrok import ngrok

# Prompt for the ngrok auth token without echoing it, and keep it in the
# process environment rather than in the notebook source.
os.environ["NGROK_TOKEN"] = getpass("Enter your ngrok token: ")

# Register the token that was just stored with pyngrok.
ngrok.set_auth_token(os.environ["NGROK_TOKEN"])


Enter your ngrok token: ··········


In [51]:
from pyngrok import ngrok
# Stop ngrok before starting a new tunnel — presumably so that tunnels left
# over from an earlier run of the next cell do not pile up.
ngrok.kill()   # closes all active tunnels


In [52]:
# Start the Streamlit app in the background on port 8501; its output is
# redirected to /content/logs.txt, so check that file if the page fails to load.
!nohup streamlit run src/app.py --server.port 8501 &>/content/logs.txt &
# Open an ngrok tunnel to the same port and print the tunnel object, whose
# repr contains the public URL.
print("Public URL:", ngrok.connect(8501))


Public URL: NgrokTunnel: "https://grover-unbeholdable-reproachfully.ngrok-free.dev" -> "http://localhost:8501"


In [17]:
%%writefile src/app.py
import streamlit as st
from rag_agent import query_agent
import pandas as pd
import matplotlib.pyplot as plt

# ==================== PAGE CONFIG ====================
# Configure the browser tab title and use the full page width.
st.set_page_config(page_title="AI Business Analyst Dashboard", layout="wide")

# ==================== CUSTOM STYLE ====================
# Raw CSS injected into the page: background gradient, purple buttons with a
# hover effect, and purple headings.
_PAGE_CSS = """
<style>
body {
    background: linear-gradient(135deg, #e0f7fa, #ede7f6);
    font-family: 'Poppins', sans-serif;
}
.stButton>button {
    background-color: #6a1b9a;
    color: white;
    border-radius: 10px;
    padding: 0.6em 1.2em;
    border: none;
    font-weight: 500;
    transition: 0.3s;
}
.stButton>button:hover {
    background-color: #4a148c;
    transform: scale(1.05);
}
h1, h2, h3 {
    color: #4a148c;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# ==================== TITLE ====================
st.title("AI Business Analyst Dashboard")
st.markdown("Your intelligent assistant for analyzing sales, expenses, and marketing trends.")

# ==================== FILE UPLOAD ====================
# Columns that the metrics and chart sections below index by name.
REQUIRED_COLUMNS = [
    "Month",
    "Sales (INR)",
    "Expenses (INR)",
    "Marketing Spend (INR)",
]

uploaded = st.file_uploader(" Upload a CSV file (optional)", type=["csv"])

if uploaded:
    try:
        df = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Show a readable message instead of a traceback for an unreadable file.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
else:
    # Nothing uploaded: fall back to the bundled dataset.
    df = pd.read_csv("data/business_data.csv")

# Validate up front so the sections below don't fail with KeyError/ValueError.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    st.error("The dataset is missing required columns: " + ", ".join(missing_columns))
    st.stop()
if df.empty:
    st.error("The dataset contains no rows.")
    st.stop()

if uploaded:
    st.success(" New dataset uploaded successfully!")
    # query_agent comes from rag_agent, which builds its index once from
    # data/business_data.csv at import time, so it never sees this upload.
    st.warning(
        "Charts and metrics use the uploaded file, but AI answers are still "
        "based on the default dataset."
    )

# ==================== SIDEBAR METRICS ====================
with st.sidebar:
    st.header(" Business Summary")

    # Basic metrics. "Profit" is added to df because the charts and the
    # overview further down read it as well.
    df["Profit"] = df["Sales (INR)"] - df["Expenses (INR)"]
    avg_sales = df["Sales (INR)"].mean()
    avg_expenses = df["Expenses (INR)"].mean()
    avg_profit = df["Profit"].mean()

    # A month with zero marketing spend would divide by zero and turn the
    # average into inf/NaN, so such months are left out of the ROI mean.
    marketing_spend = df["Marketing Spend (INR)"].replace(0, float("nan"))
    roi = ((df["Profit"] / marketing_spend) * 100).mean()
    best_month = df.loc[df["Profit"].idxmax(), "Month"]

    st.metric(" Average Sales (₹)", f"{avg_sales:,.0f}")
    st.metric(" Average Expenses (₹)", f"{avg_expenses:,.0f}")
    st.metric(" Average Profit (₹)", f"{avg_profit:,.0f}")
    # roi is NaN when no month has a non-zero marketing spend.
    st.metric(" ROI (%)", "N/A" if pd.isna(roi) else f"{roi:.2f}")
    st.metric(" Best Month", best_month)

    st.markdown("---")
    st.info(" Tip: Try asking questions like *'Show sales trends'* or *'Which month had highest profit?'*")

# ==================== AUTO SUMMARY ====================
# First run of a session: start with an empty conversation log.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# The headline overview is shown only until the first question has been asked.
if not st.session_state["chat_history"]:
    st.markdown("###  Quick Overview")
    overview = (
        f"Your dataset covers **{len(df)} months**. "
        f"The highest profit was in **{best_month}**, "
        f"with an average ROI of **{roi:.2f}%**."
    )
    st.success(overview)

# ==================== CHARTS SECTION ====================
st.markdown("## Monthly Trends")

col1, col2 = st.columns(2)

with col1:
    st.markdown("### Sales vs Expenses")
    fig, ax = plt.subplots()
    ax.plot(df["Month"], df["Sales (INR)"], label="Sales", marker="o", color="#6a1b9a")
    ax.plot(df["Month"], df["Expenses (INR)"], label="Expenses", marker="o", color="#ce93d8")
    # Style through the Axes object rather than pyplot's implicit
    # "current figure", so only this figure is affected.
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_xlabel("Month")
    ax.set_ylabel("Amount (INR)")
    ax.legend()
    st.pyplot(fig)
    # This script is re-executed on every interaction; close the figure so
    # one is not left open per rerun.
    plt.close(fig)

with col2:
    st.markdown("### Monthly Profit")
    st.bar_chart(df.set_index("Month")[["Profit"]])

st.markdown("---")

# ==================== AI Q&A CHAT ====================
st.header(" Ask the AI Analyst")

query = st.text_input("Enter your business question:")
ask = st.button(" Analyze")

# Whitespace-only input counts as empty.
question = query.strip()

if ask and not question:
    # Give feedback instead of silently doing nothing (or prompting the
    # model with blanks).
    st.warning("Please enter a question before analyzing.")
elif ask:
    with st.spinner("Analyzing your business data..."):
        answer = query_agent(question)
        st.session_state.chat_history.append((question, answer))
        st.success(answer)

    # Auto chart trigger if user asks about trends
    if any(x in question.lower() for x in ["trend", "chart", "plot", "graph", "compare"]):
        st.info(" Here's a visual representation of your data:")
        st.line_chart(df.set_index("Month")[["Sales (INR)", "Expenses (INR)"]])

# ==================== CHAT HISTORY ====================
# Render the conversation so far, newest exchange first.
if st.session_state.chat_history:
    st.markdown("---")
    st.markdown("###  Conversation History")
    for q, a in reversed(st.session_state.chat_history):
        # No space after the opening ** — with one, Markdown does not treat
        # the asterisks as bold and prints them literally.
        st.markdown(f"**You:** {q}")
        st.markdown(f"**Analyst:** {a}")


Overwriting src/app.py
