In [1]:
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load & prepare data
@st.cache_data
def load_csv(path="sales_data.csv"):
    """Read the sales CSV and add calendar helper columns.

    When the file has a "Date" column it is parsed to datetimes (values that
    cannot be parsed are coerced to NaT) and three helper columns are derived
    from it: "_year", "_month" and "_quarter". Rows whose date is NaT get a
    missing value in each helper column.

    When there is no "Date" column the frame is returned as read, without the
    helper columns. The cells below already check for "_year" / "_month"
    before using them, so they simply skip the date-based views in that case
    instead of the whole app failing with a KeyError here.

    Parameters
    ----------
    path : str
        Location of the CSV file; defaults to "sales_data.csv".

    Returns
    -------
    pandas.DataFrame
        The loaded data, plus the helper columns when "Date" is present.
    """
    df = pd.read_csv(path)
    if "Date" in df.columns:
        df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
        df["_year"] = df["Date"].dt.year
        df["_month"] = df["Date"].dt.month
        df["_quarter"] = df["Date"].dt.quarter
    return df

df = load_csv()

2025-09-15 19:16:04.000 
  command:

    streamlit run C:\CoxApps\Python312\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [None]:
# Build corpus
def build_corpus(df):
    """Turn a DataFrame into short text facts for the TF-IDF retriever.

    Two kinds of facts are produced, in this order:

    1. One line per numeric column with its sum, mean, median and non-null
       count. Columns with no non-null values are skipped.
    2. When both "_year" and "Sales" exist, one line per year with the total
       of "Sales" for that year.

    The calendar helper columns "_year", "_month" and "_quarter" are numeric,
    but aggregating them (e.g. a sum of year numbers) is not a meaningful
    fact, so they are left out of the per-column summaries.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to summarise.

    Returns
    -------
    list[str]
        The generated fact strings; empty if nothing could be summarised.
    """
    # Derived date parts: used for grouping only, never summarised as metrics.
    helper_cols = {"_year", "_month", "_quarter"}

    corpus = []
    for c in df.select_dtypes(include=["number"]).columns:
        if c in helper_cols:
            continue
        s = df[c].dropna()
        if s.empty:
            continue
        corpus.append(f"{c}: sum={s.sum():.2f}, mean={s.mean():.2f}, median={s.median():.2f}, count={len(s)}")

    if "_year" in df.columns and "Sales" in df.columns:
        # groupby drops rows whose year is missing, so int(year) is safe here.
        yearly_sales = df.groupby("_year")["Sales"].sum()
        for year, total in yearly_sales.items():
            corpus.append(f"Year {int(year)} Sales: sum={total:.2f}")
    return corpus

# Text facts describing the loaded data; these are what questions are matched against.
corpus = build_corpus(df)

# Fit retriever: learn the TF-IDF vocabulary from the facts and vectorise them
# in a single pass. `vec` is reused later to vectorise the user's question,
# `mat` holds one row per entry of `corpus`.
# NOTE(review): fitting on an empty corpus is expected to raise in scikit-learn —
# confirm whether an empty/non-numeric CSV needs a friendlier message here.
vec = TfidfVectorizer()
mat = vec.fit_transform(corpus)

In [None]:
# Streamlit UI: browser-tab title and wide layout, then the on-page heading.
# NOTE(review): Streamlit documents set_page_config as having to be the first
# Streamlit command of the script; the cached load_csv() call in an earlier
# cell runs before this one — confirm this is fine on the installed version.
st.set_page_config(
    layout="wide",
    page_title="InsightForge BI Assistant",
)
st.title("InsightForge — BI Assistant")

In [None]:
# KPIs: one summary line (sum, mean, median, std) per numeric column.
st.header("Key KPIs")
num_cols = df.select_dtypes(include=["number"]).columns
for col in num_cols:
    # "_year", "_month" and "_quarter" are date parts derived from "Date";
    # they are numeric, but their sum/mean is not a KPI, so do not report them.
    if col in ("_year", "_month", "_quarter"):
        continue
    s = df[col].dropna()
    # Columns with no non-null values have nothing to summarise.
    if len(s) > 0:
        st.write(f"**{col}** → sum={s.sum():,.2f}, mean={s.mean():.2f}, median={s.median():.2f}, std={s.std():.2f}")

In [None]:
# Trends: total "Sales" per year (line) and per month number (bars).
st.header("Sales Trends")
has_sales = "Sales" in df.columns

# Yearly totals; skipped when either column is absent.
if has_sales and "_year" in df.columns:
    gy = df.groupby("_year", as_index=False)["Sales"].sum()
    st.line_chart(gy, x="_year", y="Sales")

# Monthly totals, grouped on the month number only, so the same month of
# different years is pooled into one bar.
if has_sales and "_month" in df.columns:
    gm = df.groupby("_month", as_index=False)["Sales"].sum()
    st.bar_chart(gm, x="_month", y="Sales")

In [None]:
# Q&A box: retrieve the corpus facts most similar to the typed question.
st.header("Ask a Question about the Data")
q = st.text_input("Type a question, e.g., 'Which year had the highest sales?'")
if st.button("Ask") and q.strip():
    # Vectorise the question with the fitted TF-IDF model and score it against
    # every fact (one cosine similarity per corpus entry).
    qv = vec.transform([q])
    sims = cosine_similarity(qv, mat).flatten()

    # Best matches first, at most three. A similarity of 0 means the question
    # shares no known term with the fact, so such entries are dropped rather
    # than being shown as if they answered the question.
    idx = [i for i in sims.argsort()[::-1][:3] if sims[i] > 0]
    facts = [corpus[i] for i in idx]

    st.subheader("Answer")
    st.write(f"Q: {q}")
    if facts:
        for fact in facts:
            st.write("-", fact)
    else:
        st.write("No matching facts were found for this question.")