# **Product Recommender App**

**installing important libraries**

In [1]:
! pip install pandas
! pip install numpy
! pip install scikit-learn
! pip install matplotlib
! pip install joblib
! pip install streamlit


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m59.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.50.0


installing important libraries quietly

In [2]:
!pip -q install pandas numpy scikit-learn joblib streamlit cloudflared


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.0/63.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for cloudflared (setup.py) ... [?25l[?25hdone


writing content into the file, this script contains functions for building synthetic data, preprocessing data, adding implicit feedback, training collaborative filtering and content-based filtering models, evaluating the models, and saving the trained models and data.


In [3]:
%%writefile /content/smart_recommender.py
import os, math, random
import numpy as np, pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import joblib

SEED=42
random.seed(SEED); np.random.seed(SEED)

def build_synth(n_users=600,n_items=350,minr=10,maxr=25):
    users=[f"U{u}" for u in range(1,n_users+1)]
    items=[f"P{i}" for i in range(1,n_items+1)]
    cats=['Electronic','Books','Home','Toys','Beauty','Sports','Clothing','Grocery']
    rows=[]
    for u in users:
        k=np.random.randint(minr,maxr+1)
        rated=np.random.choice(items,size=k,replace=False)
        for it in rated:
            r=np.clip(np.random.normal(3.6,1.0),1,5)
            r=round(r*2)/2.0
            rows.append((u,it,r))
    ratings=pd.DataFrame(rows,columns=['userId','productId','rating'])
    kw={'Electronic':['battery','wireless','bluetooth','USB','portable','charger'],
        'Books':['story','novel','guide','history','author','learn'],
        'Home':['kitchen','durable','design','compact','decor','clean'],
        'Toys':['kids','fun','safe','interactive','educational','colorful'],
        'Beauty':['gentle','skin','organic','scent','serum','moisturizer'],
        'Sports':['fitness','outdoor','durable','training','performance','comfort'],
        'Clothing':['fabric','comfortable','casual','size','style','soft'],
        'Grocery':['fresh','organic','snack','ingredients','package','tasty']}
    metas=[]
    for i,p in enumerate(items,1):
        c=random.choice(cats); title=f"{c} Product {i}"
        desc=" ".join(np.random.choice(kw[c],size=6,replace=True))
        metas.append({'productId':p,'title':title,'category':c,'description':f"{title}. {desc}. High quality and good value."})
    products=pd.DataFrame(metas)
    return ratings,products

def preprocess(df):
    df=df.drop_duplicates().dropna(subset=['userId','productId','rating']).reset_index(drop=True)
    df['userId']=df['userId'].astype(str); df['productId']=df['productId'].astype(str)
    df['rating']=pd.to_numeric(df['rating'],errors='coerce').astype(float)
    return df

def add_implicit(df):
    mult=np.random.uniform(0.8,1.4,size=len(df))
    df=df.copy()
    df['implicit_score']=(df['rating']/5.0)*mult
    return df

def train_cf(train_df,n_components=30):
    users=sorted(train_df['userId'].unique())
    items=sorted(train_df['productId'].unique())
    pivot=train_df.pivot_table(index='userId',columns='productId',values='rating').reindex(index=users,columns=items)
    filled=pivot.copy()
    umean=pivot.mean(axis=1); gmean=train_df['rating'].mean()
    for u in users:
        filled.loc[u]=filled.loc[u].fillna(umean.loc[u] if not np.isnan(umean.loc[u]) else gmean)
    svd=TruncatedSVD(n_components=n_components,random_state=SEED)
    U=svd.fit_transform(filled.values); V=svd.components_.T
    pred=np.dot(U,V.T)
    pred_df=pd.DataFrame(pred,index=users,columns=items)
    return svd,pred_df,filled

def train_cbf(products,max_features=2000):
    vec=TfidfVectorizer(max_features=max_features,stop_words='english')
    X=vec.fit_transform(products['description'].fillna(products['title'].fillna('')))
    return vec,X

def evaluate_rmse(pred_df,test_df):
    y_true=[]; y_pred=[]
    for _,r in test_df.iterrows():
        u,p,rt=r['userId'],r['productId'],r['rating']
        if (u in pred_df.index) and (p in pred_df.columns):
            y_true.append(rt); y_pred.append(pred_df.loc[u,p])
    return mean_squared_error(y_true,y_pred,squared=False) if y_true else None

def precision_at_k(pred_df,train_df,test_df,k=5,thr=4.0):
    users=test_df['userId'].unique(); precs=[]
    for u in users:
        if u not in pred_df.index: continue
        train_items=set(train_df[train_df['userId']==u]['productId'])
        cand=[i for i in pred_df.columns if i not in train_items]
        if not cand: continue
        top=sorted([(i,pred_df.loc[u,i]) for i in cand],key=lambda x:x[1],reverse=True)[:k]
        top_ids=[i for i,_ in top]
        relevant=set(test_df[(test_df['userId']==u)&(test_df['rating']>=thr)]['productId'])
        if not top_ids: continue
        prec=len([i for i in top_ids if i in relevant])/len(top_ids)
        precs.append(prec)
    return float(np.mean(precs)) if precs else 0.0

def train_and_save():
    ratings,products=build_synth()
    ratings=preprocess(ratings)
    ratings=add_implicit(ratings)
    train_df,test_df=train_test_split(ratings,test_size=0.2,random_state=SEED)
    svd,pred_df,filled=train_cf(train_df,30)
    vec,X=train_cbf(products)
    rmse=evaluate_rmse(pred_df,test_df)
    p5=precision_at_k(pred_df,train_df,test_df,5,4.0)
    print(f"RMSE: {rmse:.4f} | Precision@5: {p5:.4f}")
    out="/content/recommender_app/models"; os.makedirs(out,exist_ok=True)
    joblib.dump(pred_df, f"{out}/pred_matrix_df.pkl")
    joblib.dump(svd, f"{out}/truncated_svd.pkl")
    joblib.dump(vec, f"{out}/tfidf_vectorizer.pkl")
    joblib.dump(X, f"{out}/tfidf_matrix.pkl")
    products.to_pickle(f"{out}/products_df.pkl")
    train_df.to_pickle(f"{out}/train_ratings_df.pkl")
    print("Artifacts saved to", out)

if __name__=="__main__":
    train_and_save()


Writing /content/smart_recommender.py


uses a bash script to patch the evaluate_rmse function within the file. It replaces the original implementation with a new one that uses numpy for calculation and handles cases where there are no true values to compare against.

In [4]:
%%bash
python - <<'PY'
from pathlib import Path
p = Path("/content/smart_recommender.py")
if not p.exists():
    print("ERROR: /content/smart_recommender.py not found. Make sure file path is correct.")
    raise SystemExit(1)

text = p.read_text()

old_snip = """
def evaluate_rmse(pred_df,test_df):
    y_true=[]; y_pred=[]
    for _,r in test_df.iterrows():
        u,p,rt=r['userId'],r['productId'],r['rating']
        if (u in pred_df.index) and (p in pred_df.columns):
            y_true.append(rt); y_pred.append(pred_df.loc[u,p])
    return mean_squared_error(y_true,y_pred,squared=False) if y_true else None
"""

new_snip = """
import numpy as np
def evaluate_rmse(pred_df,test_df):
    y_true = []
    y_pred = []
    for _,r in test_df.iterrows():
        u = r['userId']; p = r['productId']; rt = r['rating']
        if (u in pred_df.index) and (p in pred_df.columns):
            y_true.append(rt)
            y_pred.append(pred_df.loc[u, p])
    if len(y_true) == 0:
        return None
    y_true = np.array(y_true, dtype=float)
    y_pred = np.array(y_pred, dtype=float)
    mse = np.mean((y_true - y_pred) ** 2)
    return float(np.sqrt(mse))
"""

if old_snip in text:
    text = text.replace(old_snip, new_snip)
    p.write_text(text)
    print("Patched evaluate_rmse in /content/smart_recommender.py")
else:
    import re
    if re.search(r"def evaluate_rmse\(", text):
        text = re.sub(r"def evaluate_rmse\\([\\s\\S]*?\\)\\n\\s*return[\\s\\S]*?\\n", new_snip + "\n", text, flags=re.MULTILINE)
        p.write_text(text)
        print("Replaced evaluate_rmse via regex in /content/smart_recommender.py")
    else:
        print("Could not find evaluate_rmse function. Please open the file and replace the function with the new version manually.")
PY


Patched evaluate_rmse in /content/smart_recommender.py


This runs the train_and_save function within the script, which builds synthetic data, trains the recommender models, evaluates them, and saves the artifacts to the directory and The output shows the RMSE and Precision@5 scores, and confirms where the artifacts were saved.

In [5]:
!python /content/smart_recommender.py


RMSE: 0.9577 | Precision@5: 0.0045
Artifacts saved to /content/recommender_app/models


This cell writes the Python code for a Streamlit web application into a file named streamlit_recommender_app.py in the /content/ directory. This app loads the saved model artifacts and provides a user interface to get product recommendations based on user ID. It implements a hybrid recommendation approach and displays information about the recommended products, including explanations for the recommendations.
**updated to see the trend scores and semantic analysis**

In [6]:
%%writefile /content/streamlit_recommender_app.py
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

@st.cache_resource
def load_artifacts():
    pred_matrix_df = joblib.load("/content/recommender_app/models/pred_matrix_df.pkl")
    svd = joblib.load("/content/recommender_app/models/truncated_svd.pkl")
    tfidf_vectorizer = joblib.load("/content/recommender_app/models/tfidf_vectorizer.pkl")
    tfidf_matrix = joblib.load("/content/recommender_app/models/tfidf_matrix.pkl")
    products_df = pd.read_pickle("/content/recommender_app/models/products_df.pkl")
    ratings_df = pd.read_pickle("/content/recommender_app/models/train_ratings_df.pkl")
    desc_embeddings = np.load("/content/recommender_app/models/desc_embeddings.npy")
    sentence_model = joblib.load("/content/recommender_app/models/sentence_model.pkl")
    return pred_matrix_df, tfidf_vectorizer, tfidf_matrix, products_df, ratings_df, desc_embeddings, sentence_model

pred_matrix_df, tfidf_vectorizer, tfidf_matrix, products_df, ratings_df, desc_embeddings, sentence_model = load_artifacts()

CATEGORY_TO_IMAGE = {
    "Electronic": "https://source.unsplash.com/800x520/?electronic,gadgets",
    "Books": "https://source.unsplash.com/800x520/?books,reading",
    "Home": "https://source.unsplash.com/800x520/?home,interior",
    "Toys": "https://source.unsplash.com/800x520/?toys,kids",
    "Beauty": "https://source.unsplash.com/800x520/?beauty,cosmetics",
    "Sports": "https://source.unsplash.com/800x520/?sports,fitness",
    "Clothing": "https://source.unsplash.com/800x520/?clothing,fashion",
    "Grocery": "https://source.unsplash.com/800x520/?grocery,food",
}
def img_for(cat): return CATEGORY_TO_IMAGE.get(cat, "https://source.unsplash.com/800x520/?product")

PROD_TO_IDX = {pid: idx for idx, pid in enumerate(products_df["productId"])}
POPULAR_ORDER = list(ratings_df["productId"].value_counts().index)

def dynamic_weights(num_ratings, min_r=5, max_r=40):
    if num_ratings <= min_r: a_cf = 0.25
    else: a_cf = 0.25 + 0.65 * min(num_ratings - min_r, max_r - min_r) / (max_r - min_r)
    return a_cf, 1.0 - a_cf

def build_explanations(user_id, pid, cf_raw, cbf_score, semantic_sim):
    reasons=[]
    cf_norm=(cf_raw-1)/4 if cf_raw is not None else 0.0
    if cf_norm>0.5: reasons.append("Users with tastes like yours rated this highly.")
    liked = ratings_df[(ratings_df.userId==user_id) & (ratings_df.rating>=4.0)]['productId'].tolist()
    if semantic_sim > 0.1 and liked:
        liked_idxs=[PROD_TO_IDX[l] for l in liked if l in PROD_TO_IDX]
        if pid in PROD_TO_IDX and liked_idxs:
            p_idx=PROD_TO_IDX[pid]
            liked_embeddings = desc_embeddings[liked_idxs]
            target_embedding = desc_embeddings[p_idx].reshape(1, -1)
            semantic_sims = cosine_similarity(target_embedding, liked_embeddings).flatten()
            best_liked_idx_in_list = int(np.argmax(semantic_sims))
            best_liked_pid = liked[best_liked_idx_in_list]
            best_title=products_df.loc[products_df.productId==best_liked_pid].iloc[0]['title']
            reasons.append(f"Semantically similar to what you liked: '{best_title}'.")
        else:
            reasons.append("Shares features with items you liked.")
    if pid in POPULAR_ORDER:
        rnk=POPULAR_ORDER.index(pid)+1
        if rnk<=20: reasons.append(f"Popular choice (top {rnk} most-rated).")
    if not reasons: reasons.append("Recommended by hybrid model signals.")
    return reasons


def recommend(user_id, n=5):
    all_p = products_df['productId'].tolist()
    seen = ratings_df[ratings_df.userId==user_id]['productId'].tolist()
    cand = [p for p in all_p if p not in seen]
    num_r = len(ratings_df[ratings_df.userId==user_id])
    a_cf, a_cbf = dynamic_weights(num_r)
    rows=[]
    for p in cand:
        cf_raw = float(pred_matrix_df.loc[user_id,p]) if (user_id in pred_matrix_df.index and p in pred_matrix_df.columns) else 0.0
        cf_norm=(cf_raw-1)/4
        liked = ratings_df[(ratings_df.userId==user_id) & (ratings_df.rating>=4.0)]['productId'].tolist()
        semantic_sim = 0.0
        if liked and (p in PROD_TO_IDX):
            p_idx=PROD_TO_IDX[p]
            liked_idxs=[PROD_TO_IDX[l] for l in liked if l in PROD_TO_IDX]

            tfidf_sim = float(cosine_similarity(tfidf_matrix[p_idx], tfidf_matrix[liked_idxs]).flatten().mean()) if liked_idxs else 0.0

            liked_embeddings = desc_embeddings[liked_idxs]
            target_embedding = desc_embeddings[p_idx].reshape(1, -1)
            semantic_sim = float(cosine_similarity(target_embedding, liked_embeddings).flatten().mean()) if liked_idxs else 0.0

            cbf_sim = (tfidf_sim + semantic_sim) / 2.0 if liked_idxs else 0.0

        else:
            cbf_sim=0.0

        score = a_cf*cf_norm + a_cbf*cbf_sim
        meta = products_df.loc[products_df.productId==p].iloc[0]
        rows.append({
            "productId": p,
            "title": meta["title"],
            "category": meta["category"],
            "description": meta["description"],
            "hybrid_score": float(score),
            "cf_raw": float(cf_raw),
            "cbf_sim": float(cbf_sim),
            "semantic_sim": float(semantic_sim),
            "reasons": build_explanations(user_id,p,cf_raw,cbf_sim, semantic_sim),
            "image_url": img_for(meta["category"]),
            "trend_score": float(meta["trend_score"])
        })
    out = pd.DataFrame(rows).sort_values("hybrid_score", ascending=False).head(n).reset_index(drop=True)
    return out, a_cf, a_cbf

st.title("Product Recommender App")
st.caption("Weighted Hybrid: Collaborative + Content-Based with explanations")

users = sorted(ratings_df['userId'].unique())
user = st.selectbox("Select User ID", users)
n = st.slider("Number of recommendations", 3, 10, 5)

if st.button("🔍 Get Recommendations"):
    df, a_cf, a_cbf = recommend(user, n)
    st.subheader(f"Recommendations for {user}")
    st.caption(f"Weights → CF: {a_cf:.2f} | CBF: {a_cbf:.2f}")
    for _, row in df.iterrows():
        st.image(row["image_url"])
        st.markdown(f"### {row['title']}")
        st.markdown(f"*Category:* {row['category']}")
        st.markdown(f"**Hybrid Score:** {row['hybrid_score']:.3f}  |  **CF(raw):** {row['cf_raw']:.3f}  |  **CBF(sim):** {row['cbf_sim']:.3f} | **Semantic Sim:** {row['semantic_sim']:.3f} | **Trend Score:** {row['trend_score']:.3f}") # Display semantic_sim
        st.markdown("**Why this?**")
        for reason in row["reasons"]:
            st.markdown(f"- {reason}")
        st.caption(row["description"][:220] + ("..." if len(row["description"])>220 else ""))
        st.divider()

st.sidebar.header("How it works")
st.sidebar.markdown("- **CF** learns from similar users' ratings.\n- **CBF** compares product text using TF-IDF and semantic embeddings.\n- **Weights** adapt to how active the user is (cold-start aware).\n- Unsplash placeholders used for images; replace with your own if available.")
st.sidebar.markdown("- **Trend Score** reflects recent popularity.")

Writing /content/streamlit_recommender_app.py


This cell uses bash commands to first kill any running Streamlit or cloudflared processes. Then, it starts the Streamlit application in the background on port 8501 and starts a cloudflared tunnel to expose the Streamlit app to the internet. The output shows that previous processes were terminated.

In [7]:
!pkill -f streamlit || true
!pkill -f cloudflared || true

!streamlit run /content/streamlit_recommender_app.py --server.port 8501 --server.headless true &>/content/streamlit.log &

!nohup cloudflared tunnel --url http://localhost:8501 --no-autoupdate > /content/cf.log 2>&1 &

^C
^C


This cell downloads the cloudflared executable for Linux and makes it executable. The output indicates that the file is busy, likely because the previous cell is already using it.

In [8]:
!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O /usr/local/bin/cloudflared
!chmod +x /usr/local/bin/cloudflared

--2025-10-22 10:41:11--  https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/cloudflare/cloudflared/releases/download/2025.10.0/cloudflared-linux-amd64 [following]
--2025-10-22 10:41:12--  https://github.com/cloudflare/cloudflared/releases/download/2025.10.0/cloudflared-linux-amd64
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/106867604/3cf4ee00-005b-4d19-8b1e-15145bd129bf?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-10-22T11%3A34%3A20Z&rscd=attachment%3B+filename%3Dcloudflared-linux-amd64&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-10-22

This cell uses bash commands to check if the Streamlit process is running, display the first 200 lines of the Streamlit log file, and extract the public URL generated by cloudflared from the cloudflared log file. The output confirms the Streamlit process is running, shows some initial Streamlit logs, and provides the public URL to access the app.

In [9]:
!ps -ef | grep streamlit | grep -v grep

!sed -n '1,200p' /content/streamlit.log

!grep -o 'https://.*trycloudflare.com' -m 1 /content/cf.log

root        1052       1 64 10:41 ?        00:00:00 /usr/bin/python3 /usr/local/bin/streamlit run /content/streamlit_recommender_app.py --server.port 8501 --server.headless true


**Adding Semantic embeddings**

In [10]:
!pip install -q sentence-transformers
from sentence_transformers import SentenceTransformer
import numpy as np, joblib, pandas as pd

products_df = pd.read_pickle("/content/recommender_app/models/products_df.pkl")

model = SentenceTransformer("all-MiniLM-L6-v2")

embeddings = model.encode(
    products_df["description"].fillna("").tolist(),
    show_progress_bar=True,
    batch_size=64,
    convert_to_numpy=True
)


np.save("/content/recommender_app/models/desc_embeddings.npy", embeddings)
joblib.dump(model, "/content/recommender_app/models/sentence_model.pkl")

print("Semantic embeddings created and saved.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Semantic embeddings created and saved.


**Adding Trend scores and updating products_df**

In [11]:
import numpy as np, pandas as pd
import joblib

ratings_df = pd.read_pickle("/content/recommender_app/models/train_ratings_df.pkl")
products_df = pd.read_pickle("/content/recommender_app/models/products_df.pkl")


np.random.seed(42)
ratings_df["timestamp"] = pd.to_datetime(
    np.random.choice(pd.date_range("2024-01-01", "2025-10-01"), len(ratings_df))
)


latest = ratings_df["timestamp"].max()
ratings_df["recency_weight"] = 1 - (
    (latest - ratings_df["timestamp"]).dt.days / (latest - ratings_df["timestamp"]).dt.days.max()
)

trend = ratings_df.groupby("productId")["recency_weight"].mean()
products_df = products_df.merge(trend.rename("trend_score"), on="productId", how="left").fillna(0.5)


products_df.to_pickle("/content/recommender_app/models/products_df.pkl")

print("Trend scores added and products_df updated.")

Trend scores added and products_df updated.


 **to run the streamlit app again**

In [12]:
!pkill -f streamlit || true
!pkill -f cloudflared || true

!streamlit run /content/streamlit_recommender_app.py --server.port 8501 --server.headless true &>/content/streamlit.log &

!nohup cloudflared tunnel --url http://localhost:8501 --no-autoupdate > /content/cf.log 2>&1 &

^C
^C


In [13]:
import time
time.sleep(5)
!ps -ef | grep streamlit | grep -v grep

!sed -n '1,200p' /content/streamlit.log

!grep -o 'https://.*trycloudflare.com' -m 1 /content/cf.log

root        1275       1 11 10:41 ?        00:00:00 /usr/bin/python3 /usr/local/bin/streamlit run /content/streamlit_recommender_app.py --server.port 8501 --server.headless true

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://172.28.0.12:8501
  External URL: http://34.172.34.253:8501

https://vitamin-collectors-hypothetical-preferred.trycloudflare.com


**Adding Real Time User Feedback Learning Loop And Searchable Dropdown**

In [14]:
%%bash
cat > /content/streamlit_recommender_app.py <<'PY'
import streamlit as st
import pandas as pd
import numpy as np
import joblib, os, tempfile
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

@st.cache_resource
def load_artifacts():
    pred_matrix_df = joblib.load("/content/recommender_app/models/pred_matrix_df.pkl")
    svd = joblib.load("/content/recommender_app/models/truncated_svd.pkl")
    tfidf_vectorizer = joblib.load("/content/recommender_app/models/tfidf_vectorizer.pkl")
    tfidf_matrix = joblib.load("/content/recommender_app/models/tfidf_matrix.pkl")
    products_df = pd.read_pickle("/content/recommender_app/models/products_df.pkl")
    ratings_df = pd.read_pickle("/content/recommender_app/models/train_ratings_df.pkl")

    desc_embeddings = np.load("/content/recommender_app/models/desc_embeddings.npy")
    sentence_model = joblib.load("/content/recommender_app/models/sentence_model.pkl")
    return pred_matrix_df, tfidf_vectorizer, tfidf_matrix, products_df, ratings_df, desc_embeddings, sentence_model

pred_matrix_df, tfidf_vectorizer, tfidf_matrix, products_df, ratings_df, desc_embeddings, sentence_model = load_artifacts()


PROD_TO_IDX = {pid: idx for idx, pid in enumerate(products_df["productId"])}
POPULAR_ORDER = list(ratings_df["productId"].value_counts().index)


def dynamic_weights(num_ratings, min_r=5, max_r=40):
    if num_ratings <= min_r: a_cf = 0.25
    else: a_cf = 0.25 + 0.65 * min(num_ratings - min_r, max_r - min_r) / (max_r - min_r)
    return a_cf, 1.0 - a_cf

def adjust_weights_from_feedback(user_id, base_cf=0.6, base_cbf=0.4, fb_path="/content/recommender_app/models/feedback.csv"):
    if not os.path.exists(fb_path): return base_cf, base_cbf
    fb = pd.read_csv(fb_path)
    ufb = fb[fb["userId"] == user_id]
    if ufb.empty: return base_cf, base_cbf
    mean_fb = ufb["feedback_score"].mean()
    if mean_fb >= 0.6: base_cf += 0.05
    elif mean_fb <= 0.4: base_cbf += 0.05
    s = base_cf + base_cbf
    return base_cf/s, base_cbf/s


def build_explanations(user_id, pid, cf_raw, cbf_score, semantic_sim):
    reasons=[]
    cf_norm=(cf_raw-1)/4 if cf_raw is not None else 0.0
    if cf_norm>0.5: reasons.append("Users similar to you rated this product highly.")
    liked = ratings_df[(ratings_df.userId==user_id) & (ratings_df.rating>=4.0)]['productId'].tolist()
    if semantic_sim > 0.1 and liked:
        liked_idxs=[PROD_TO_IDX[l] for l in liked if l in PROD_TO_IDX]
        if pid in PROD_TO_IDX and liked_idxs:
            p_idx=PROD_TO_IDX[pid]
            liked_embeddings = desc_embeddings[liked_idxs]
            target_embedding = desc_embeddings[p_idx].reshape(1, -1)
            semantic_sims = cosine_similarity(target_embedding, liked_embeddings).flatten()
            best_liked_idx_in_list = int(np.argmax(semantic_sims))
            best_liked_pid = liked[best_liked_idx_in_list]
            best_title=products_df.loc[products_df.productId==best_liked_pid].iloc[0]['title']
            reasons.append(f"Semantically similar to your liked item: '{best_title}'.")
        else:
            reasons.append("Shares features with items you liked.")
    if pid in POPULAR_ORDER:
        rnk=POPULAR_ORDER.index(pid)+1
        if rnk<=20: reasons.append(f"Currently trending (Top {rnk} most rated).")
    if not reasons: reasons.append("Suggested by hybrid model blend.")
    return reasons

def recommend_single_product(user_id, product_id):

    num_r = len(ratings_df[ratings_df.userId==user_id])
    a_cf, a_cbf = dynamic_weights(num_r)
    a_cf, a_cbf = adjust_weights_from_feedback(user_id, a_cf, a_cbf)

    cf_raw = float(pred_matrix_df.loc[user_id, product_id]) if (user_id in pred_matrix_df.index and product_id in pred_matrix_df.columns) else 0.0
    cf_norm = (cf_raw - 1) / 4

    liked = ratings_df[(ratings_df.userId==user_id) & (ratings_df.rating>=4.0)]['productId'].tolist()
    semantic_sim = 0.0
    if liked and (product_id in PROD_TO_IDX):
        p_idx = PROD_TO_IDX[product_id]
        liked_idxs = [PROD_TO_IDX[l] for l in liked if l in PROD_TO_IDX]
        if liked_idxs:

            liked_embeddings = desc_embeddings[liked_idxs]
            target_embedding = desc_embeddings[p_idx].reshape(1, -1)
            semantic_sim = float(cosine_similarity(target_embedding, liked_embeddings).flatten().mean())

            tfidf_sim = float(cosine_similarity(tfidf_matrix[p_idx], tfidf_matrix[liked_idxs]).flatten().mean())
            cbf_sim = (tfidf_sim + semantic_sim) / 2.0

        else:
             cbf_sim = 0.0
    else:
        cbf_sim = 0.0

    score = a_cf * cf_norm + a_cbf * cbf_sim
    meta = products_df.loc[products_df.productId==product_id].iloc[0]

    result = {
        "productId": product_id,
        "title": meta["title"],
        "category": meta["category"],
        "description": meta["description"],
        "hybrid_score": float(score),
        "cf_raw": float(cf_raw),
        "cbf_sim": float(cbf_sim),
        "semantic_sim": float(semantic_sim),
        "reasons": build_explanations(user_id, product_id, cf_raw, cbf_sim, semantic_sim),
        "trend_score": float(meta.get("trend_score", 0.5))
    }

    return result, a_cf, a_cbf

st.set_page_config(layout="wide", page_title="Product Recommender App", page_icon="🛒")

st.markdown("""
    <style>
    html {
        scroll-behavior: smooth;
    }
    .stButton button {
        width: 100%;
    }
    </style>
    """, unsafe_allow_html=True)

st.title("Product Recommender App")
st.caption("Hybrid CF + CBF | Feedback Adaptive")

if 'recommendation_generated' not in st.session_state:
    st.session_state.recommendation_generated = False
if 'current_recommendation' not in st.session_state:
    st.session_state.current_recommendation = None
if 'current_user' not in st.session_state:
    st.session_state.current_user = None
if 'current_product' not in st.session_state:
    st.session_state.current_product = None
if 'current_weights' not in st.session_state:
    st.session_state.current_weights = (0.6, 0.4)

st.markdown("### Select User & Product")

users_list = sorted(ratings_df['userId'].unique())
total_users = len(users_list)

products_list = []
for idx, row in products_df.iterrows():
    product_display = f"{row['title'][:60]}... (ID: {row['productId']}) - {row['category']}"
    products_list.append({
        'display': product_display,
        'id': row['productId'],
        'title': row['title']
    })
total_products = len(products_list)

col1, col2 = st.columns(2)

with col1:
    st.markdown(f"**Select User** ({total_users} total users)")

    selected_user = st.selectbox(
        "Choose a user ID:",
        options=users_list,
        format_func=lambda x: f"User ID: {x}",
        key="user_selectbox",
        help="Search by typing user ID"
    )

    st.info(f"Selected: User **{selected_user}**")

with col2:
    st.markdown(f"**Browse Products** ({total_products} total products)")

    selected_product_display = st.selectbox(
        "Choose a product:",
        options=[p['display'] for p in products_list],
        key="product_selectbox",
        help="Search by typing product name or ID"
    )

    selected_product_id = None
    for p in products_list:
        if p['display'] == selected_product_display:
            selected_product_id = p['id']
            selected_product_title = p['title']
            break

    if selected_product_id:
        st.info(f"✓ Selected: **{selected_product_title}**")

st.markdown("---")

if st.button("🎯 Get Recommendation", type="primary", use_container_width=True):
    with st.spinner(f"Generating recommendation for User {selected_user} and Product {selected_product_id}..."):
        result, a_cf, a_cbf = recommend_single_product(selected_user, selected_product_id)
        st.session_state.current_recommendation = result
        st.session_state.current_user = selected_user
        st.session_state.current_product = selected_product_id
        st.session_state.current_weights = (a_cf, a_cbf)
        st.session_state.recommendation_generated = True
        st.success(f"Successfully generated recommendation!")

if st.session_state.recommendation_generated and st.session_state.current_recommendation is not None:
    row = st.session_state.current_recommendation
    user = st.session_state.current_user
    a_cf, a_cbf = st.session_state.current_weights

    st.markdown("---")
    st.subheader(f"Recommendation for User {user}")
    st.caption(f"Model Weights → Collaborative Filtering: {a_cf:.2f} | Content-Based: {a_cbf:.2f}")

    st.markdown("---")

    with st.container():
        st.markdown(f"### {row['title']}")
        st.caption(f"Product ID: {row['productId']} | Category: {row['category']}")

        metric_col1, metric_col2, metric_col3, metric_col4, metric_col5 = st.columns(5)
        with metric_col1:
            st.metric("Hybrid Score", f"{row['hybrid_score']:.3f}")
        with metric_col2:
            st.metric("CF Score", f"{row['cf_raw']:.3f}")
        with metric_col3:
            st.metric("CBF Similarity", f"{row['cbf_sim']:.3f}")
        with metric_col4:
            st.metric("Semantic Sim", f"{row['semantic_sim']:.3f}")
        with metric_col5:
             st.metric("Trend Score", f"{row['trend_score']:.3f}")


        st.markdown("**Why this recommendation?**")
        for reason in row["reasons"]:
            st.markdown(f"- {reason}")


        with st.expander("View Full Description"):
            st.write(row["description"])


        FEEDBACK_PATH = "/content/recommender_app/models/feedback.csv"
        os.makedirs(os.path.dirname(FEEDBACK_PATH), exist_ok=True)

        def append_feedback_row(row_dict):
            header_needed = not os.path.exists(FEEDBACK_PATH)
            df_fb = pd.DataFrame([row_dict])
            try:
                df_fb.to_csv(FEEDBACK_PATH, index=False, header=header_needed, mode="a")
            except Exception:
                with open(FEEDBACK_PATH, "a") as f:
                    if header_needed:
                        f.write(",".join(df_fb.columns.tolist()) + "\n")
                    f.write(",".join(map(str, list(row_dict.values()))) + "\n")

        pid = row["productId"]


        state_like_key = f"liked_{user}_{pid}"
        state_dislike_key = f"disliked_{user}_{pid}"


        if state_like_key not in st.session_state:
            st.session_state[state_like_key] = False
        if state_dislike_key not in st.session_state:
            st.session_state[state_dislike_key] = False


        def handle_like():
            fb = {
                "timestamp": datetime.utcnow().isoformat(),
                "userId": user,
                "productId": pid,
                "feedback_score": 1
            }
            append_feedback_row(fb)
            st.session_state[state_like_key] = True
            st.session_state[state_dislike_key] = False

        def handle_dislike():
            fb = {
                "timestamp": datetime.utcnow().isoformat(),
                "userId": user,
                "productId": pid,
                "feedback_score": 0
            }
            append_feedback_row(fb)
            st.session_state[state_dislike_key] = True
            st.session_state[state_like_key] = False

        def handle_undo_like():
            st.session_state[state_like_key] = False

        def handle_undo_dislike():
            st.session_state[state_dislike_key] = False


        st.markdown("**Your Feedback:**")
        fb_col1, fb_col2, fb_col3 = st.columns([1, 1, 2])

        with fb_col1:
            if not st.session_state[state_like_key]:
                if st.button("Like", key=f"like_{pid}"):
                    handle_like()
                    st.rerun()
            else:
                st.success("Liked!")
                if st.button("Undo", key=f"undo_like_{pid}"):
                    handle_undo_like()
                    st.rerun()

        with fb_col2:
            if not st.session_state[state_dislike_key]:
                if st.button("Dislike", key=f"dislike_{pid}"):
                    handle_dislike()
                    st.rerun()
            else:
                st.error("Disliked!")
                if st.button("Undo", key=f"undo_dislike_{pid}"):
                    handle_undo_dislike()
                    st.rerun()

        with fb_col3:
            if st.session_state[state_like_key]:
                st.info("Great! We'll show you more like this.")
            elif st.session_state[state_dislike_key]:
                st.info("Noted! We'll adjust your recommendations.")

        st.markdown("---")


st.sidebar.header("System Statistics")
st.sidebar.metric("Total Products", len(products_df))
st.sidebar.metric("Total Users", len(ratings_df['userId'].unique()))
st.sidebar.metric("Total Ratings", len(ratings_df))

st.sidebar.header("How It Works")
st.sidebar.markdown("""
- **Collaborative Filtering (CF)**: Learns from similar users' preferences
- **Content-Based Filtering (CBF)**: Uses product features and descriptions
- **Dynamic Weights**: Automatically adapts based on user feedback
- **Searchable Dropdowns**: Easily find any user or product
- **Real-time Feedback**: Like/dislike to improve recommendations
- **Trend Score**: Reflects recent popularity
""")

if st.session_state.recommendation_generated:
    st.sidebar.success(f"Active: Recommendation for User {st.session_state.current_user} & Product {st.session_state.current_product}")

st.sidebar.markdown("---")
st.sidebar.info("💡 **Tip**: Use the search bar in dropdowns to quickly find users or products by typing!")
PY


pkill -f streamlit || true
pkill -f cloudflared || true
sleep 2
nohup streamlit run /content/streamlit_recommender_app.py --server.port 8501 --server.headless true > /content/streamlit.log 2>&1 &
nohup cloudflared tunnel --url http://localhost:8501 --no-autoupdate > /content/cf.log 2>&1 &
sleep 5
grep -o "https://[a-z0-9.-]*\\.trycloudflare\\.com" -m 1 /content/cf.log || true

https://griffin-objects-drove-age.trycloudflare.com


**Testing Feedback**

In [15]:
import os, pandas as pd
p="/content/recommender_app/models/feedback.csv"
print("exists:", os.path.exists(p))
if os.path.exists(p):
    display(pd.read_csv(p).tail(10))

exists: False


**Updating UI of Streamlit APP**

In [18]:
%%bash
set -e

cat > /content/streamlit_recommender_app.py << 'ENDOFPYTHON'
import streamlit as st
import pandas as pd
import numpy as np
import joblib, os
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

st.set_page_config(
    layout="wide",
    page_title="Product Recommender App",
    page_icon="",
    initial_sidebar_state="collapsed"
)

@st.cache_resource
def load_artifacts():
    pred_matrix_df = joblib.load("/content/recommender_app/models/pred_matrix_df.pkl")
    tfidf_vectorizer = joblib.load("/content/recommender_app/models/tfidf_vectorizer.pkl")
    tfidf_matrix = joblib.load("/content/recommender_app/models/tfidf_matrix.pkl")
    products_df = pd.read_pickle("/content/recommender_app/models/products_df.pkl")
    ratings_df = pd.read_pickle("/content/recommender_app/models/train_ratings_df.pkl")

    try:
        desc_embeddings = np.load("/content/recommender_app/models/desc_embeddings.npy")
        sentence_model = joblib.load("/content/recommender_app/models/sentence_model.pkl")
    except FileNotFoundError:
        st.warning("Semantic embeddings or model not found. Please run the embedding generation step.")
        desc_embeddings = None
        sentence_model = None

    return pred_matrix_df, tfidf_vectorizer, tfidf_matrix, products_df, ratings_df, desc_embeddings, sentence_model

pred_matrix_df, tfidf_vectorizer, tfidf_matrix, products_df, ratings_df, desc_embeddings, sentence_model = load_artifacts()

if 'theme' not in st.session_state:
    st.session_state.theme = 'light'
if 'selected_category' not in st.session_state:
    st.session_state.selected_category = 'All'
if 'recommendation_generated' not in st.session_state:
    st.session_state.recommendation_generated = False
if 'current_recommendation' not in st.session_state:
    st.session_state.current_recommendation = None
if 'current_user' not in st.session_state:
    st.session_state.current_user = None

def apply_theme():
    if st.session_state.theme == 'dark':
        primary_bg = "#0e1117"
        secondary_bg = "#1a1d29"
        card_bg = "#262730"
        text_color = "#fafafa"
        text_secondary = "#b0b0b0"
        border = "#3a3a4a"
        input_bg = "#1a1d29"
        input_text = "#fafafa"
        button_text = "#ffffff"
        expander_bg = "#1a1d29"
        expander_border = "#3a3a4a"
        card_shadow = "0 4px 20px rgba(0,0,0,0.3)"
        hover_shadow = "0 8px 32px rgba(0,0,0,0.4)"
        gradient_overlay = "linear-gradient(135deg, rgba(255,75,75,0.05) 0%, rgba(14,165,233,0.05) 100%)"
    else:
        primary_bg = "#f8fafc"
        secondary_bg = "linear-gradient(135deg, #fef3f2 0%, #eff6ff 100%)"
        card_bg = "#ffffff"
        text_color = "#0f172a"
        text_secondary = "#64748b"
        border = "#e2e8f0"
        input_bg = "#ffffff"
        input_text = "#0f172a"
        button_text = "#0f172a"
        expander_bg = "#ffffff"
        expander_border = "#e2e8f0"
        card_shadow = "0 10px 40px rgba(0,0,0,0.08), 0 2px 8px rgba(0,0,0,0.04)"
        hover_shadow = "0 20px 60px rgba(0,0,0,0.12), 0 4px 16px rgba(0,0,0,0.06)"
        gradient_overlay = "linear-gradient(135deg, rgba(255,75,75,0.03) 0%, rgba(14,165,233,0.03) 100%)"

    st.markdown(f"""
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800&display=swap');

    #MainMenu {{visibility: hidden;}}
    footer {{visibility: hidden;}}
    .stDeployButton {{display: none;}}
    header {{visibility: hidden;}}

    .stApp {{
        padding-top: 0 !important;
        margin-top: 0 !important;
        background: {primary_bg if st.session_state.theme == 'dark' else 'linear-gradient(to bottom, #f8fafc 0%, #ffffff 50%, #fef3f2 100%)'} !important;
        font-family: 'Inter', sans-serif !important;
    }}
    div[data-testid="stDecoration"] {{
        display: none !important;
    }}

    .main {{
        background: transparent !important;
        color: {text_color} !important;
        padding: 0 !important;
    }}

    .block-container {{
        padding: 0 !important;
        max-width: 100% !important;
        background: transparent !important;
    }}

    .stMarkdown, .stMarkdown p, .stMarkdown span, .stMarkdown div {{
        color: {text_color} !important;
        font-family: 'Inter', sans-serif !important;
    }}

    .stSelectbox > div > div {{
        background: {input_bg} !important;
        border: 2px solid {'#cbd5e1' if st.session_state.theme == 'light' else border} !important;
        color: {input_text} !important;
        border-radius: 12px !important;
        box-shadow: {'0 2px 8px rgba(0,0,0,0.04)' if st.session_state.theme == 'light' else 'none'} !important;
        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
    }}

    .stSelectbox > div > div:hover {{
        border-color: {'#3b82f6' if st.session_state.theme == 'light' else border} !important;
        box-shadow: {'0 4px 12px rgba(59,130,246,0.15)' if st.session_state.theme == 'light' else 'none'} !important;
        transform: translateY(-1px) !important;
    }}

    .stSelectbox label {{
        color: {text_color} !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        margin-bottom: 0.5rem !important;
    }}

    .stSelectbox [data-baseweb="select"] {{
        background: {input_bg} !important;
    }}

    [data-baseweb="popover"] {{
        background: {input_bg} !important;
        border-radius: 12px !important;
        box-shadow: {'0 10px 40px rgba(0,0,0,0.1)' if st.session_state.theme == 'light' else card_shadow} !important;
        border: {'2px solid #e2e8f0' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    [role="listbox"] {{
        background: {input_bg} !important;
        border-radius: 12px !important;
    }}

    [role="option"] {{
        background: {input_bg} !important;
        color: {input_text} !important;
        padding: 0.75rem 1rem !important;
        font-weight: 500 !important;
        transition: all 0.2s ease !important;
    }}

    [role="option"]:hover {{
        background: {'linear-gradient(135deg, #fef3f2 0%, #eff6ff 100%)' if st.session_state.theme == 'light' else secondary_bg} !important;
        transform: translateX(4px) !important;
    }}

    .stButton > button {{
        border-radius: 12px !important;
        font-weight: 600 !important;
        padding: 0.85rem 2rem !important;
        border: none !important;
        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
        width: 100% !important;
        color: {button_text} !important;
        background: {secondary_bg if st.session_state.theme == 'light' else secondary_bg} !important;
        font-family: 'Inter', sans-serif !important;
        letter-spacing: 0.3px !important;
        box-shadow: {'0 2px 8px rgba(0,0,0,0.04)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .stButton > button:hover {{
        transform: translateY(-3px) !important;
        box-shadow: {'0 8px 24px rgba(0,0,0,0.12)' if st.session_state.theme == 'light' else '0 4px 12px rgba(0,0,0,0.15)'} !important;
    }}

    .stButton > button[kind="primary"] {{
        background: linear-gradient(135deg, #ff4b4b 0%, #ff6b6b 50%, #0ea5e9 100%) !important;
        color: white !important;
        box-shadow: {'0 8px 24px rgba(255,75,75,0.25)' if st.session_state.theme == 'light' else '0 4px 12px rgba(255,75,75,0.3)'} !important;
        position: relative !important;
        overflow: hidden !important;
    }}

    .stButton > button[kind="primary"]::before {{
        content: '' !important;
        position: absolute !important;
        top: 0 !important;
        left: -100% !important;
        width: 100% !important;
        height: 100% !important;
        background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent) !important;
        transition: left 0.5s !important;
    }}

    .stButton > button[kind="primary"]:hover::before {{
        left: 100% !important;
    }}

    .stButton > button[kind="primary"]:hover {{
        transform: translateY(-4px) scale(1.02) !important;
        box-shadow: {'0 12px 32px rgba(255,75,75,0.35)' if st.session_state.theme == 'light' else '0 8px 20px rgba(255,75,75,0.4)'} !important;
    }}

    .streamlit-expanderHeader {{
        background: {expander_bg} !important;
        border: 2px solid {expander_border} !important;
        color: {text_color} !important;
        border-radius: 12px !important;
        font-weight: 600 !important;
        transition: all 0.3s ease !important;
        box-shadow: {'0 2px 8px rgba(0,0,0,0.04)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .streamlit-expanderHeader:hover {{
        border-color: {'#3b82f6' if st.session_state.theme == 'light' else expander_border} !important;
        box-shadow: {'0 4px 12px rgba(59,130,246,0.1)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .streamlit-expanderContent {{
        background: {expander_bg} !important;
        border: 2px solid {expander_border} !important;
        color: {text_color} !important;
        border-radius: 12px !important;
        margin-top: 0.5rem !important;
        box-shadow: {'0 2px 8px rgba(0,0,0,0.04)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .stSpinner > div {{
        border-top-color: {'#3b82f6' if st.session_state.theme == 'light' else text_color} !important;
    }}

    .stSuccess {{
        background: {'linear-gradient(135deg, #ecfdf5 0%, #d1fae5 100%)' if st.session_state.theme == 'light' else secondary_bg} !important;
        color: {'#065f46' if st.session_state.theme == 'light' else text_color} !important;
        border-left: 4px solid #10b981 !important;
        border-radius: 12px !important;
        font-weight: 600 !important;
        box_shadow: {'0 4px 12px rgba(16,185,129,0.1)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .stError {{
        background: {'linear-gradient(135deg, #fef2f2 0%, #fee2e2 100%)' if st.session_state.theme == 'light' else secondary_bg} !important;
        color: {'#991b1b' if st.session_state.theme == 'light' else text_color} !important;
        border_left: 4px solid #ef4444 !important;
        border-radius: 12px !important;
        font-weight: 600 !important;
        box_shadow: {'0 4px 12px rgba(239,68,68,0.1)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .stCheckbox {{
        color: {text_color} !important;
    }}

    .stCheckbox label {{
        color: white !important;
        font-weight: 600 !important;
    }}

    .stCheckbox > label > div {{
        color: white !important;
    }}

    .nav-categories .stButton > button {{
        padding: 0.5rem 1.2rem !important;
        font-size: 0.95rem !important;
        white-space: nowrap !important;
        overflow: hidden;
        text-overflow: ellipsis !important;
        max_width: 150px !important;
        background: rgba(255, 255, 255, 0.3) !important;
        color: white !important;
        border: 2px solid rgba(255, 255, 255, 0.5) !important;
        backdrop-filter: blur(10px) !important;
        font-weight: 700 !important;
        letter-spacing: 0.5px !important;
        border-radius: 20px !important;
        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
    }}

    .nav-categories .stButton > button:hover {{
        background: rgba(255, 255, 255, 0.5) !important;
        border-color: rgba(255, 255, 255, 0.7) !important;
        transform: translateY(-3px) scale(1.02) !important;
        box-shadow: 0 6px 16px rgba(0,0,0,0.2) !important;
    }}

    .nav-categories {{
        display: flex;
        gap: 0.75rem;
        flex-wrap: wrap;
        justify-content: center;
        max-width: 1400px;
        margin: 0 auto;
    }}

    .nav-container {{
        background: linear-gradient(135deg, #ff4b4b 0%, #ff6b6b 25%, #f97316 50%, #3b82f6 75%, #0ea5e9 100%);
        padding: 1.5rem 2rem;
        box-shadow: 0 4px 20px rgba(0,0,0,0.15);
        position: sticky;
        top: 0;
        z-index: 1000;
        position: relative;
        overflow: hidden;
    }}

    .nav-container::before {{
        content: '';
        position: absolute;
        top: -50%;
        left: -50%;
        width: 200%;
        height: 200%;
        background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, transparent 70%);
        animation: rotate 20s linear infinite;
    }}

    @keyframes rotate {{
        from {{ transform: rotate(0deg); }}
        to {{ transform: rotate(360deg); }}
    }}

    .nav-header {{
        display: flex;
        justify_content: space_between;
        align_items: center;
        max_width: 1400px;
        margin: 0 auto 1rem;
        position: relative;
        z_index: 1;
    }}

    .nav-logo {{
        font-size: 2rem;
        font-weight: 800;
        color: white;
        margin: 0;
        text_shadow: 2px 2px 4px rgba(0,0,0,0.2);
        letter-spacing: -0.5px;
    }}

    .content-wrapper {{
        max_width: 800px;
        margin: 2rem auto;
        padding: 0 1rem;
        background: transparent !important;
    }}

    .search-card {{
        background: {card_bg} !important;
        border: {'2px solid ' + border if st.session_state.theme == 'light' else '1px solid ' + border} !important;
        border-radius: 20px !important;
        padding: {'2rem' if st.session_state.theme == 'light' else '1rem'} !important;
        margin-bottom: 2rem !important;
        box_shadow: {card_shadow} !important;
        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
        position: relative !important;
        overflow: hidden !important;
    }}

    .search-card::before {{
        content: '' !important;
        position: absolute !important;
        top: 0 !important;
        left: 0 !important;
        width: 100% !important;
        height: 4px !important;
        background: linear_gradient(90deg, #ff4b4b 0%, #f97316 50%, #0ea5e9 100%) !important;
    }}

    .search-card:hover {{
        transform: translateY(-4px) !important;
        box_shadow: {hover_shadow} !important;
    }}

    .search-title {{
        font-size: {'1.75rem' if st.session_state.theme == 'light' else '1.5rem'} !important;
        font-weight: 800 !important;
        color: {text_color} !important;
        margin_bottom: 1.5rem !important;
        letter-spacing: -0.5px !important;
        background: {'linear_gradient(135deg, #ff4b4b 0%, #0ea5e9 100%)' if st.session_state.theme == 'light' else 'none'} !important;
        -webkit-background-clip: {'text' if st.session_state.theme == 'light' else 'unset'} !important;
        -webkit-text-fill-color: {'transparent' if st.session_state.theme == 'light' else text_color} !important;
        background_clip: {'text' if st.session_state.theme == 'light' else 'unset'} !important;
    }}

    .product-card {{
        background: {card_bg} !important;
        border: {'2px solid ' + border if st.session_state.theme == 'light' else '1px solid ' + border} !important;
        border-radius: 24px !important;
        padding: {'2rem' if st.session_state.theme == 'light' else '1rem'} !important;
        box_shadow: {card_shadow} !important;
        transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1) !important;
        position: relative !important;
        overflow: hidden !important;
    }}

    .product-card::before {{
        content: '' !important;
        position: absolute !important;
        top: 0 !important;
        left: 0 !important;
        width: 100% !important;
        height: 100% !important;
        background: {gradient_overlay} !important;
        pointer-events: none !important;
    }}

    .product-card:hover {{
        transform: translateY(-6px) !important;
        box_shadow: {hover_shadow} !important;
    }}

    .product-title {{
        font-size: {'2.25rem' if st.session_state.theme == 'light' else '2rem'} !important;
        font-weight: 800 !important;
        color: {text_color} !important;
        margin: 0 0 0.5rem 0 !important;
        line-height: 1.2 !important;
        letter-spacing: -0.5px !important;
    }}

    .product-meta {{
        display: flex;
        gap: 1rem;
        align-items: center;
        color: {text_secondary} !important;
        font-size: 0.95rem !important;
        margin_bottom: 1.5rem !important;
        font-weight: 500 !important;
    }}

    .category-badge {{
        display: inline-block;
        background: linear_gradient(135deg, #ff4b4b 0%, #f97316 50%, #0ea5e9 100%);
        color: white !important;
        padding: 0.5rem 1.5rem !important;
        border-radius: 24px !important;
        font-weight: 700 !important;
        font-size: 0.9rem !important;
        box_shadow: {'0 4px 12px rgba(255,75,75,0.25)' if st.session_state.theme == 'light' else '0 2px 8px rgba(255,75,75,0.3)'} !important;
        letter-spacing: 0.3px !important;
    }}

    .recommendation-score {{
        display: flex;
        flex-direction: column;
        align_items: center;
        justify_content: center;
        background: linear_gradient(135deg, #10b981 0%, #34d399 100%);
        color: white !important;
        padding: 2rem !important;
        border-radius: 20px !important;
        min_width: 160px !important;
        box_shadow: {'0 8px 24px rgba(16,185,129,0.3)' if st.session_state.theme == 'light' else '0 4px 12px rgba(16,185,129,0.3)'} !important;
        position: relative !important;
        overflow: hidden !important;
    }}

    .recommendation-score::before {{
        content: '';
        position: absolute;
        top: -50%;
        left: -50%;
        width: 200%;
        height: 200%;
        background: radial-gradient(circle, rgba(255,255,255,0.2) 0%, transparent 70%);
        animation: pulse 3s ease-in-out infinite;
    }}

    @keyframes pulse {{
        0%, 100% {{ transform: scale(1); opacity: 0.5; }}
        50% {{ transform: scale(1.1); opacity: 0.8; }}
    }}

    .score-value {{
        font-size: 3rem !important;
        font-weight: 900 !important;
        line-height: 1 !important;
        margin_bottom: 0.5rem !important;
        color: white !important;
        position: relative !important;
        z_index: 1 !important;
    }}

    .score-label {{
        font-size: 0.85rem !important;
        font-weight: 700 !important;
        text_transform: uppercase !important;
        letter-spacing: 1px !important;
        opacity: 0.95 !important;
        color: white !important;
        position: relative !important;
        z_index: 1 !important;
    }}

    .reasons-title {{
        font-size: {'1.4rem' if st.session_state.theme == 'light' else '1.2rem'} !important;
        font-weight: 800 !important;
        color: {text_color} !important;
        margin: 2rem 0 1rem !important;
        letter-spacing: -0.3px !important;
    }}

    .reason-item {{
        background: {'linear-gradient(135deg, #f0f9ff 0%, #e0f2fe 100%)' if st.session_state.theme == 'light' else secondary_bg} !important;
        border_left: 5px solid #0ea5e9 !important;
        padding: 1.25rem !important;
        margin: 1rem 0 !important;
        border-radius: 12px !important;
        color: {text_color} !important;
        display: flex !important;
        align_items: center !important;
        gap: 1rem !important;
        font-weight: 500 !important;
        box_shadow: {'0 4px 12px rgba(14,165,233,0.1)' if st.session_state.theme == 'light' else 'none'} !important;
        transition: all 0.3s ease !important;
    }}

    .reason-item:hover {{
        transform: translateX(8px) !important;
        box_shadow: {'0 6px 16px rgba(14,165,233,0.15)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .reason-icon {{
        font-size: 1.5rem;
    }}

    .footer {{
        background: {'linear-gradient(135deg, #fef3f2 0%, #eff6ff 100%)' if st.session_state.theme == 'light' else secondary_bg} !important;
        border_top: {'2px solid ' + border if st.session_state.theme == 'light' else '1px solid ' + border} !important;
        padding: 2rem !important;
        text_align: center;
        color: {text_color} !important;
        width: 100%;
        margin_top: 3rem !important;
        box_shadow: {'0 -4px 12px rgba(0,0,0,0.04)' if st.session_state.theme == 'light' else 'none'} !important;
    }}

    .footer-text {{
        color: {text_secondary} !important;
        font-size: 0.9rem !important;
        font-weight: 600 !important;
        letter-spacing: 0.3px !important;
    }}
    </style>
    """, unsafe_allow_html=True)

apply_theme()

PROD_TO_IDX = {pid: idx for idx, pid in enumerate(products_df["productId"])}
POPULAR_ORDER = list(ratings_df["productId"].value_counts().index)

def build_explanations(user_id, pid, cf_raw, cbf_sim, semantic_sim, trend_score):
    reasons = []
    cf_norm = (cf_raw-1)/4 if cf_raw is not None else 0.0

    if cf_norm > 0.5:
        reasons.append("Users similar to you rated this product highly.")

    liked = ratings_df[(ratings_df.userId==user_id) & (ratings_df.rating>=4.0)]['productId'].tolist()
    if semantic_sim > 0.1 and liked and desc_embeddings is not None:
        liked_idxs = [PROD_TO_IDX[l] for l in liked if l in PROD_TO_IDX]
        if pid in PROD_TO_IDX and liked_idxs:
            p_idx = PROD_TO_IDX[pid]
            liked_embeddings = desc_embeddings[liked_idxs]
            target_embedding = desc_embeddings[p_idx].reshape(1, -1)
            semantic_sims = cosine_similarity(target_embedding, liked_embeddings).flatten()
            best_liked_idx_in_list = int(np.argmax(semantic_sims))
            best_liked_pid = liked[best_liked_idx_in_list]
            best_title=products_df.loc[products_df.productId==best_liked_pid].iloc[0]['title']
            reasons.append(f"Semantically similar to your liked item: '{best_title[:40]}...'.")
        else:
            reasons.append("Shares features with items you liked.")

    if pid in POPULAR_ORDER:
        rnk = POPULAR_ORDER.index(pid) + 1
        if rnk <= 20:
            reasons.append(f"Currently trending (Top {rnk} most rated).")

    if trend_score > 0.7:
        reasons.append("Recently popular among users.")

    if not reasons:
        reasons.append("Suggested by hybrid model blend.")

    return reasons


def recommend_single_product(user_id, product_id):
    cf_raw = float(pred_matrix_df.loc[user_id, product_id]) if (user_id in pred_matrix_df.index and product_id in pred_matrix_df.columns) else 0.0
    cf_norm = (cf_raw - 1) / 4

    cbf_sim = 0.0
    semantic_sim = 0.0
    user_liked_items = ratings_df[(ratings_df.userId==user_id) & (ratings_df.rating>=4.0)]['productId'].tolist()

    if user_liked_items and (product_id in PROD_TO_IDX):
        p_idx = PROD_TO_IDX[product_id]
        liked_idxs = [PROD_TO_IDX[l] for l in user_liked_items if l in PROD_TO_IDX]
        if liked_idxs:
            tfidf_sim = float(cosine_similarity(tfidf_matrix[p_idx], tfidf_matrix[liked_idxs]).flatten().mean())

            if desc_embeddings is not None:
                liked_embeddings = desc_embeddings[liked_idxs]
                target_embedding = desc_embeddings[p_idx].reshape(1, -1)
                semantic_sim = float(cosine_similarity(target_embedding, liked_embeddings).flatten().mean())
                cbf_sim = (tfidf_sim + semantic_sim) / 2.0
            else:
                cbf_sim = tfidf_sim
        else:
             cbf_sim = 0.0
    else:
        cbf_sim = 0.0

    meta = products_df.loc[products_df.productId==product_id].iloc[0]
    trend_score = float(meta.get("trend_score", 0.5))

    cf_weight, cbf_weight, trend_weight = 0.5, 0.4, 0.1
    hybrid_score = (cf_weight * cf_norm) + (cbf_weight * cbf_sim) + (trend_weight * trend_score)
    match_percentage = min(max(hybrid_score * 100, 0), 100)

    result = {
        "productId": product_id,
        "title": meta["title"],
        "category": meta["category"],
        "description": meta["description"],
        "cf_raw": float(cf_raw),
        "cbf_sim": float(cbf_sim),
        "semantic_sim": float(semantic_sim),
        "trend_score": float(trend_score),
        "match_percentage": match_percentage,
        "reasons": build_explanations(user_id, product_id, cf_raw, cbf_sim, semantic_sim, trend_score)
    }

    return result

categories = ['All'] + sorted(products_df['category'].unique().tolist())

st.markdown("""
<div class="nav-container">
    <div class="nav-header">
        <h1 class="nav-logo">Product Recommender App</h1>
    </div>
    <div class="nav-categories">
""", unsafe_allow_html=True)

col_nav = st.columns(len(categories) + 1)
with col_nav[0]:
    theme_label = " Dark" if st.session_state.theme == 'light' else " Light"
    is_dark_mode = st.checkbox(theme_label, value=(st.session_state.theme == 'dark'), key="theme_toggle_checkbox")
    if is_dark_mode and st.session_state.theme == 'light':
        st.session_state.theme = 'dark'
        st.rerun()
    elif not is_dark_mode and st.session_state.theme == 'dark':
        st.session_state.theme = 'light'
        st.rerun()

for idx, cat in enumerate(categories):
    with col_nav[idx + 1]:
        if st.button(cat, key=f"cat_{cat}", use_container_width=True):
            st.session_state.selected_category = cat
            st.rerun()

st.markdown("</div></div>", unsafe_allow_html=True)

st.markdown('<div class="content-wrapper">', unsafe_allow_html=True)

st.markdown('<div class="search-card">', unsafe_allow_html=True)
st.markdown('<div class="search-title">Find Your Perfect Product</div>', unsafe_allow_html=True)

users_list = sorted(ratings_df['userId'].unique())

if st.session_state.selected_category == 'All':
    filtered_products = products_df
else:
    filtered_products = products_df[products_df['category'] == st.session_state.selected_category]

products_list = []
for idx, row in filtered_products.iterrows():
    product_display = f"{row['title'][:60]}... [{row['category']}]"
    products_list.append({
        'display': product_display,
        'id': row['productId'],
        'title': row['title'],
        'category': row['category']
    })

col1, col2 = st.columns(2)

with col1:
    selected_user = st.selectbox(
        "Select User Profile",
        options=users_list,
        format_func=lambda x: f"User {x}",
        key="user_select"
    )

with col2:
    selected_product_display = st.selectbox(
        f"Browse Products ({len(products_list)} available)",
        options=[p['display'] for p in products_list],
        key="product_select"
    )

    selected_product_id = None
    for p in products_list:
        if p['display'] == selected_product_display:
            selected_product_id = p['id']
            break

st.markdown('</div>', unsafe_allow_html=True)

if st.button("Get Personalized Recommendation", type="primary", use_container_width=True):
    with st.spinner("AI analyzing your preferences..."):
        result = recommend_single_product(selected_user, selected_product_id)
        st.session_state.current_recommendation = result
        st.session_state.current_user = selected_user
        st.session_state.recommendation_generated = True
        st.success("Recommendation ready!")
        st.rerun()

if st.session_state.recommendation_generated and st.session_state.current_recommendation:
    row = st.session_state.current_recommendation
    user = st.session_state.current_user

    st.markdown('<div class="product-card">', unsafe_allow_html=True)

    col_info, col_score = st.columns([3, 1])
    with col_score:
        st.markdown(f'''
        <div class="recommendation-score">
            <div class="score-value">{int(row['match_percentage'])}%</div>
            <div class="score-label">Match</div>
        </div>
        ''', unsafe_allow_html=True)

    with col_info:
        st.markdown(f'<div class="product-title">{row["title"]}</div>', unsafe_allow_html=True)
        st.markdown(f'''
        <div class="product-meta">
            <span class="category-badge">{row["category"]}</span>
            <span>ID: {row["productId"]}</span>
        </div>
        ''', unsafe_allow_html=True)

    st.markdown('<div class="reasons-title">Why We Recommend This?</div>', unsafe_allow_html=True)
    for reason in row["reasons"]:
        st.markdown(f'<div class="reason-item"><span>{reason}</span></div>', unsafe_allow_html=True)

    with st.expander("Product Details"):
        st.write(row["description"])

    FEEDBACK_PATH = "/content/recommender_app/models/feedback.csv"
    os.makedirs(os.path.dirname(FEEDBACK_PATH), exist_ok=True)

    def save_feedback(score):
        header_needed = not os.path.exists(FEEDBACK_PATH)
        fb_data = {
            "timestamp": datetime.utcnow().isoformat(),
            "userId": user,
            "productId": row["productId"],
            "feedback_score": score
        }
        pd.DataFrame([fb_data]).to_csv(FEEDBACK_PATH, index=False, header=header_needed, mode="a")

    pid = row["productId"]
    like_key = f"liked_{user}_{pid}"
    dislike_key = f"disliked_{user}_{pid}"

    if like_key not in st.session_state:
        st.session_state[like_key] = False
    if dislike_key not in st.session_state:
        st.session_state[dislike_key] = False

    fb_col1, fb_col2 = st.columns(2)

    with fb_col1:
        if not st.session_state[like_key]:
            if st.button("I Like This", key=f"like_{pid}", use_container_width=True):
                save_feedback(1)
                st.session_state[like_key] = True
                st.session_state[dislike_key] = False
                st.rerun()
        else:
            st.success("You liked this!")
            if st.button("Undo", key=f"undo_like_{pid}", use_container_width=True):
                st.session_state[like_key] = False
                st.rerun()

    with fb_col2:
        if not st.session_state[dislike_key]:
            if st.button("Not Interested", key=f"dislike_{pid}", use_container_width=True):
                save_feedback(0)
                st.session_state[dislike_key] = True
                st.session_state[like_key] = False
                st.rerun()
        else:
            st.error(" Marked as not interested")
            if st.button("Undo", key=f"undo_dislike_{pid}", use_container_width=True):
                st.session_state[dislike_key] = False
                st.rerun()

    st.markdown('</div>', unsafe_allow_html=True)

st.markdown('</div>', unsafe_allow_html=True)

total_products = len(products_df)
total_users = len(ratings_df['userId'].unique())
total_ratings = len(ratings_df)

st.markdown(f"""
<div class="footer">
    <div class="footer-text">
        Product Recommender App  {total_products} Products  {total_users} Users  {total_ratings} Ratings  2025
    </div>
</div>
""", unsafe_allow_html=True)
ENDOFPYTHON

wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O /usr/local/bin/cloudflared 2>/dev/null || true
chmod +x /usr/local/bin/cloudflared 2>/dev/null || true

pkill -f streamlit 2>/dev/null || true
pkill -f cloudflared 2>/dev/null || true
sleep 2

nohup streamlit run /content/streamlit_recommender_app.py \
    --server.port 8501 \
    --server.headless true \
    --server.enableXsrfProtection false \
    --server.enableCORS false \
    --browser.gatherUsageStats false \
    > /content/streamlit.log 2>&1 &

sleep 3

if ! pgrep -f "streamlit run" > /dev/null; then
    echo "ERROR: Streamlit failed to start"
    tail -20 /content/streamlit.log
    exit 1
fi


nohup cloudflared tunnel --url http://localhost:8501 --no-autoupdate > /content/cf.log 2>&1 &

URL=""
for i in {1..30}; do
    URL=$(grep -o "https://[a-z0-9.-]*\.trycloudflare\.com" /content/cf.log 2>/dev/null | head -1)
    if [ -n "$URL" ]; then
        break
    fi
    sleep 1
done

if [ -n "$URL" ]; then
    echo "$URL"
else
    echo "ERROR: Could not get Cloudflare URL"
    tail -30 /content/streamlit.log
    tail -30 /content/cf.log
    exit 1
fi

https://alumni-observe-mostly-beta.trycloudflare.com
