# Product Opportunity Radar (Colab)
Run the pipeline, visualize results, and optionally launch Streamlit.

In [None]:
!pip -q install pandas numpy scikit-learn plotly streamlit pyngrok -U

## Create demo data (or overwrite with your own)

In [None]:

from pathlib import Path
import pandas as pd
# Workspace root for this notebook; the CSV inputs live in its `data` subfolder.
base = Path('/content/product-opportunity-radar')
(base/'data').mkdir(parents=True, exist_ok=True)

# Write a tiny synthetic transactions file so the notebook runs end to end.
# To use real data, overwrite transactions.csv with your own file afterwards.
demo_transactions = pd.DataFrame(
    [
        ["2025-06-01", "P100", "Wearables", 199.0, 0.10, 120, True],
        ["2025-06-02", "P100", "Wearables", 199.0, 0.00, 60, True],
    ],
    columns=["date", "product_id", "category", "price", "discount_pct", "units", "in_stock"],
)
demo_transactions.to_csv(base/'data'/'transactions.csv', index=False)


## Pipeline & charts

In [None]:

# (Re)define the minimal pipeline inline for Colab
import pandas as pd, numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from pathlib import Path
# Folder holding every CSV the notebook reads: transactions, search logs,
# reviews, catalog and competitors.
DATA_DIR = Path('/content/product-opportunity-radar') / 'data'
# Files missing from this folder are replaced by small fallbacks in the load cell.


In [None]:

# Load data (if files missing, create tiny fallbacks)
def ensure_csv(path, df):
    """Write ``df`` to ``path`` as CSV only if no file exists there yet.

    An existing file is never overwritten, so data already on disk takes
    precedence over the fallback. Missing parent directories are created
    first, so the write does not fail when the data folder is absent.

    Args:
        path: Location of the CSV file (``pathlib.Path`` or path string).
        df: ``pandas.DataFrame`` written as fallback content, without its index.
    """
    path = Path(path)
    if path.exists():
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(path, index=False)

# One-row stand-ins, keyed by file name. Each is written only when the
# corresponding CSV is not already present in DATA_DIR.
fallback_tables = {
    'search_logs.csv': pd.DataFrame({
        "ts": ["2025-06-01 10:01"],
        "query": ["smart watch gps"],
        "results_found": [0],
        "clicks": [0],
        "added_to_cart": [0],
    }),
    'reviews.csv': pd.DataFrame({
        "date": ["2025-06-01"],
        "product_id": ["P100"],
        "rating": [3],
        "review_text": ["Battery could be better"],
    }),
    'catalog.csv': pd.DataFrame({
        "product_id": ["P100"],
        "name": ["Apex Smartwatch"],
        "category": ["Wearables"],
        "features": ["GPS, Heart-rate, Sleep"],
    }),
    'competitors.csv': pd.DataFrame({
        "competitor": ["BrandZ"],
        "category": ["Wearables"],
        "product_name": ["BrandZ GPS Pro"],
        "key_features": ["GPS, Waterproof, 10-day battery"],
    }),
}
for file_name, fallback_df in fallback_tables.items():
    ensure_csv(DATA_DIR/file_name, fallback_df)

# Read all five input tables into memory.
sales = pd.read_csv(DATA_DIR/'transactions.csv')
searches = pd.read_csv(DATA_DIR/'search_logs.csv')
reviews = pd.read_csv(DATA_DIR/'reviews.csv')
catalog = pd.read_csv(DATA_DIR/'catalog.csv')
competitors = pd.read_csv(DATA_DIR/'competitors.csv')
print("Files ready in", DATA_DIR)


In [None]:

# Basic computations (unmet signal + trivial score)
# A search is flagged "unmet" when it returned no results or received no clicks.
s = searches.assign(
    unmet=((searches["results_found"] == 0) | (searches["clicks"] == 0)).astype(int)
)

# Per query: number of searches and how many of them were unmet.
unmet = (
    s.groupby("query")
    .agg(searches=("query", "count"), unmet=("unmet", "sum"))
    .reset_index()
)
unmet["unmet_signal"] = unmet["unmet"] / unmet["searches"]
# Placeholder mapping: every query is assigned to the single demo category.
unmet["category"] = "Wearables"

# Roll up to category level, then min-max scale the summed signal.
opp = (
    unmet.groupby("category")
    .agg(unmet_signal=("unmet_signal", "sum"), search_volume=("searches", "sum"))
    .reset_index()
)
signal_min = opp["unmet_signal"].min()
signal_range = opp["unmet_signal"].max() - signal_min
# The epsilon avoids division by zero when all categories share one value
# (including the single-category demo); every score is then 0.
opp["opportunity_score"] = (opp["unmet_signal"] - signal_min) / (signal_range + 1e-9)

# Persist the result so the optional Streamlit app has a file to display.
opp.to_csv(DATA_DIR/'opportunities.csv', index=False)
opp


In [None]:

import plotly.express as px

# One bar per category; bar height is the scaled opportunity score.
fig = px.bar(
    data_frame=opp,
    x="category",
    y="opportunity_score",
    title="Opportunity Score by Category",
)
fig.show()


## Optional: Launch Streamlit in Colab

In [None]:

# Write a minimal Streamlit app and launch via pyngrok
from pathlib import Path
import subprocess, time
from pyngrok import ngrok, conf

app = Path('/content/product-opportunity-radar/app.py')
app.parent.mkdir(parents=True, exist_ok=True)

# The app runs in a separate process, so it must import every name it uses
# itself (including Path). UTF-8 is set explicitly because the title has an emoji.
app.write_text("""
import streamlit as st, pandas as pd
from pathlib import Path
st.set_page_config(layout='wide', page_title='Product Opportunity Radar (Colab)')
st.title('📈 Product Opportunity Radar (Colab)')
opp_path = Path('/content/product-opportunity-radar/data/opportunities.csv')
opp = pd.read_csv(opp_path) if opp_path.exists() else None
st.write('If opportunities.csv exists, it will be shown below.')
if opp is not None:
    st.dataframe(opp, use_container_width=True)
""", encoding="utf-8")

# Start the Streamlit server in the background first, so there is something
# listening on port 8501 behind the tunnel opened below.
streamlit_proc = subprocess.Popen(
    ["streamlit", "run", str(app), "--server.port", "8501", "--server.headless", "true"]
)

# conf.get_default().monitor_thread = False  # optional
# NOTE(review): ngrok may require an auth token to be configured before
# connect() succeeds — confirm for your account/setup.
public_url = ngrok.connect(8501, "http").public_url
print("Public URL:", public_url)
