In [13]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression


NUMERIC_COLS = ['age', 'listening_time', 'songs_played_per_day', 'skip_rate', 'ads_listened_per_week']


@st.cache_data
def load_and_prepare():
    """Load the churn dataset and build the preprocessed feature frame.

    Reads ``spotify_churn_32000.csv``, drops duplicate rows and rows with
    missing values, one-hot encodes the categorical feature columns and
    standardizes the columns listed in ``NUMERIC_COLS``.

    Returns:
        tuple: ``(df, features, encoders, scaler)`` where

        * ``df`` is the cleaned raw frame (original index kept),
        * ``features`` is the encoded and scaled feature frame,
        * ``encoders`` maps each categorical column name to
          ``{'enc': fitted OneHotEncoder, 'cols': generated column names}``,
        * ``scaler`` is the ``StandardScaler`` fitted on ``NUMERIC_COLS``.
    """
    df = pd.read_csv("spotify_churn_32000.csv")
    df = df.drop_duplicates().dropna()

    features = ['gender','age','country','subscription_type',
                'listening_time','songs_played_per_day','skip_rate',
                'device_type','ads_listened_per_week','offline_listening']

    # Reset the index once, up front. drop_duplicates()/dropna() leave gaps in
    # the index, and pd.concat(axis=1) aligns on index labels: every frame
    # joined below must share the same index, otherwise rows are paired with
    # the wrong encodings and extra NaN rows appear.
    churn_features_df = df[features].reset_index(drop=True)

    mc_features_columns = ['gender','country','subscription_type','device_type']

    encoders = {}
    ohe_frames = []
    for col in mc_features_columns:
        enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        arr = enc.fit_transform(churn_features_df[[col]])
        # Generated column names look like "<column>_<category>".
        cols = [f"{col}_{c}" for c in enc.categories_[0]]
        ohe_frames.append(pd.DataFrame(arr, columns=cols, index=churn_features_df.index))
        encoders[col] = {'enc': enc, 'cols': cols}

    churn_features_transformed_df = pd.concat([churn_features_df] + ohe_frames, axis=1)
    # The raw categorical columns are replaced by their one-hot expansions.
    churn_features_transformed_df = churn_features_transformed_df.drop(columns=mc_features_columns)

    scaler = StandardScaler()
    churn_features_transformed_df[NUMERIC_COLS] = scaler.fit_transform(churn_features_transformed_df[NUMERIC_COLS])

    return df, churn_features_transformed_df, encoders, scaler

@st.cache_resource
def load_model():
    """Unpickle and return the object stored in ``churn_best_model.saved``."""
    # NOTE: unpickling executes code embedded in the file, so the model file
    # must come from a trusted source.
    with open("churn_best_model.saved", "rb") as model_file:
        return pickle.load(model_file)

def vector_from_inputs(inputs, encoders, scaler, feature_order_df):
    """Turn one raw user profile into a flat model-input array.

    Args:
        inputs: Mapping of raw feature name to value for a single user.
        encoders: Mapping of categorical column name to a dict holding the
            fitted encoder under ``'enc'`` and its output column names under
            ``'cols'``.
        scaler: Fitted scaler applied to the ``NUMERIC_COLS`` columns.
        feature_order_df: Frame whose column order the result must follow.

    Returns:
        1-D array of feature values ordered like ``feature_order_df.columns``;
        columns absent from the encoded row are filled with 0.
    """
    row = pd.DataFrame([inputs])

    # One single-row frame of one-hot columns per categorical feature.
    encoded_frames = [
        pd.DataFrame(meta['enc'].transform(row[[name]]), columns=meta['cols'])
        for name, meta in encoders.items()
    ]

    combined = pd.concat([row.reset_index(drop=True), *encoded_frames], axis=1)
    # Drop the raw categorical columns now that their encodings are attached.
    combined = combined.drop(columns=list(encoders.keys()), axis=1)

    combined[NUMERIC_COLS] = scaler.transform(combined[NUMERIC_COLS])

    aligned = combined.reindex(columns=feature_order_df.columns, fill_value=0)
    return aligned.values.flatten()

def get_scaler_index(col_name):
    """Return the position of ``col_name`` within ``NUMERIC_COLS``.

    The scaler is fitted on the ``NUMERIC_COLS`` columns in that order, so the
    same position indexes its per-feature arrays. Raises ``ValueError`` when
    ``col_name`` is not one of the numeric columns.
    """
    position = NUMERIC_COLS.index(col_name)
    return position

# Page-level configuration and title of the simulator.
st.set_page_config(layout="wide", page_title="Churn Intervention Simulator")
st.title("Interactive Churn Intervention Simulator")

# Cached loaders: the cleaned raw data (used for the selectbox options), the
# encoded feature frame (used for column order and positions), the fitted
# encoders and scaler, and the trained model.
raw_df, feature_df, encoders, scaler = load_and_prepare()
model = load_model()

# Three equal-width columns: profile input, live KPIs, interventions.
left, mid, right = st.columns([1,1,1])
with left:
    st.header("1) Choose user profile")

    # Offer the subscription types actually present in the data, like the
    # other categorical selectors below. The encoders are built with
    # handle_unknown='ignore', so a label missing from the data would be
    # silently encoded as all zeros. 'Free' stays the default when available.
    plan_options = sorted(raw_df['subscription_type'].unique())
    default_plan_idx = plan_options.index('Free') if 'Free' in plan_options else 0
    plan = st.selectbox("Subscription Type", options=plan_options, index=default_plan_idx)
    gender = st.selectbox("Gender", options=sorted(raw_df['gender'].unique()))
    country = st.selectbox("Country", options=sorted(raw_df['country'].unique()))
    device = st.selectbox("Device", options=sorted(raw_df['device_type'].unique()))

    age = st.slider("Age", 13, 80, 28)
    listening_time = st.slider("Listening time (min/day)", 0, 300, 60)
    songs_played = st.slider("Songs played per day", 0, 200, 30)
    skip_rate = st.slider("Skip rate (0-1)", 0.0, 1.0, 0.25, step=0.01)
    ads = st.slider("Ads listened per week", 0, 60, 6)
    offline = st.selectbox("Offline listening", options=[0,1], index=0)

    if st.button("Calculate base risk"):
        # Keys match the raw feature names expected by vector_from_inputs.
        user_inputs = {
            'gender': gender, 'age': age, 'country': country, 'subscription_type': plan,
            'listening_time': listening_time, 'songs_played_per_day': songs_played,
            'skip_rate': skip_rate, 'device_type': device,
            'ads_listened_per_week': ads, 'offline_listening': offline
        }
        user_vec = vector_from_inputs(user_inputs, encoders, scaler, feature_df)
        # Probability of the second class (index 1) for the single profile row.
        base_prob = model.predict_proba(user_vec.reshape(1,-1))[:,1][0]

        # Start a fresh simulation: current risk equals the base risk, with no
        # budget spent and no actions logged yet.
        st.session_state['user_inputs'] = user_inputs
        st.session_state['vec'] = user_vec
        st.session_state['base_prob'] = float(base_prob)
        st.session_state['cur_prob'] = float(base_prob)
        st.session_state['budget'] = 0.0
        st.session_state['actions'] = []
        # NOTE(review): use_counts is not read anywhere in the visible code.
        st.session_state['use_counts'] = {'ad_detox':0, 'engage_boost':0, 'mobile_msg':0, 'country_msg':0, 'premium_trial':0}
        st.success(f"Base churn prob: {base_prob:.3f}")

with mid:
    st.header("2) Live Risk & KPI")
    state = st.session_state
    if 'cur_prob' not in state:
        # Nothing has been scored yet in this session.
        st.info("Set profile and click 'Calculate base risk' to start.")
    else:
        st.metric("Churn Probability", f"{state['cur_prob']:.3f}")
        st.metric("Base Probability", f"{state['base_prob']:.3f}")
        st.metric("Budget Spent", f"${state['budget']:.2f}")
        st.write("**Actions Log:**")
        # One caption per applied intervention, in the order they were logged.
        for entry in state['actions']:
            st.caption(f"✅ {entry['name']} (${entry['cost']:.2f}) → Risk: {entry['new_prob']:.3f}")

def _scaled_delta(col_name, raw_delta):
    """Express a raw-unit change of ``col_name`` in standardized units."""
    # scale_ is the per-feature standard deviation used by the scaler (with
    # zero-variance features mapped to 1, which avoids dividing by zero).
    return raw_delta / scaler.scale_[get_scaler_index(col_name)]


def _scaled_value(col_name, raw_value):
    """Return the standardized value the scaler assigns to ``raw_value``."""
    idx = get_scaler_index(col_name)
    return (raw_value - scaler.mean_[idx]) / scaler.scale_[idx]


def _commit_intervention(name, cost, new_vec):
    """Re-score ``new_vec``, log the action in session state and rerun."""
    new_prob = float(model.predict_proba(new_vec.reshape(1, -1))[:, 1][0])
    st.session_state['budget'] += cost
    st.session_state['actions'].append({'name': name, 'cost': cost, 'new_prob': new_prob})
    st.session_state['vec'] = new_vec
    st.session_state['cur_prob'] = new_prob
    # Rerun so the KPI column, which is rendered earlier in the script,
    # reflects the updated state.
    st.rerun()


with right:
    st.header("3) Apply Interventions")
    if 'vec' in st.session_state:

        if st.button("Ad Detox (Remove 5 ads, $0.50)"):
            cur = st.session_state['vec'].copy()
            ads_col = 'ads_listened_per_week'
            ads_idx = feature_df.columns.get_loc(ads_col)

            # Remove 5 ads (in scaled units), but never go below zero raw ads
            # when the button is pressed repeatedly.
            cur[ads_idx] = max(cur[ads_idx] - _scaled_delta(ads_col, 5.0),
                               _scaled_value(ads_col, 0.0))

            _commit_intervention('Ad Detox', 0.50, cur)

        if st.button("Engage Boost (Push Discovery, $1)"):
            cur = st.session_state['vec'].copy()

            # +20 raw units of listening time.
            l_col = 'listening_time'
            l_idx = feature_df.columns.get_loc(l_col)
            cur[l_idx] += _scaled_delta(l_col, 20.0)

            # -0.05 skip rate, floored at a raw skip rate of 0.
            s_col = 'skip_rate'
            s_idx = feature_df.columns.get_loc(s_col)
            cur[s_idx] = max(cur[s_idx] - _scaled_delta(s_col, 0.05),
                             _scaled_value(s_col, 0.0))

            _commit_intervention('Engage Boost', 1.00, cur)

        if st.button("Offer 1-Month Premium ($6)"):
            cur = st.session_state['vec'].copy()

            # One-hot columns are named "<column>_<category>", i.e.
            # "subscription_type_<plan>". Switch the Premium column on and
            # every other plan column off.
            plan_prefix = 'subscription_type_'
            premium_col = plan_prefix + 'Premium'
            for c in feature_df.columns:
                if c.startswith(plan_prefix):
                    cur[feature_df.columns.get_loc(c)] = 1.0 if c == premium_col else 0.0

            # Set ads to the scaled equivalent of zero raw ads, derived from
            # the fitted scaler instead of a hard-coded constant.
            # NOTE(review): the previous literal was -0.51, presumably this
            # same value for the training data - confirm.
            a_col = 'ads_listened_per_week'
            a_idx = feature_df.columns.get_loc(a_col)
            cur[a_idx] = _scaled_value(a_col, 0.0)

            _commit_intervention('Premium Trial', 6.00, cur)

    else:
        st.write("Waiting for profile...")

st.markdown("---")
if 'actions' in st.session_state and st.session_state['actions']:
    st.subheader("Impact Trajectory")
    logged_actions = st.session_state['actions']
    step_names = ['Start'] + [a['name'] for a in logged_actions]
    step_risks = [st.session_state['base_prob']] + [a['new_prob'] for a in logged_actions]

    # Prefix every label with a zero-padded step number. The same action can
    # be applied several times, and a string index is treated as categories:
    # without the prefix repeated actions collapse onto one x position and the
    # axis order does not follow the order in which actions were applied.
    pad = len(str(len(step_names) - 1))
    step_labels = [f"{i:0{pad}d}. {name}" for i, name in enumerate(step_names)]

    df_plot = pd.DataFrame({'Step': step_labels, 'Risk': step_risks})
    st.line_chart(df_plot.set_index('Step'))

Overwriting app.py


## Download the model
## Run these commands to open the dashboard
cd ~/Downloads streamlit
streamlit run app.py