In [1]:
pip install streamlit pandas statsmodels

Note: you may need to restart the kernel to use updated packages.


In [2]:
import streamlit as st
import pandas as pd
import statsmodels.api as sm

In [3]:
# --- CONFIGURATION ---
# Browser-tab title and wide layout for the app, followed by the on-page heading.
st.set_page_config(page_title="Vodka Sales Predictor", layout="wide")
# The heading starts with the bar-chart emoji (U+1F4CA), stored as real UTF-8 text.
st.title("📊 Vodka Sales Prediction Tool")

2025-12-14 17:36:36.708 
  command:

    streamlit run C:\Users\tdksi\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [4]:
# --- 1. LOAD & TRAIN MODEL ---
# Default location of the training data.
# NOTE(review): this is a machine-specific absolute path; pass `data_path` to
# train_model (or change this constant) to run the app anywhere else.
TRAINING_DATA_PATH = "C:/Users/tdksi/OneDrive - Illinois Institute of Technology/MS Marketing Analytics/Quantitatie Marketing Models/ssignment 1/svedka.csv"


@st.cache_resource
def train_model(data_path=TRAINING_DATA_PATH):
    """Fit the "Model 3" OLS regression of TotalSales on price, ad spend and tier.

    Args:
        data_path: CSV file with the training data. It must contain the columns
            PricePerUnit, Broad, Outdoor, Mag, News, Tier and TotalSales.

    Returns:
        A ``(model, features)`` tuple: the fitted statsmodels OLS results and the
        ordered list of predictor column names (without the intercept) that the
        model was trained on. If the data cannot be loaded or is unusable, an
        ``st.error`` message is shown and ``(None, None)`` is returned.
    """
    # Predictors follow the PDF's Model 3: priceperunit, broad, outdoor, mag, news,
    # tier1, tier2 (Tier 3 is the baseline, so it gets no dummy).
    raw_predictors = ['PricePerUnit', 'Broad', 'Outdoor', 'Mag', 'News']
    features = raw_predictors + ['Tier_1', 'Tier_2']
    target = 'TotalSales'

    try:
        df = pd.read_csv(data_path)
    except FileNotFoundError:
        st.error(f"Training data 'svedka.csv' not found at: {data_path}")
        return None, None
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Training data at {data_path} could not be parsed: {exc}")
        return None, None

    # Fail with a readable message instead of a KeyError when the schema is wrong.
    required_columns = raw_predictors + ['Tier', target]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Training data is missing these columns: {missing_columns}")
        return None, None

    # Drop incomplete rows BEFORE building the dummies. A missing Tier would
    # otherwise become Tier_1 = Tier_2 = 0 and be silently treated as Tier 3.
    df_clean = df.dropna(subset=required_columns).copy()
    if df_clean.empty:
        st.error("Training data has no complete rows to fit the model on.")
        return None, None

    # Dummy variables for Tier 1 and Tier 2.
    df_clean['Tier_1'] = (df_clean['Tier'] == 1).astype(int)
    df_clean['Tier_2'] = (df_clean['Tier'] == 2).astype(int)

    X = df_clean[features]
    y = df_clean[target]

    # Always add the intercept column. With the default has_constant='skip',
    # statsmodels omits it when a predictor is already constant, and the fitted
    # model would then have fewer parameters than the prediction-time design
    # matrix (which is built with has_constant='add').
    X = sm.add_constant(X, has_constant='add')

    # Fit OLS Model
    model = sm.OLS(y, X).fit()

    return model, features

model, model_features = train_model()

In [5]:
# --- 2. SIDEBAR & TABS ---
# Markdown help text for the sidebar: what each part of the app does and which
# columns an uploaded file needs.
sidebar_help_text = """
    1. **Model Stats:** View the regression performance based on historical data.
    2. **Make Predictions:** Upload a new CSV file to forecast sales.
    
    **Required Columns for Upload:**
    - PricePerUnit
    - Broad, Outdoor, Mag, News (Ad spend)
    - Tier (1, 2, or 3)
    """

st.sidebar.header("App Instructions")
st.sidebar.info(sidebar_help_text)

DeltaGenerator(_root_container=1, _parent=DeltaGenerator())

In [6]:
# Two tabs: prediction input first, model statistics second. The labels start with
# the outbox-tray (U+1F4E4) and chart-increasing (U+1F4C8) emoji as real UTF-8 text.
tab1, tab2 = st.tabs(["📤 Predict Sales (Input)", "📈 Model Statistics"])

In [7]:
# --- TAB 1: PREDICTION ---
# Lets the user upload a CSV, scores every row with the trained model and offers the
# result (input columns + Tier dummies + Predicted_TotalSales) for download.
with tab1:
    st.header("Predict Sales on New Data")

    uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])

    if uploaded_file is not None and model is not None:
        # An empty or malformed upload should give a readable message, not a traceback.
        try:
            input_df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Error: The uploaded file could not be read as CSV ({exc}).")
            input_df = None

        if input_df is None:
            pass  # The read error has already been reported above.
        elif 'Tier' not in input_df.columns:
            st.error("Error: Input file must contain a 'Tier' column.")
        else:
            # --- Preprocessing Input Data ---
            # Rows whose Tier is not 1, 2 or 3 (including missing values) get
            # Tier_1 = Tier_2 = 0 below, i.e. they are scored as the baseline tier.
            # Tell the user instead of doing that silently.
            unknown_tier_rows = int((~input_df['Tier'].isin([1, 2, 3])).sum())
            if unknown_tier_rows:
                st.warning(
                    f"{unknown_tier_rows} row(s) have a Tier other than 1, 2, or 3 "
                    "and are treated as the baseline tier (Tier 3)."
                )

            # Replicate the exact feature creation used in training.
            input_df['Tier_1'] = (input_df['Tier'] == 1).astype(int)
            input_df['Tier_2'] = (input_df['Tier'] == 2).astype(int)

            # Ensure all required columns exist
            missing_cols = [col for col in model_features if col not in input_df.columns]

            if missing_cols:
                st.error(f"Error: The uploaded file is missing these columns: {missing_cols}")
            else:
                # Build the design matrix in the trained column order; the intercept
                # is always added, even if an uploaded column happens to be constant.
                X_pred = sm.add_constant(input_df[model_features], has_constant='add')

                # Predict and append to the uploaded data
                input_df['Predicted_TotalSales'] = model.predict(X_pred)

                st.success("✅ Prediction Complete!")

                # Preview
                st.subheader("Results Preview")
                st.dataframe(input_df.head())

                # Download Button
                csv = input_df.to_csv(index=False).encode('utf-8')
                st.download_button(
                    label="Download Predictions as CSV",
                    data=csv,
                    file_name="vodka_sales_predictions.csv",
                    mime="text/csv",
                )

In [8]:
# --- TAB 2: MODEL STATS ---
# Shows the regression summary of the trained model, or a warning when training failed.
with tab2:
    if model is None:
        st.warning("Model could not be trained. Check svedka.csv.")
    else:
        st.header("Regression Model Performance (Model 3)")
        st.write("This model was trained on the `svedka.csv` data provided.")

        # Render the statsmodels summary table as preformatted text.
        ols_summary = model.summary()
        st.text(ols_summary)