# Dashboard Development

## Setup and Streamlit Import Test

In [1]:
# Verify Streamlit works and import necessary libraries

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
import math
import sys
import os

# Add the project's src directory to the import path for custom modules.
# The path is resolved to an absolute location once, so a later change of
# working directory does not invalidate it, and the membership check keeps
# re-runs of this cell from appending the same entry again.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

print("Libraries imported successfully")
print("Streamlit version:", st.__version__)

# Progress marker only; the Streamlit components themselves are exercised
# by the cells that follow.
print("Testing Streamlit components")

Libraries imported successfully
Streamlit version: 1.50.0
Testing Streamlit components


## Create Dashboard Layout

In [2]:
# Skeleton of the A/B testing dashboard: session defaults, page header,
# sidebar header, and the three main content tabs.

# Seed session state only for keys that are not present yet, so values
# written by later interactions are not replaced by these defaults.
session_defaults = {'data_loaded': False, 'df': None}
for state_key, default_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value

# Page header
st.title("A/B Testing Analysis Dashboard")
st.markdown("Upload your A/B test data to analyze statistical significance and get business recommendations")

# Sidebar header (navigation and analysis inputs live here)
with st.sidebar:
    st.title("Navigation")
    st.markdown("Configure your analysis settings")

# Main content tabs; tab1/tab2/tab3 are filled in by the following cells
tab_labels = ["Data Upload", "Statistical Analysis", "Visualizations"]
tab1, tab2, tab3 = st.tabs(tab_labels)

print("Dashboard layout structure created")

2025-10-28 14:29:47.655 
  command:

    streamlit run /opt/anaconda3/envs/ab-testing/lib/python3.9/site-packages/ipykernel_launcher.py [ARGUMENTS]


Dashboard layout structure created


## Data Upload Functionality

In [3]:
# Tab 1: CSV upload, basic validation, and a preview of the loaded data.
# A successfully parsed, non-empty file is stored in st.session_state.df and
# st.session_state.data_loaded is set to True for the other tabs to read.

with tab1:
    st.header("Data Upload and Validation")

    # File upload section
    uploaded_file = st.file_uploader(
        "Upload your A/B test data (CSV format)",
        type=['csv'],
        help="Your CSV should contain columns for test groups and conversions"
    )

    if uploaded_file is not None:
        try:
            # Load the data
            df = pd.read_csv(uploaded_file)

            if df.empty:
                # A header-only CSV parses without error but has nothing to
                # analyze; reject it here instead of marking it as loaded and
                # leaving the other tabs with blank metrics and empty charts.
                st.error("The uploaded file contains no data rows. Please upload a CSV with at least one row.")
            else:
                st.session_state.df = df
                st.session_state.data_loaded = True

                # Display data info
                st.success(f"Data loaded successfully: {df.shape[0]} rows, {df.shape[1]} columns")

                # Show data preview
                st.subheader("Data Preview")
                st.dataframe(df.head(10))

                # Show basic information
                st.subheader("Data Information")
                col1, col2 = st.columns(2)

                with col1:
                    st.write("Column Names:")
                    st.write(df.columns.tolist())

                with col2:
                    st.write("Data Types:")
                    st.write(df.dtypes)

        except Exception as e:
            # UI boundary: report any failure in the tab rather than letting
            # the exception abort the rest of the dashboard script.
            st.error(f"Error loading file: {str(e)}")
    else:
        st.info("Please upload a CSV file to begin analysis")

print("Data upload interface created")



Data upload interface created


## Statistical Analysis Display

In [4]:
# Tab 2: per-group conversion metrics for the data held in session state.
# Reads st.session_state.data_loaded / st.session_state.df and requires the
# columns 'test_group' and 'converted'.

with tab2:
    st.header("Statistical Analysis Results")

    if st.session_state.data_loaded:
        df = st.session_state.df

        # Check if required columns exist
        required_columns = ['test_group', 'converted']
        missing_columns = [col for col in required_columns if col not in df.columns]

        if missing_columns:
            st.error(f"Missing required columns: {missing_columns}")
            st.info("Please ensure your data has 'test_group' and 'converted' columns")
        else:
            # Work on a copy so the frame stored in session state is not modified
            df_clean = df.copy()
            if df_clean['converted'].dtype == 'bool':
                df_clean['converted'] = df_clean['converted'].astype(int)

            # One row per test group: number of rows, conversions, and rate
            conversion_summary = df_clean.groupby('test_group').agg({
                'converted': ['count', 'sum', 'mean']
            })
            conversion_summary.columns = ['user_count', 'conversions', 'conversion_rate']

            # Display metrics
            st.subheader("Conversion Metrics")
            groups = conversion_summary.index.tolist()

            if len(groups) < 2:
                # Previously this case rendered an empty section with no
                # explanation; say why no comparison is shown.
                st.warning(
                    f"Found {len(groups)} test group(s) in 'test_group'; "
                    "at least 2 are required to compare conversion rates"
                )
            else:
                group_a, group_b = groups[0], groups[1]

                if len(groups) > 2:
                    # Only two groups are compared; make the selection visible
                    # instead of dropping the remaining groups silently.
                    st.info(
                        f"Found {len(groups)} test groups; "
                        f"comparing the first two: {group_a} and {group_b}"
                    )

                col1, col2, col3 = st.columns(3)

                # Same metric card for both groups. Values are read with scalar
                # .loc[row, column] lookups so each keeps its column's dtype.
                for column, group in ((col1, group_a), (col2, group_b)):
                    with column:
                        st.metric(
                            f"Group {group} Conversion Rate",
                            f"{conversion_summary.loc[group, 'conversion_rate']:.3%}",
                            help=f"{conversion_summary.loc[group, 'conversions']} conversions out of {conversion_summary.loc[group, 'user_count']} users"
                        )

                with col3:
                    # Difference of the two rates (second group minus first)
                    difference = conversion_summary.loc[group_b, 'conversion_rate'] - conversion_summary.loc[group_a, 'conversion_rate']
                    st.metric(
                        "Difference",
                        f"{difference:.3%}",
                        delta=f"{difference:.3%}" if difference != 0 else None
                    )

            # Placeholder for statistical test results
            st.subheader("Statistical Significance")
            st.info("Statistical tests will be implemented in the next step")

    else:
        st.warning("Please upload data in the 'Data Upload' tab to see analysis results")

print("Statistical analysis interface created")



Statistical analysis interface created


## Visualization Prototype

In [5]:
# Tab 3: bar charts of conversion rate and user count per test group.


def _render_group_bar_chart(values, title, ylabel, colors, label_format):
    """Draw one labelled bar chart for a per-group Series and render it.

    Parameters
    ----------
    values : pandas.Series
        One value per test group; the index supplies the x positions/labels.
    title : str
        Chart title.
    ylabel : str
        Y-axis label.
    colors : list of str
        Bar colors passed to ``Axes.bar``.
    label_format : str
        Format spec applied to each value for the label above its bar
        (for example ``'.3f'`` or ``','``).
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(values.index, values.values, color=colors, alpha=0.7)

    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_xlabel('Test Group', fontsize=12)
    ax.grid(axis='y', alpha=0.3)

    # Place each label a fixed 3 points above its bar. An offset in points is
    # independent of the y-axis scale, unlike a constant added in data units.
    for bar, value in zip(bars, values.values):
        ax.annotate(
            format(value, label_format),
            xy=(bar.get_x() + bar.get_width() / 2., bar.get_height()),
            xytext=(0, 3),
            textcoords='offset points',
            ha='center', va='bottom', fontweight='bold'
        )

    st.pyplot(fig)
    # Release the figure once rendered; otherwise every rerun of the script
    # leaves another open figure registered with pyplot.
    plt.close(fig)


with tab3:
    st.header("Data Visualizations")

    if st.session_state.data_loaded:
        df = st.session_state.df

        if 'test_group' in df.columns and 'converted' in df.columns:
            # Work on a copy so the frame stored in session state is not modified
            df_viz = df.copy()
            if df_viz['converted'].dtype == 'bool':
                df_viz['converted'] = df_viz['converted'].astype(int)

            # Conversion rate chart
            st.subheader("Conversion Rate Comparison")
            conversion_rates = df_viz.groupby('test_group')['converted'].mean()
            _render_group_bar_chart(
                conversion_rates,
                title='Conversion Rates by Test Group',
                ylabel='Conversion Rate',
                colors=['lightblue', 'lightcoral'],
                label_format='.3f'
            )

            # User distribution chart. groupby(...).size() yields the groups in
            # the same order as the conversion-rate chart, whereas
            # value_counts() orders them by frequency.
            st.subheader("User Distribution")
            user_counts = df_viz.groupby('test_group').size()
            _render_group_bar_chart(
                user_counts,
                title='User Distribution by Test Group',
                ylabel='Number of Users',
                colors=['lightgreen', 'orange'],
                label_format=','
            )

        else:
            st.error("Required columns 'test_group' and 'converted' not found in data")
    else:
        st.warning("Please upload data in the 'Data Upload' tab to see visualizations")

print("Visualization interface created")



Visualization interface created


## Test the Dashboard

In [6]:
# Sidebar test tools: a button that loads reproducible synthetic A/B data
# into session state, plus a status panel describing the loaded data.

SAMPLE_SIZE = 1000   # number of synthetic rows
SAMPLE_SEED = 42     # fixed seed so every load yields the same sample

st.sidebar.header("Test Configuration")
st.sidebar.markdown("Use these options to test the dashboard")

# Add a test button to load sample data
if st.sidebar.button("Load Sample Data for Testing"):
    # A locally seeded generator keeps the sample reproducible without
    # touching NumPy's global random state.
    rng = np.random.default_rng(SAMPLE_SEED)
    sample_df = pd.DataFrame({
        'user_id': range(SAMPLE_SIZE),
        'test_group': rng.choice(['ad', 'psa'], size=SAMPLE_SIZE),
        'converted': rng.choice([0, 1], size=SAMPLE_SIZE, p=[0.97, 0.03]),
        # integers() excludes the upper bound, so values fall in 1..99
        'total_ads': rng.integers(1, 100, size=SAMPLE_SIZE),
    })

    st.session_state.df = sample_df
    st.session_state.data_loaded = True
    # st.rerun() halts this run immediately, so a message emitted here would
    # be discarded; set a flag and show the confirmation on the next run.
    st.session_state.sample_data_notice = True
    st.rerun()

# One-shot confirmation carried over from the run that loaded the sample
if st.session_state.get('sample_data_notice', False):
    st.session_state.sample_data_notice = False
    st.success("Sample data loaded successfully!")

# Display current status
st.sidebar.subheader("Current Status")
if st.session_state.data_loaded:
    st.sidebar.success("Data Loaded")
    st.sidebar.write(f"Rows: {st.session_state.df.shape[0]}")
    st.sidebar.write(f"Columns: {st.session_state.df.shape[1]}")
else:
    st.sidebar.info("No Data Loaded")

print("Dashboard testing components added")
print("Notebook 4: Dashboard Development prototype completed")



Dashboard testing components added
Notebook 4: Dashboard Development prototype completed
