In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def generate_sales_data(start_date='2023-01-01', periods=365):
    """Generate daily sales data for different channels"""
    dates = pd.date_range(start=start_date, periods=periods)
    
    data = []
    for date in dates:
        # Basic daily data
        retail_sales = np.random.normal(22000000, 3000000)  # ~8B annually
        online_sales = np.random.normal(9000000, 1500000)   # ~3.3B annually
        kohls_sales = np.random.normal(5000000, 1000000)    # Partnership sales
        
        # Add seasonality
        if date.month in [11, 12]:  # Holiday season
            retail_sales *= 1.5
            online_sales *= 1.8
            kohls_sales *= 1.4
        
        data.append({
            'date': date,
            'retail_sales': retail_sales,
            'online_sales': online_sales,
            'kohls_sales': kohls_sales,
            'total_sales': retail_sales + online_sales + kohls_sales,
            'week': date.isocalendar()[1],
            'month': date.month,
            'year': date.year
        })
    
    return pd.DataFrame(data)

def generate_digital_metrics(sales_df):
    """Generate digital experience and AI metrics"""
    digital_data = []
    
    for _, row in sales_df.iterrows():
        visitors = int(np.random.normal(500000, 50000))
        
        digital_data.append({
            'date': row['date'],
            'website_visitors': visitors,
            'mobile_app_users': int(visitors * 0.6),
            'conversion_rate': np.random.normal(0.03, 0.005),
            'ai_recommendations_shown': int(visitors * 0.8),
            'ai_driven_purchases': int(visitors * 0.15),
            'virtual_try_ons': int(visitors * 0.25),
            'personalized_emails_sent': int(visitors * 0.4),
            'email_engagement_rate': np.random.normal(0.22, 0.03),
            'app_engagement_minutes': np.random.normal(8, 2)
        })
    
    return pd.DataFrame(digital_data)

def generate_customer_segments():
    """Generate customer segment data"""
    segments = ['Beauty Enthusiast', 'Occasional Buyer', 'Luxury Seeker', 'Value Hunter', 'Gen Z Explorer']
    data = []
    
    for segment in segments:
        data.append({
            'segment': segment,
            'customer_count': int(np.random.normal(2000000, 300000)),
            'avg_basket_size': np.random.normal(65, 15),
            'annual_purchase_frequency': np.random.normal(6, 1.5),
            'online_shopping_preference': np.random.normal(0.4, 0.1),
            'ai_feature_adoption_rate': np.random.normal(0.3, 0.08),
            'loyalty_program_participation': np.random.normal(0.7, 0.1)
        })
    
    return pd.DataFrame(data)

def generate_ai_investments():
    """Generate AI initiative investment data"""
    initiatives = [
        'Personalization Engine',
        'Virtual Try-On Technology',
        'Customer Service AI',
        'Inventory Optimization AI',
        'Predictive Analytics',
        'Voice Commerce',
        'AR Shopping Experience',
        'AI Beauty Advisor'
    ]
    
    data = []
    for initiative in initiatives:
        data.append({
            'initiative': initiative,
            'investment_2024': np.random.normal(5000000, 1000000),
            'projected_investment_2025': np.random.normal(8000000, 1500000),
            'roi_2024': np.random.normal(0.15, 0.05),
            'projected_roi_2025': np.random.normal(0.25, 0.07),
            'implementation_progress': np.random.normal(0.7, 0.15),
            'customer_adoption_rate': np.random.normal(0.3, 0.1)
        })
    
    return pd.DataFrame(data)

# Generate all datasets
sales_df = generate_sales_data()
digital_df = generate_digital_metrics(sales_df)
segments_df = generate_customer_segments()
ai_investments_df = generate_ai_investments()

# # Example usage
# print("\nSales Data Sample:")
# print(sales_df.head())
# print("\nDigital Metrics Sample:")
# print(digital_df.head())
# print("\nCustomer Segments:")
# print(segments_df)
# print("\nAI Investments:")
# print(ai_investments_df)

Systematically learn visualization:
1. Numerical (continuous only?)
2. Categorical

Then for each data type:
1. Univariate
2. Bivariate
3. Multivariate

In [2]:
# Let's start with sales
sales_df

Unnamed: 0,date,retail_sales,online_sales,kohls_sales,total_sales,week,month,year
0,2023-01-01,1.370706e+07,1.126119e+07,4.818738e+06,2.978698e+07,52,1,2023
1,2023-01-02,1.837960e+07,7.595381e+06,4.157202e+06,3.013219e+07,1,1,2023
2,2023-01-03,1.925692e+07,7.685645e+06,3.713617e+06,3.065618e+07,1,1,2023
3,2023-01-04,2.070502e+07,9.229683e+06,5.481550e+06,3.541625e+07,1,1,2023
4,2023-01-05,1.830606e+07,1.023471e+07,3.593580e+06,3.213435e+07,1,1,2023
...,...,...,...,...,...,...,...,...
360,2023-12-27,2.814070e+07,1.670052e+07,6.713341e+06,5.155457e+07,52,12,2023
361,2023-12-28,3.547148e+07,2.078468e+07,6.829283e+06,6.308544e+07,52,12,2023
362,2023-12-29,3.575000e+07,1.394386e+07,8.188217e+06,5.788208e+07,52,12,2023
363,2023-12-30,3.732588e+07,1.675750e+07,6.265416e+06,6.034880e+07,52,12,2023


In [3]:
ai_investments_df

Unnamed: 0,initiative,investment_2024,projected_investment_2025,roi_2024,projected_roi_2025,implementation_progress,customer_adoption_rate
0,Personalization Engine,6312656.0,10153980.0,0.164195,0.453005,0.686893,0.38363
1,Virtual Try-On Technology,5593311.0,6143401.0,0.073391,0.199443,0.924334,0.123816
2,Customer Service AI,4080972.0,8333659.0,0.117567,0.38902,0.626032,0.332693
3,Inventory Optimization AI,5557812.0,8046791.0,0.18782,0.288124,0.716207,0.602951
4,Predictive Analytics,5190199.0,8816549.0,0.111815,0.382944,0.952314,0.331848
5,Voice Commerce,4925893.0,8668999.0,0.143522,0.260506,1.000342,0.269501
6,AR Shopping Experience,4043356.0,5806524.0,0.174901,0.255213,0.566544,0.283491
7,AI Beauty Advisor,6568882.0,6860339.0,0.07263,0.192358,0.766146,0.173232


In [4]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt

# Assuming we have our previously generated dataframes:
# sales_df, digital_df, segments_df, ai_investments_df

def analyze_channel_performance(sales_df):
    """Analyze and visualize sales channel performance"""
    # Monthly channel performance
    monthly_sales = sales_df.groupby('month').agg({
        'retail_sales': 'sum',
        'online_sales': 'sum',
        'kohls_sales': 'sum'
    }).reset_index()
    
    # Create stacked bar chart
    fig = px.bar(monthly_sales, x='month',
                 y=['retail_sales', 'online_sales', 'kohls_sales'],
                 title='Monthly Sales by Channel',
                 labels={'value': 'Sales ($)', 'month': 'Month'},
                 template='plotly_white')
    
    fig.update_layout(barmode='stack')
    
    return fig

def analyze_digital_engagement(digital_df):
    """Analyze digital engagement metrics"""
    # Calculate key metrics
    metrics = {
        'Avg Daily Visitors': digital_df['website_visitors'].mean(),
        'Conversion Rate': digital_df['conversion_rate'].mean() * 100,
        'AI-Driven Purchase Rate': (digital_df['ai_driven_purchases'] / 
                                  digital_df['website_visitors']).mean() * 100
    }
    
    # Create time series of engagement metrics
    fig = make_subplots(rows=2, cols=1,
                       subplot_titles=('Website Traffic and Conversions',
                                     'AI Feature Engagement'))
    
    # Traffic and conversions
    fig.add_trace(
        go.Scatter(x=digital_df['date'], 
                  y=digital_df['website_visitors'],
                  name='Website Visitors'),
        row=1, col=1
    )
    
    # AI engagement
    fig.add_trace(
        go.Scatter(x=digital_df['date'],
                  y=digital_df['ai_recommendations_shown'],
                  name='AI Recommendations',
                  line=dict(color='orange')),
        row=2, col=1
    )
    fig.add_trace(
        go.Scatter(x=digital_df['date'],
                  y=digital_df['virtual_try_ons'],
                  name='Virtual Try-Ons',
                  line=dict(color='green')),
        row=2, col=1
    )
    
    fig.update_layout(height=800, title_text='Digital Engagement Analysis')
    
    return metrics, fig

def analyze_customer_segments(segments_df):
    """Analyze customer segment performance"""
    # Calculate segment value
    segments_df['annual_value'] = (segments_df['customer_count'] * 
                                 segments_df['avg_basket_size'] * 
                                 segments_df['annual_purchase_frequency'])
    
    # Create bubble chart
    fig = px.scatter(segments_df,
                    x='ai_feature_adoption_rate',
                    y='loyalty_program_participation',
                    size='annual_value',
                    color='segment',
                    title='Customer Segment Analysis',
                    labels={'ai_feature_adoption_rate': 'AI Feature Adoption Rate',
                           'loyalty_program_participation': 'Loyalty Program Participation'},
                    template='plotly_white')
    
    return fig

def analyze_ai_investments(ai_investments_df):
    """Analyze AI investment ROI and priorities"""
    # Calculate ROI improvement
    ai_investments_df['roi_improvement'] = (ai_investments_df['projected_roi_2025'] - 
                                          ai_investments_df['roi_2024'])
    
    # Create ROI comparison chart
    fig = go.Figure(data=[
        go.Bar(name='2024 ROI', x=ai_investments_df['initiative'],
               y=ai_investments_df['roi_2024']),
        go.Bar(name='2025 Projected ROI', x=ai_investments_df['initiative'],
               y=ai_investments_df['projected_roi_2025'])
    ])
    
    fig.update_layout(
        title='AI Initiative ROI Comparison',
        barmode='group',
        xaxis_tickangle=-45,
        template='plotly_white'
    )
    
    # Priority matrix based on ROI and adoption
    fig2 = px.scatter(ai_investments_df,
                     x='projected_roi_2025',
                     y='customer_adoption_rate',
                     size='projected_investment_2025',
                     color='implementation_progress',
                     text='initiative',
                     title='AI Initiative Priority Matrix')
    
    return fig, fig2

# Generate summary insights
def generate_key_insights(sales_df, digital_df, segments_df, ai_investments_df):
    insights = {
        'sales_metrics': {
            'total_revenue': sales_df['total_sales'].sum(),
            'online_ratio': sales_df['online_sales'].sum() / sales_df['total_sales'].sum(),
            'kohls_contribution': sales_df['kohls_sales'].sum() / sales_df['total_sales'].sum()
        },
        'digital_metrics': {
            'avg_conversion': digital_df['conversion_rate'].mean(),
            'ai_impact': digital_df['ai_driven_purchases'].sum() / digital_df['website_visitors'].sum()
        },
        'top_segment': segments_df.loc[segments_df['annual_value'].idxmax(), 'segment'],
        'top_ai_initiative': ai_investments_df.loc[
            ai_investments_df['projected_roi_2025'].idxmax(), 'initiative'
        ]
    }
    return insights

# Create dashboard layout
def create_dashboard():
    # Generate all visualizations
    channel_fig = analyze_channel_performance(sales_df)
    digital_metrics, digital_fig = analyze_digital_engagement(digital_df)
    segment_fig = analyze_customer_segments(segments_df)
    roi_fig, priority_fig = analyze_ai_investments(ai_investments_df)
    insights = generate_key_insights(sales_df, digital_df, segments_df, ai_investments_df)
    
    # Return all components for dashboard assembly
    return {
        'channel_performance': channel_fig,
        'digital_engagement': digital_fig,
        'segment_analysis': segment_fig,
        'roi_analysis': roi_fig,
        'priority_matrix': priority_fig,
        'metrics': digital_metrics,
        'insights': insights
    }

In [5]:
analyze_channel_performance(sales_df)

In [6]:
analyze_digital_engagement(digital_df)[0]

{'Avg Daily Visitors': 501969.1835616438,
 'Conversion Rate': 3.0075797793194003,
 'AI-Driven Purchase Rate': 14.999903949352358}

In [7]:
analyze_digital_engagement(digital_df)[1]

In [8]:
 analyze_customer_segments(segments_df)

Gen Z would be higher in AI Feature Adoption Rate...

In [9]:
analyze_ai_investments(ai_investments_df)[0]

In [10]:
analyze_ai_investments(ai_investments_df)[1]

Interesting that those with high implementation_progress would not have high customer_adoption_rate. usually it is high customer_adoption_rate -> high projected_roi_2025 -> high implementation_progress

In [11]:
generate_key_insights(sales_df, digital_df, segments_df, ai_investments_df)

{'sales_metrics': {'total_revenue': 14336425776.690742,
  'online_ratio': 0.26093397474991037,
  'kohls_contribution': 0.133332678061288},
 'digital_metrics': {'avg_conversion': 0.030075797793194,
  'ai_impact': 0.1499990459491832},
 'top_segment': 'Beauty Enthusiast',
 'top_ai_initiative': 'Personalization Engine'}

# Created [dashboard](http://192.168.1.250:8501) using these tables and metrics, but with Steamlit app
[Streamlit Dashboard Python Script](https://github.com/mindyng/Scratch-Work/blob/main/sephora_dash.py)