In [1]:
# Master Analysis Notebook - Comprehensive E-commerce Analytics
# Integrate all analysis notebooks for easy execution and summary generation

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sqlalchemy import create_engine
import warnings
warnings.filterwarnings('ignore')

# Database connection
engine = create_engine('mysql+pymysql://root:lxx030327@localhost/mavenfuzzyfactory')

# Load core data
print("📊 Loading core datasets...")
pageviews_df = pd.read_sql("SELECT * FROM website_pageviews", engine)
orders_df = pd.read_sql("SELECT * FROM orders", engine)
website_sessions_df = pd.read_sql("SELECT * FROM website_sessions", engine)

print("✅ Data loaded successfully!")
print(f"• Website pageviews: {len(pageviews_df):,} records")
print(f"• Orders: {len(orders_df):,} records")
print(f"• Website sessions: {len(website_sessions_df):,} records")

📊 Loading core datasets...
✅ Data loaded successfully!
• Website pageviews: 1,188,124 records
• Orders: 32,313 records
• Website sessions: 472,871 records


In [2]:
# =============================================================================
# ANALYSIS FUNCTIONS - Imported from individual notebooks
# =============================================================================

# 1. Quarterly Growth Analysis
def quarterly_growth_analysis():
    """Analyze quarterly growth in sessions and orders"""
    # Prepare data
    orders_df['created_at'] = pd.to_datetime(orders_df['created_at'])
    website_sessions_df['created_at'] = pd.to_datetime(website_sessions_df['created_at'])
    
    # Add time dimensions
    orders_df['yr'] = orders_df['created_at'].dt.year
    orders_df['qtr'] = orders_df['created_at'].dt.quarter
    website_sessions_df['yr'] = website_sessions_df['created_at'].dt.year
    website_sessions_df['qtr'] = website_sessions_df['created_at'].dt.quarter
    
    # Calculate quarterly metrics
    quarterly_orders = orders_df.groupby(['yr', 'qtr']).agg({
        'order_id': 'count',
        'price_usd': 'sum'
    }).reset_index()
    quarterly_orders.columns = ['yr', 'qtr', 'orders', 'revenue']
    
    quarterly_sessions = website_sessions_df.groupby(['yr', 'qtr']).agg({
        'website_session_id': 'count'
    }).reset_index()
    quarterly_sessions.columns = ['yr', 'qtr', 'sessions']
    
    # Merge data
    quarterly_data = quarterly_sessions.merge(quarterly_orders, on=['yr', 'qtr'], how='left')
    quarterly_data = quarterly_data.fillna(0)
    
    # Calculate conversion rate
    quarterly_data['conversion_rate'] = quarterly_data['orders'] / quarterly_data['sessions']
    quarterly_data['revenue_per_session'] = quarterly_data['revenue'] / quarterly_data['sessions']
    
    return quarterly_data

# 2. Conversion Efficiency Analysis
def conversion_efficiency_analysis():
    """Analyze conversion efficiency over time"""
    # Prepare data
    orders_df['created_at'] = pd.to_datetime(orders_df['created_at'])
    website_sessions_df['created_at'] = pd.to_datetime(website_sessions_df['created_at'])
    
    # Add time dimensions
    orders_df['yr'] = orders_df['created_at'].dt.year
    orders_df['mo'] = orders_df['created_at'].dt.month
    website_sessions_df['yr'] = website_sessions_df['created_at'].dt.year
    website_sessions_df['mo'] = website_sessions_df['created_at'].dt.month
    
    # Calculate monthly metrics
    monthly_orders = orders_df.groupby(['yr', 'mo']).agg({
        'order_id': 'count',
        'price_usd': 'sum'
    }).reset_index()
    monthly_orders.columns = ['yr', 'mo', 'orders', 'revenue']
    
    monthly_sessions = website_sessions_df.groupby(['yr', 'mo']).agg({
        'website_session_id': 'count'
    }).reset_index()
    monthly_sessions.columns = ['yr', 'mo', 'sessions']
    
    # Merge data
    monthly_data = monthly_sessions.merge(monthly_orders, on=['yr', 'mo'], how='left')
    monthly_data = monthly_data.fillna(0)
    
    # Calculate metrics
    monthly_data['conversion_rate'] = monthly_data['orders'] / monthly_data['sessions']
    monthly_data['revenue_per_session'] = monthly_data['revenue'] / monthly_data['sessions']
    
    return monthly_data

# 3. Channel Analysis
def channel_analysis():
    """Analyze traffic sources and their performance"""
    # Prepare data
    website_sessions_df['created_at'] = pd.to_datetime(website_sessions_df['created_at'])
    website_sessions_df['yr'] = website_sessions_df['created_at'].dt.year
    website_sessions_df['mo'] = website_sessions_df['created_at'].dt.month
    
    # Merge with orders
    sessions_orders = website_sessions_df.merge(
        orders_df[['website_session_id', 'order_id', 'price_usd']], 
        on='website_session_id', 
        how='left'
    )
    
    # Calculate channel metrics
    channel_metrics = sessions_orders.groupby(['yr', 'mo', 'utm_source']).agg({
        'website_session_id': 'count',
        'order_id': 'count',
        'price_usd': 'sum'
    }).reset_index()
    
    channel_metrics.columns = ['yr', 'mo', 'utm_source', 'sessions', 'orders', 'revenue']
    channel_metrics['conversion_rate'] = channel_metrics['orders'] / channel_metrics['sessions']
    channel_metrics['revenue_per_session'] = channel_metrics['revenue'] / channel_metrics['sessions']
    
    return channel_metrics

# 4. Product Page Performance Analysis
def product_page_performance():
    """Analyze product page click-through and conversion rates"""
    # Filter for /products page views
    product_session = pageviews_df[pageviews_df['pageview_url'] == '/products'][
        ['website_session_id', 'website_pageview_id', 'created_at']
    ].copy()
    
    # Data preparation
    product_session['created_at'] = pd.to_datetime(product_session['created_at'])
    product_session['yr'] = product_session['created_at'].dt.year
    product_session['mo'] = product_session['created_at'].dt.month
    
    # Find subsequent pageviews
    subsequent_pageviews = product_session.merge(
        pageviews_df[['website_session_id', 'website_pageview_id']], 
        on='website_session_id',
        how='left',
        suffixes=('_product', '_next')
    )
    
    # Filter for pageviews AFTER the product page
    subsequent_pageviews = subsequent_pageviews[
        subsequent_pageviews['website_pageview_id_next'] > subsequent_pageviews['website_pageview_id_product']
    ]
    
    # Join with orders
    product_session_orders = product_session.merge(
        orders_df[['website_session_id', 'order_id']], 
        on='website_session_id',
        how='left'
    )
    
    # Calculate metrics by year and month
    results = []
    for (yr, mo), group in product_session.groupby(['yr', 'mo']):
        product_sessions = group['website_session_id'].nunique()
        
        clickthrough_sessions = subsequent_pageviews[
            (subsequent_pageviews['yr'] == yr) & 
            (subsequent_pageviews['mo'] == mo)
        ]['website_session_id'].nunique()
        
        converted_sessions = product_session_orders[
            (product_session_orders['yr'] == yr) & 
            (product_session_orders['mo'] == mo) &
            (product_session_orders['order_id'].notna())
        ]['website_session_id'].nunique()
        
        clickthrough_rate = clickthrough_sessions / product_sessions if product_sessions > 0 else 0
        conversion_rate = converted_sessions / product_sessions if product_sessions > 0 else 0
        
        results.append({
            'yr': yr,
            'mo': mo,
            'product_sessions': product_sessions,
            'clickthrough_rt': clickthrough_rate,
            'conversion_rt': conversion_rate
        })
    
    return pd.DataFrame(results)

print("✅ Analysis functions loaded successfully!")


✅ Analysis functions loaded successfully!


In [3]:
# =============================================================================
# EXECUTIVE SUMMARY GENERATION
# =============================================================================

def create_executive_summary():
    """Generate comprehensive executive summary with key metrics"""
    
    # Run all analyses
    print("🔄 Running comprehensive analysis...")
    
    # Quarterly growth analysis
    quarterly_data = quarterly_growth_analysis()
    latest_quarter = quarterly_data.iloc[-1]
    first_quarter = quarterly_data.iloc[0]
    
    # Calculate growth metrics
    orders_growth = (latest_quarter['orders'] / first_quarter['orders']) if first_quarter['orders'] > 0 else 0
    revenue_growth = (latest_quarter['revenue'] / first_quarter['revenue']) if first_quarter['revenue'] > 0 else 0
    conversion_improvement = (latest_quarter['conversion_rate'] / first_quarter['conversion_rate']) if first_quarter['conversion_rate'] > 0 else 0
    
    # Channel analysis
    channel_data = channel_analysis()
    latest_channels = channel_data[channel_data['yr'] == channel_data['yr'].max()]
    total_sessions = latest_channels['sessions'].sum()
    paid_search_sessions = latest_channels[latest_channels['utm_source'] == 'gsearch']['sessions'].sum()
    paid_search_ratio = paid_search_sessions / total_sessions if total_sessions > 0 else 0
    
    # Product page performance
    product_data = product_page_performance()
    latest_product = product_data.iloc[-1]
    avg_clickthrough = product_data['clickthrough_rt'].mean()
    avg_conversion = product_data['conversion_rt'].mean()
    
    # Create summary
    summary = {
        'Business Growth': f"{orders_growth:.0f}x increase in quarterly orders ({first_quarter['orders']:.0f} → {latest_quarter['orders']:.0f})",
        'Revenue Growth': f"{revenue_growth:.0f}x increase in quarterly revenue (${first_quarter['revenue']:,.0f} → ${latest_quarter['revenue']:,.0f})",
        'Conversion Improvement': f"{conversion_improvement:.0f}% improvement in conversion rate ({first_quarter['conversion_rate']:.1%} → {latest_quarter['conversion_rate']:.1%})",
        'Channel Diversification': f"Paid search dependency reduced to {paid_search_ratio:.1%} of total traffic",
        'Product Page Performance': f"Average click-through rate: {avg_clickthrough:.1%}, Conversion rate: {avg_conversion:.1%}",
        'Revenue Efficiency': f"Latest revenue per session: ${latest_quarter['revenue_per_session']:.2f}"
    }
    
    return summary

# Generate and display executive summary
print("\n" + "="*80)
print("📊 EXECUTIVE SUMMARY - E-COMMERCE ANALYTICS")
print("="*80)

executive_summary = create_executive_summary()
for key, value in executive_summary.items():
    print(f"\n🎯 {key}:")
    print(f"   {value}")

print("\n" + "="*80)
print("✅ Analysis complete! All key metrics generated.")
print("="*80)



📊 EXECUTIVE SUMMARY - E-COMMERCE ANALYTICS
🔄 Running comprehensive analysis...

🎯 Business Growth:
   90x increase in quarterly orders (60 → 5420)

🎯 Revenue Growth:
   113x increase in quarterly revenue ($2,999 → $340,376)

🎯 Conversion Improvement:
   3% improvement in conversion rate (3.2% → 8.4%)

🎯 Channel Diversification:
   Paid search dependency reduced to 83.3% of total traffic

🎯 Product Page Performance:
   Average click-through rate: 78.0%, Conversion rate: 11.4%

🎯 Revenue Efficiency:
   Latest revenue per session: $5.30

✅ Analysis complete! All key metrics generated.


In [4]:
# =============================================================================
# DETAILED ANALYSIS EXECUTION
# =============================================================================

print("\n📈 DETAILED ANALYSIS RESULTS")
print("="*50)

# 1. Quarterly Growth Analysis
print("\n1️⃣ QUARTERLY GROWTH ANALYSIS")
quarterly_results = quarterly_growth_analysis()
print(quarterly_results.tail())

# 2. Conversion Efficiency Analysis  
print("\n2️⃣ CONVERSION EFFICIENCY ANALYSIS")
conversion_results = conversion_efficiency_analysis()
print(conversion_results.tail())

# 3. Channel Analysis
print("\n3️⃣ CHANNEL ANALYSIS")
channel_results = channel_analysis()
print("Latest channel performance:")
latest_channels = channel_results[channel_results['yr'] == channel_results['yr'].max()]
print(latest_channels.groupby('utm_source').agg({
    'sessions': 'sum',
    'orders': 'sum', 
    'revenue': 'sum',
    'conversion_rate': 'mean'
}).round(4))

# 4. Product Page Performance
print("\n4️⃣ PRODUCT PAGE PERFORMANCE")
product_results = product_page_performance()
print("Latest product page metrics:")
print(product_results.tail())

print("\n✅ All detailed analyses completed!")



📈 DETAILED ANALYSIS RESULTS

1️⃣ QUARTERLY GROWTH ANALYSIS
      yr  qtr  sessions  orders    revenue  conversion_rate  \
8   2014    1     46779    3069  190771.14         0.065606   
9   2014    2     53129    3848  247711.95         0.072427   
10  2014    3     57141    4035  260237.12         0.070615   
11  2014    4     76373    5908  376891.98         0.077357   
12  2015    1     64198    5420  340375.55         0.084426   

    revenue_per_session  
8              4.078136  
9              4.662462  
10             4.554298  
11             4.934885  
12             5.301965  

2️⃣ CONVERSION EFFICIENCY ANALYSIS
      yr  mo  sessions  orders    revenue  conversion_rate  \
32  2014  11     25125    1985  128162.98         0.079005   
33  2014  12     29722    2314  144823.02         0.077855   
34  2015   1     25337    2098  132211.54         0.082804   
35  2015   2     23778    2068  129212.94         0.086971   
36  2015   3     15083    1254   78951.07         0.083140 

In [5]:
# =============================================================================
# DATA EXPORT AND SAVING
# =============================================================================

print("\n💾 EXPORTING RESULTS TO CSV FILES")
print("="*40)

# Export all results to CSV files
quarterly_results.to_csv('../data/master_quarterly_growth.csv', index=False)
conversion_results.to_csv('../data/master_conversion_efficiency.csv', index=False)
channel_results.to_csv('../data/master_channel_analysis.csv', index=False)
product_results.to_csv('../data/master_product_performance.csv', index=False)

print("✅ Data exported successfully!")
print("📁 Files saved to ../data/ directory:")
print("   • master_quarterly_growth.csv")
print("   • master_conversion_efficiency.csv") 
print("   • master_channel_analysis.csv")
print("   • master_product_performance.csv")

# Create final summary report
print("\n📋 FINAL SUMMARY REPORT")
print("="*30)
print(f"📊 Total quarterly data points: {len(quarterly_results)}")
print(f"📈 Total monthly conversion data: {len(conversion_results)}")
print(f"🔗 Total channel data points: {len(channel_results)}")
print(f"🛍️ Total product page data: {len(product_results)}")

print("\n🎉 Master analysis notebook execution completed!")
print("All analyses integrated and results exported successfully.")



💾 EXPORTING RESULTS TO CSV FILES
✅ Data exported successfully!
📁 Files saved to ../data/ directory:
   • master_quarterly_growth.csv
   • master_conversion_efficiency.csv
   • master_channel_analysis.csv
   • master_product_performance.csv

📋 FINAL SUMMARY REPORT
📊 Total quarterly data points: 13
📈 Total monthly conversion data: 37
🔗 Total channel data points: 82
🛍️ Total product page data: 37

🎉 Master analysis notebook execution completed!
All analyses integrated and results exported successfully.
