# Price Elasticity Analysis - Exploratory Data Analysis

This notebook provides an interactive exploration of the price elasticity dataset and preliminary analysis.

## Table of Contents
1. [Data Loading and Overview](#data-loading)
2. [Data Quality Assessment](#data-quality)
3. [Exploratory Data Analysis](#eda)
4. [Price-Demand Relationships](#price-demand)
5. [Competitive Analysis](#competitive)
6. [Preliminary Insights](#insights)

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import warnings
# Ignore every warning raised from here on.
# NOTE(review): presumably done to keep cell output tidy; it also hides
# warnings that could point at real data or API problems.
warnings.filterwarnings('ignore')

# Configure plotting
# NOTE(review): the 'seaborn-v0_8' style name presumably needs a recent
# matplotlib release — confirm against the project's pinned version.
plt.style.use('seaborn-v0_8')
# Default figure size as (width, height); cells below override it per figure.
plt.rcParams['figure.figsize'] = (12, 8)
sns.set_palette("husl")

# Add src to path for imports
# The path is relative, so it is resolved against the current working directory.
sys.path.append('../src')

# Project modules; NOTE(review): presumably located in ../src — confirm.
# ElasticityAnalyzer is not referenced by the cells visible in this notebook.
from data_processor import DataProcessor
from elasticity_models import ElasticityAnalyzer

print("📊 Price Elasticity Analysis - Exploratory Data Analysis")
print("=" * 60)

<a id="data-loading"></a>
## 1. Data Loading and Overview

In [None]:
# Create the processor for the '../data/' directory and load its datasets
processor = DataProcessor('../data/')
datasets = processor.load_all_data()

# Print one line per dataset: its name, row count and column count
print("📈 Dataset Overview:")
for dataset_name, frame in datasets.items():
    row_count = frame.shape[0]
    column_count = frame.shape[1]
    print(f"  {dataset_name}: {row_count:,} rows × {column_count} columns")

In [None]:
# Show the first rows of the market data, then its info() summary
market_df = processor.market_data

print("🔍 Market Data Sample:")
sample_rows = market_df.head()
display(sample_rows)

print("\n📊 Market Data Info:")
market_df.info()

In [None]:
# Summary statistics of the market data via describe()
print("📈 Descriptive Statistics:")
descriptive_stats = processor.market_data.describe()
display(descriptive_stats)

<a id="data-quality"></a>
## 2. Data Quality Assessment

In [None]:
# Run the processor's quality check and print three fields of its report,
# falling back to a placeholder text when a field is absent.
quality_report = processor.data_quality_check()

report_fields = (
    ("Missing Values", 'missing_values', 'None detected'),
    ("Consistency Issues", 'consistency_issues', 'None detected'),
    ("Outliers Detected", 'outliers', 'Analysis pending'),
)

print("🔍 Data Quality Report:")
for field_label, field_key, fallback in report_fields:
    print(f"{field_label}: {quality_report.get(field_key, fallback)}")

In [None]:
# Two-panel view of missing data: cell-level null pattern (left) and the
# percentage of nulls per column (right).
fig, (pattern_ax, share_ax) = plt.subplots(1, 2, figsize=(15, 6))

null_mask = processor.market_data.isnull()

# Left panel: heatmap of the boolean null mask
sns.heatmap(null_mask, ax=pattern_ax, cbar=True, yticklabels=False)
pattern_ax.set_title('Missing Data Pattern')

# Right panel: keep only the columns that actually have nulls
missing_pct = (null_mask.sum() / len(null_mask)) * 100
missing_pct = missing_pct[missing_pct > 0]

if len(missing_pct) == 0:
    # Nothing missing: show a notice instead of an empty bar chart
    notice_box = dict(boxstyle='round', facecolor='lightgreen')
    share_ax.text(0.5, 0.5, 'No Missing Data\nDetected',
                  ha='center', va='center', transform=share_ax.transAxes,
                  fontsize=14, bbox=notice_box)
    share_ax.set_title('Missing Data Assessment')
else:
    missing_pct.plot(kind='bar', ax=share_ax)
    share_ax.set_title('Missing Data Percentage by Column')
    share_ax.set_ylabel('Percentage Missing')

plt.tight_layout()
plt.show()

<a id="eda"></a>
## 3. Exploratory Data Analysis

In [None]:
# Four-panel overview of the market data:
#   top row    - price and quantity boxplots per product category
#   bottom row - revenue and market share over time per product category
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

market = processor.market_data
# Computed once and shared by both time-series panels
category_names = market['product_category'].unique()

# Price distributions
market.boxplot(column='price_own', by='product_category', ax=axes[0,0])
axes[0,0].set_title('Price Distribution by Category')
axes[0,0].set_xlabel('Product Category')
axes[0,0].set_ylabel('Price ($)')

# Quantity distributions
market.boxplot(column='quantity_sold', by='product_category', ax=axes[0,1])
axes[0,1].set_title('Quantity Distribution by Category')
axes[0,1].set_xlabel('Product Category')
axes[0,1].set_ylabel('Quantity Sold')

# DataFrame.boxplot(by=...) sets a figure-wide "Boxplot grouped by ..." title.
# Clear it: the figure also holds two non-boxplot panels, so it would mislead.
fig.suptitle('')

# Revenue over time (summed per date and category)
revenue_by_date = market.groupby(['date', 'product_category'])['revenue'].sum().reset_index()
for category in category_names:
    cat_data = revenue_by_date[revenue_by_date['product_category'] == category]
    axes[1,0].plot(cat_data['date'], cat_data['revenue'], marker='o', label=category)
axes[1,0].set_title('Revenue Trends by Category')
axes[1,0].set_xlabel('Date')
axes[1,0].set_ylabel('Revenue ($)')
axes[1,0].legend()
axes[1,0].tick_params(axis='x', rotation=45)

# Market share evolution (raw rows, in the order they appear in the data)
for category in category_names:
    cat_data = market[market['product_category'] == category]
    axes[1,1].plot(cat_data['date'], cat_data['market_share'], marker='o', label=category)
axes[1,1].set_title('Market Share Evolution')
axes[1,1].set_xlabel('Date')
axes[1,1].set_ylabel('Market Share')
axes[1,1].legend()
axes[1,1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlations between the key numeric columns, drawn as an
# annotated heatmap, followed by three highlighted coefficients.
numerical_cols = ['price_own', 'quantity_sold', 'revenue', 'seasonality_index', 'market_share']
numeric_view = processor.market_data[numerical_cols]
correlation_matrix = numeric_view.corr()

heatmap_options = dict(annot=True, cmap='RdYlBu_r', center=0,
                       square=True, linewidths=0.5)

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Correlation Matrix - Key Variables')
plt.tight_layout()
plt.show()

# (label, row, column) of each coefficient to print
highlighted_pairs = (
    ("Price vs Quantity", 'price_own', 'quantity_sold'),
    ("Price vs Revenue", 'price_own', 'revenue'),
    ("Quantity vs Revenue", 'quantity_sold', 'revenue'),
)

print("🔍 Key Correlations:")
for pair_label, row_name, col_name in highlighted_pairs:
    print(f"{pair_label}: {correlation_matrix.loc[row_name, col_name]:.3f}")

<a id="price-demand"></a>
## 4. Price-Demand Relationships

In [None]:
# Price vs quantity scatter plots by category, each with a fitted linear
# trend line and a point-elasticity approximation evaluated at the means:
#     elasticity ≈ slope * (mean price / mean quantity)
categories = processor.market_data['product_category'].unique()
colors = ['blue', 'orange', 'green']

# One panel per category instead of a fixed three, so any number of
# categories can be plotted (at least one panel so the figure can be created).
n_panels = max(len(categories), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 6), squeeze=False)
axes = axes[0]  # squeeze=False always returns a 2-D array; take the only row

for idx, category in enumerate(categories):
    ax = axes[idx]
    cat_data = processor.market_data[processor.market_data['product_category'] == category]

    # Rows missing either value can be neither plotted nor fitted
    points = cat_data[['price_own', 'quantity_sold']].dropna()
    prices = points['price_own']
    quantities = points['quantity_sold']

    # Scatter plot (colours repeat when there are more categories than colours)
    ax.scatter(prices, quantities,
               color=colors[idx % len(colors)], alpha=0.7, s=60)

    elasticity_label = 'N/A'
    # A straight-line fit needs at least two distinct price points
    if prices.nunique() >= 2:
        slope, intercept = np.polyfit(prices, quantities, 1)

        # Add trend line
        sorted_prices = prices.sort_values()
        ax.plot(sorted_prices, slope * sorted_prices + intercept,
                "r--", alpha=0.8, linewidth=2)

        # Calculate basic elasticity approximation (undefined at zero mean quantity)
        mean_price = prices.mean()
        mean_quantity = quantities.mean()
        if mean_quantity != 0:
            basic_elasticity = slope * (mean_price / mean_quantity)
            elasticity_label = f'{basic_elasticity:.2f}'

    ax.set_title(f'{category}\nBasic Elasticity ≈ {elasticity_label}')
    ax.set_xlabel('Price ($)')
    ax.set_ylabel('Quantity Sold')
    ax.grid(True, alpha=0.3)

plt.suptitle('Price-Demand Relationships by Product Category', fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# Promotional impact analysis: average quantity and revenue per category,
# split by promotion_flag, plus the relative quantity lift from promotions.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Per-category, per-flag averages
promo_data = processor.market_data.groupby(['product_category', 'promotion_flag']).agg({
    'price_own': 'mean',
    'quantity_sold': 'mean',
    'revenue': 'mean'
}).reset_index()

# Legend text is derived from the flag values actually present, so a dataset
# holding only one of the two flags is not mislabelled.
flag_labels = {0: 'No Promotion', 1: 'Promotion'}

promo_pivot = promo_data.pivot(index='product_category', columns='promotion_flag', values='quantity_sold')
promo_pivot = promo_pivot.rename(columns=flag_labels)
promo_pivot.plot(kind='bar', ax=axes[0])
axes[0].set_title('Average Quantity Sold: Promotion vs No Promotion')
axes[0].set_xlabel('Product Category')
axes[0].set_ylabel('Average Quantity Sold')
axes[0].legend([str(label) for label in promo_pivot.columns])
axes[0].tick_params(axis='x', rotation=45)

# Revenue impact of promotions
revenue_pivot = promo_data.pivot(index='product_category', columns='promotion_flag', values='revenue')
revenue_pivot = revenue_pivot.rename(columns=flag_labels)
revenue_pivot.plot(kind='bar', ax=axes[1])
axes[1].set_title('Average Revenue: Promotion vs No Promotion')
axes[1].set_xlabel('Product Category')
axes[1].set_ylabel('Average Revenue ($)')
axes[1].legend([str(label) for label in revenue_pivot.columns])
axes[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Calculate promotion lift: (promo - no_promo) / no_promo, in percent.
# `categories` comes from the price-demand cell above.
print("📈 Promotion Impact Analysis:")
for category in categories:
    cat_promo = promo_data[promo_data['product_category'] == category]
    baseline = cat_promo.loc[cat_promo['promotion_flag'] == 0, 'quantity_sold']
    promoted = cat_promo.loc[cat_promo['promotion_flag'] == 1, 'quantity_sold']
    if baseline.empty or promoted.empty:
        # Lift needs both a promotional and a non-promotional average
        continue
    no_promo = baseline.iloc[0]
    promo = promoted.iloc[0]
    if no_promo == 0:
        # A relative lift over a zero baseline is undefined
        print(f"  {category}: lift undefined (zero non-promotional quantity)")
        continue
    lift = ((promo - no_promo) / no_promo) * 100
    print(f"  {category}: {lift:+.1f}% quantity lift from promotions")

<a id="competitive"></a>
## 5. Competitive Analysis

In [None]:
# Preview the first rows of the competitor table
print("🏢 Competitor Data Overview:")
competitor_preview = processor.competitor_data.head()
display(competitor_preview)

# create_elasticity_features() returns the dataset used by the cells below
processed_data = processor.create_elasticity_features()

# Report its shape and the columns whose name contains 'comp' or 'relative'
engineered_columns = [
    column_name
    for column_name in processed_data.columns
    if 'comp' in column_name or 'relative' in column_name
]
print(f"\n📊 Enhanced dataset shape: {processed_data.shape}")
print(f"🔧 New features created: {engineered_columns}")

In [None]:
# Competitive positioning analysis: one price-over-time panel per category
# (our price vs competitors A/B/C) plus a final panel with average prices.
# `categories` comes from the price-demand cell above.

# The grid grows with the number of categories, so a fourth or later
# category gets its own panel rather than being drawn over an earlier one.
n_panels = len(categories) + 1          # category panels + summary panel
n_rows = (n_panels + 1) // 2            # two panels per row, rounded up
fig, axes = plt.subplots(n_rows, 2, figsize=(16, 6 * n_rows), squeeze=False)
panels = axes.ravel()                   # row-major order: left-to-right, top-to-bottom

# Price positioning by category
for idx, category in enumerate(categories):
    cat_data = processed_data[processed_data['product_category'] == category]
    ax = panels[idx]

    # Plot price evolution
    ax.plot(cat_data['date'], cat_data['price_own'], 'b-', linewidth=3, label='Our Price')
    ax.plot(cat_data['date'], cat_data['competitor_a_price'], 'r--', label='Competitor A')
    ax.plot(cat_data['date'], cat_data['competitor_b_price'], 'g--', label='Competitor B')
    ax.plot(cat_data['date'], cat_data['competitor_c_price'], 'orange', linestyle='--', label='Competitor C')

    ax.set_title(f'{category} - Competitive Pricing')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price ($)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

# Overall price positioning summary (mean price per category, 2 decimals)
price_summary = processed_data.groupby('product_category').agg({
    'price_own': 'mean',
    'competitor_a_price': 'mean',
    'competitor_b_price': 'mean',
    'competitor_c_price': 'mean'
}).round(2)

summary_ax = panels[len(categories)]
price_summary.plot(kind='bar', ax=summary_ax, width=0.8)
summary_ax.set_title('Average Price Positioning')
summary_ax.set_xlabel('Product Category')
summary_ax.set_ylabel('Average Price ($)')
# Labels follow the column order of the aggregation above
summary_ax.legend(['Our Price', 'Competitor A', 'Competitor B', 'Competitor C'])
summary_ax.tick_params(axis='x', rotation=45)

# Hide any grid cell left over when the panel count is odd
for spare_ax in panels[n_panels:]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.show()

print("💰 Average Price Positioning:")
display(price_summary)

In [None]:
# Price differential analysis: one boxplot per competitor of the price
# difference column, grouped by product category, with a zero reference line.
comparison_titles = {
    'price_diff_comp_a': 'vs Competitor A',
    'price_diff_comp_b': 'vs Competitor B',
    'price_diff_comp_c': 'vs Competitor C',
}
diff_cols = list(comparison_titles)

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

for panel, (diff_column, comparison) in zip(axes, comparison_titles.items()):
    processed_data.boxplot(column=diff_column, by='product_category', ax=panel)
    panel.set_title(f'Price Differential {comparison}')
    panel.set_xlabel('Product Category')
    panel.set_ylabel('Price Difference ($)')
    # Zero line marks the boundary between positive and negative differences
    panel.axhline(y=0, color='red', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

# Mean differential per category, rounded to 2 decimals, with short headers
print("📊 Price Differential Summary:")
category_means = processed_data.groupby('product_category')[diff_cols].mean()
diff_summary = category_means.round(2)
diff_summary.columns = ['vs Comp A', 'vs Comp B', 'vs Comp C']
display(diff_summary)

interpretation_notes = (
    "• Positive values: We price higher than competitor",
    "• Negative values: We price lower than competitor",
)
print("\n💡 Interpretation:")
for note in interpretation_notes:
    print(note)

<a id="insights"></a>
## 6. Preliminary Insights

In [None]:
# Fetch the processor's data summary and print its three sections:
# dataset info, per-category price statistics and per-category elasticity insights.
summary = processor.get_data_summary()

print("🎯 PRELIMINARY INSIGHTS")
print("=" * 40)

# Section 1: keys are shown in Title Case with underscores replaced by spaces
print("\n📈 Dataset Overview:")
for info_key, info_value in summary['dataset_info'].items():
    readable_key = info_key.replace('_', ' ').title()
    print(f"  {readable_key}: {info_value}")

# Section 2: price, quantity and revenue figures per category
print("\n💰 Price Statistics by Category:")
for category_name, category_stats in summary['price_statistics'].items():
    lowest = category_stats['min_price']
    highest = category_stats['max_price']
    print(f"\n  {category_name}:")
    print(f"    Average Price: ${category_stats['avg_price']:.2f}")
    print(f"    Price Range: ${lowest:.2f} - ${highest:.2f}")
    print(f"    Average Quantity: {category_stats['avg_quantity']:,.0f} units")
    print(f"    Total Revenue: ${category_stats['total_revenue']:,.2f}")

# Section 3: the elasticity estimate is printed unformatted
print("\n🔍 Elasticity Insights:")
for category_name, category_insights in summary['elasticity_insights'].items():
    estimate = category_insights['estimated_elasticity']
    print(f"\n  {category_name}:")
    print(f"    Estimated Elasticity: {estimate}")
    print(f"    Price Volatility: {category_insights['price_volatility']:.3f}")
    print(f"    Demand Volatility: {category_insights['demand_volatility']:.3f}")

In [None]:
# Key business insights: rank categories by estimated elasticity, then attach
# a pricing-strategy label to each category in descending revenue order.
# Reads `summary` from the previous cell.
print("\n🚀 KEY BUSINESS INSIGHTS:")
print("=" * 30)

# Price sensitivity ranking: (category, elasticity) pairs, skipping the
# categories whose estimate is the 'N/A' placeholder
elasticity_data = [
    (category, insights['estimated_elasticity'])
    for category, insights in summary['elasticity_insights'].items()
    if insights['estimated_elasticity'] != 'N/A'
]

if elasticity_data:
    elasticity_data.sort(key=lambda x: x[1])  # Sort by elasticity (most negative first)

    print("\n📊 Price Sensitivity Ranking (Most to Least Sensitive):")
    for i, (category, elasticity) in enumerate(elasticity_data, 1):
        if elasticity < -2:
            sensitivity = "High"
        elif elasticity < -1:
            sensitivity = "Moderate"
        else:
            sensitivity = "Low"
        print(f"  {i}. {category}: {elasticity:.2f} ({sensitivity} sensitivity)")

# Revenue opportunities
print("\n💡 Strategic Opportunities:")
revenue_ranking = sorted(summary['price_statistics'].items(),
                         key=lambda x: x[1]['total_revenue'], reverse=True)

# One dict lookup per category instead of rescanning the pair list each time
elasticity_by_category = dict(elasticity_data)

for category, _ in revenue_ranking:
    if category not in elasticity_by_category:
        # No usable elasticity estimate for this category
        continue
    elasticity = elasticity_by_category[category]
    if elasticity > -1.5:  # Less elastic
        opportunity = "Premium pricing opportunity"
    elif elasticity < -2.5:  # Very elastic
        opportunity = "Volume focus, competitive pricing"
    else:
        opportunity = "Balanced pricing strategy"
    print(f"  • {category}: {opportunity}")

print("\n⚠️ Areas for Further Investigation:")
print("  • Detailed competitive response modeling")
print("  • Seasonal elasticity variations")
print("  • Customer segment-specific elasticity")
print("  • Cross-price elasticity quantification")
print("  • Promotional effectiveness measurement")

In [None]:
# Save key insights for the next notebook
# Collects results from earlier cells (`processed_data`, `categories`,
# `summary`, `elasticity_data`) and writes them to a JSON file, so the
# "saved" message below is true.
import json
from pathlib import Path

insights_summary = {
    'dataset_shape': processed_data.shape,
    'categories': list(categories),
    'price_ranges': {cat: {'min': summary['price_statistics'][cat]['min_price'],
                           'max': summary['price_statistics'][cat]['max_price']}
                    for cat in categories},
    'preliminary_elasticities': dict(elasticity_data) if elasticity_data else {},
    'total_revenue': sum(stats['total_revenue'] for stats in summary['price_statistics'].values())
}

# JSON cannot encode every numpy scalar type, so build a copy that holds
# only builtin str / int / float values before writing.
serializable_price_ranges = {}
for cat, bounds in insights_summary['price_ranges'].items():
    serializable_price_ranges[str(cat)] = {
        'min': float(bounds['min']),
        'max': float(bounds['max']),
    }

serializable_summary = {
    'dataset_shape': [int(dim) for dim in insights_summary['dataset_shape']],
    'categories': [str(cat) for cat in insights_summary['categories']],
    'price_ranges': serializable_price_ranges,
    'preliminary_elasticities': {
        str(cat): float(value)
        for cat, value in insights_summary['preliminary_elasticities'].items()
    },
    'total_revenue': float(insights_summary['total_revenue']),
}

# Written to the working directory.
# NOTE(review): presumably where '02_elasticity_modeling.ipynb' also runs — confirm.
insights_path = Path('eda_insights_summary.json')
with insights_path.open('w', encoding='utf-8') as handle:
    json.dump(serializable_summary, handle, indent=2, ensure_ascii=False)

print("\n✅ Exploratory analysis complete!")
print("📝 Key insights saved for detailed modeling phase")
print(f"   File: {insights_path}")
print("➡️ Next: Proceed to '02_elasticity_modeling.ipynb' for advanced analysis")