# Marketing Campaign Analysis

Customer segmentation and campaign performance analysis on 2,240 customers using RFM analysis and purchase behavior modeling.

## Key Findings:
- **Top 20% customers contribute 52% of $1.36M total revenue**
- **27.2% overall campaign response rate**
- **Households without kids at home (`Kidhome` = 0) spend 4.5x more on average ($901 vs $202)**
- **Store purchases (46%) outperform web (33%) and catalog (21%)**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
# Select the matplotlib style sheet and the seaborn colour palette used by the
# charts in this notebook. The two calls are kept in this order on purpose:
# NOTE(review): both may touch the default colour cycle, so swapping them
# could change chart colours — confirm before reordering.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

## 1. Data Loading & Exploration

In [None]:
# Read the campaign CSV into the working DataFrame used by every later cell
df = pd.read_csv('marketing_data.csv')

# Report what was loaded: dimensions, row count and the column names
loaded_rows = len(df)
loaded_columns = list(df.columns)
print(f'Dataset Shape: {df.shape}')
print(f'Total Customers: {loaded_rows:,}')
print(f'\nColumns: {loaded_columns}')

In [None]:
# Basic info
# Print the DataFrame summary (column dtypes and non-null counts) so columns
# that need cleaning can be spotted before the preprocessing step.
df.info()

In [None]:
# First few rows
# Left as the cell's last expression so the notebook renders the preview table.
df.head()

## 2. Data Cleaning & Preprocessing

In [None]:
# Parse income to numeric
def parse_income(val):
    """Convert a currency-formatted income value to a float.

    The value is stringified, then '$' signs, thousands separators and
    surrounding whitespace are removed before conversion, e.g.
    ``'$84,835.00 '`` becomes ``84835.0``.

    Args:
        val: Raw cell value. It is passed through ``str()``, so strings,
            numbers and missing values are all accepted.

    Returns:
        The parsed amount as a float, or ``np.nan`` when the cleaned text
        is not a valid number (for example an empty string or ``None``).
    """
    cleaned = str(val).replace('$', '').replace(',', '').strip()
    try:
        return float(cleaned)
    except ValueError:
        # Only a failed numeric conversion means "missing". A bare except
        # would also swallow KeyboardInterrupt/SystemExit during .apply().
        return np.nan

# Numeric income column derived from the raw 'Income' values
df['Income_Numeric'] = df['Income'].apply(parse_income)

# Total spend per customer = row-wise sum of the six product amount columns
spend_cols = [
    'MntWines',
    'MntFruits',
    'MntMeatProducts',
    'MntFishProducts',
    'MntSweetProducts',
    'MntGoldProds',
]
df['TotalSpend'] = df[spend_cols].sum(axis=1)

# Customer age measured against a fixed reference year
current_year = 2024
df['Age'] = current_year - df['Year_Birth']

# Campaign response flag: element-wise OR of the five AcceptedCmp columns and
# 'Response', accumulated left to right and stored as an integer column
response_flags = df['AcceptedCmp1']
for flag_col in ('AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4',
                 'AcceptedCmp5', 'Response'):
    response_flags = response_flags | df[flag_col]
df['AnyResponse'] = response_flags.astype(int)

print('Data preprocessing complete!')

## 3. Descriptive Statistics

In [None]:
# Headline figures for the whole customer base, printed as a small report
metrics_rule = '=' * 50
spend_series = df['TotalSpend']

print(metrics_rule)
print('KEY BUSINESS METRICS')
print(metrics_rule)
print(f'Total Customers: {len(df):,}')
print(f'Total Revenue: ${spend_series.sum():,.2f}')
print(f'Average Spend per Customer: ${spend_series.mean():.2f}')
print(f'Average Customer Age: {df["Age"].mean():.1f} years')
# 'Complain' is averaged and scaled by 100 to present it as a percentage
print(f'Complaint Rate: {df["Complain"].mean()*100:.2f}%')

In [None]:
# Number of customers per value of the 'Country' column
country_counts = df['Country'].value_counts()
print('\nCustomer Distribution by Country:')
print(country_counts)

## 4. Customer Segmentation Analysis

In [None]:
# Revenue concentration: customers whose TotalSpend is at or above the 80th
# percentile, and the share of overall revenue they account for
total_revenue = df['TotalSpend'].sum()
top_20_threshold = df['TotalSpend'].quantile(0.8)

is_top_spender = df['TotalSpend'] >= top_20_threshold
top_20_customers = df[is_top_spender]
top_20_revenue = top_20_customers['TotalSpend'].sum()
top_20_share_pct = top_20_revenue / total_revenue * 100

print('Top 20% Customer Analysis:')
print(f'  Threshold for top 20%: ${top_20_threshold:.2f}')
print(f'  Revenue from top 20%: ${top_20_revenue:,.2f}')
print(f'  Percentage of total revenue: {top_20_share_pct:.0f}%')

In [None]:
# Histogram of per-customer total spend, with the top-20% cut-off drawn as a
# dashed vertical line
fig, ax = plt.subplots(figsize=(10, 6))

ax.hist(df['TotalSpend'], bins=50, edgecolor='black', alpha=0.7)

threshold_label = f'Top 20% threshold: ${top_20_threshold:.0f}'
ax.axvline(
    top_20_threshold,
    color='red',
    linestyle='--',
    linewidth=2,
    label=threshold_label,
)

ax.set_title('Customer Spend Distribution', fontsize=14)
ax.set_xlabel('Total Spend ($)', fontsize=12)
ax.set_ylabel('Number of Customers', fontsize=12)
ax.legend()

plt.tight_layout()
plt.show()

## 5. Campaign Response Analysis

In [None]:
# Acceptance rate (%) for each campaign flag column, plus the overall rate
print('Campaign Response Rates:')
print('=' * 40)

# Display label -> source column; insertion order fixes the print/plot order
campaign_columns = {
    'Campaign 1': 'AcceptedCmp1',
    'Campaign 2': 'AcceptedCmp2',
    'Campaign 3': 'AcceptedCmp3',
    'Campaign 4': 'AcceptedCmp4',
    'Campaign 5': 'AcceptedCmp5',
    'Response': 'Response',
}
campaigns = {
    label: df[column].mean() * 100
    for label, column in campaign_columns.items()
}

for camp, rate in campaigns.items():
    print(f'  {camp}: {rate:.1f}%')

overall_rate = df["AnyResponse"].mean()*100
print(f'\nOverall response rate: {overall_rate:.1f}%')

In [None]:
# Bar chart of the per-campaign response rates with a value label on each bar
fig, ax = plt.subplots(figsize=(10, 6))

campaign_names = list(campaigns.keys())
campaign_rates = list(campaigns.values())
bars = ax.bar(
    campaign_names,
    campaign_rates,
    color=sns.color_palette('husl', len(campaigns)),
)

ax.set_ylabel('Response Rate (%)', fontsize=12)
ax.set_title('Campaign Response Rates', fontsize=14)

for bar, rate in zip(bars, campaign_rates):
    # Centre the label horizontally and lift it slightly above the bar top
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.3
    ax.text(label_x, label_y, f'{rate:.1f}%', ha='center', fontsize=10)

plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 6. Purchase Channel Analysis

In [None]:
# Purchase channels
# Totals of the purchase-count columns across all customers.
total_web = df['NumWebPurchases'].sum()
total_store = df['NumStorePurchases'].sum()
total_catalog = df['NumCatalogPurchases'].sum()
total_deals = df['NumDealsPurchases'].sum()

# Only web, store and catalog are treated as channels. Including 'Deals' in
# the denominator made the shares disagree with this notebook's reported
# findings (store 46% / web 33% / catalog 21%, which sum to 100%).
# NOTE(review): NumDealsPurchases is assumed to count discounted purchases
# that are already included in the channel columns — confirm against the
# dataset's data dictionary.
channels = {
    'Web': total_web,
    'Store': total_store,
    'Catalog': total_catalog,
}

total_purchases = sum(channels.values())
print('Purchase Channel Distribution:')
print('=' * 40)
for channel, count in channels.items():
    pct = count / total_purchases * 100
    print(f'  {channel}: {count:,} purchases ({pct:.1f}%)')

# Deal purchases are reported on their own rather than as a channel share
print(f'\nDeal purchases (not a channel): {total_deals:,}')

In [None]:
# Pie chart of the purchase totals held in `channels`, one wedge per entry
fig, ax = plt.subplots(figsize=(8, 8))

channel_labels = list(channels.keys())
channel_totals = list(channels.values())
colors = sns.color_palette('husl', len(channels))

wedges, texts, autotexts = ax.pie(
    channel_totals,
    labels=channel_labels,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)
ax.set_title('Purchase Channel Distribution', fontsize=14)

plt.tight_layout()
plt.show()

## 7. Customer Demographics Impact

In [None]:
# Average spend for customers with Kidhome > 0 versus Kidhome == 0
# NOTE(review): only 'Kidhome' is used for the split; a 'Teenhome' column, if
# present, is not considered — confirm this is the intended definition.
has_kids = df['Kidhome'] > 0
with_kids = df[has_kids]
without_kids = df[df['Kidhome'] == 0]

avg_spend_with_kids = with_kids["TotalSpend"].mean()
avg_spend_without_kids = without_kids["TotalSpend"].mean()
spend_ratio = avg_spend_without_kids/avg_spend_with_kids

print('Spending by Household Composition:')
print('=' * 40)
print(f'With kids: ${avg_spend_with_kids:.2f} avg spend')
print(f'Without kids: ${avg_spend_without_kids:.2f} avg spend')
print(f'\nCustomers without kids spend {spend_ratio:.1f}x more!')

In [None]:
# Mean TotalSpend per 'Education' value, highest first
print('\nSpending by Education Level:')
print('=' * 40)

mean_spend_by_edu = df.groupby('Education')['TotalSpend'].mean()
edu_spending = mean_spend_by_edu.sort_values(ascending=False)

for edu, spend in edu_spending.items():
    print(f'  {edu}: ${spend:.2f}')

In [None]:
# Bar chart of average spend per education level, in the order of
# `edu_spending`
fig, ax = plt.subplots(figsize=(10, 6))

edu_colors = sns.color_palette('husl', len(edu_spending))
edu_spending.plot.bar(ax=ax, color=edu_colors)

ax.set_title('Average Spending by Education Level', fontsize=14)
ax.set_xlabel('Education Level', fontsize=12)
ax.set_ylabel('Average Spend ($)', fontsize=12)

plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 8. Product Category Analysis

In [None]:
# Total revenue per product category and each category's share of the sum
# Display label -> source amount column
category_columns = {
    'Wines': 'MntWines',
    'Meat': 'MntMeatProducts',
    'Gold': 'MntGoldProds',
    'Fish': 'MntFishProducts',
    'Sweets': 'MntSweetProducts',
    'Fruits': 'MntFruits',
}
categories = {
    label: df[column].sum() for label, column in category_columns.items()
}

print('Revenue by Product Category:')
print('=' * 40)

# The denominator is the same for every row, so compute it once
category_total = sum(categories.values())
ranked_categories = sorted(
    categories.items(), key=lambda item: item[1], reverse=True
)
for cat, rev in ranked_categories:
    print(f'  {cat}: ${rev:,.2f} ({rev/category_total*100:.1f}%)')

In [None]:
# Bar chart of category revenue, largest first, with a dollar label per bar
fig, ax = plt.subplots(figsize=(10, 6))

sorted_cats = dict(
    sorted(categories.items(), key=lambda item: item[1], reverse=True)
)
category_names = list(sorted_cats.keys())
category_revenue = list(sorted_cats.values())

bars = ax.bar(
    category_names,
    category_revenue,
    color=sns.color_palette('husl', len(sorted_cats)),
)
ax.set_ylabel('Total Revenue ($)', fontsize=12)
ax.set_title('Revenue by Product Category', fontsize=14)

for bar in bars:
    # Anchor each label at the top-centre of its bar
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width()/2
    ax.text(label_x, height, f'${height:,.0f}',
            ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.show()

## 9. Summary & Key Insights

### Business Insights:

1. **Revenue Concentration**: Top 20% of customers generate 52% of total $1.36M revenue
2. **Campaign Performance**: The overall response rate is 27.2%; among the individual campaigns, "Response" performs best at 14.9%
3. **Household Impact**: Customers without kids at home (`Kidhome` = 0) spend 4.5x more on average than those with kids
4. **Channel Preference**: In-store purchases dominate (46%), followed by web (33%)
5. **Product Mix**: Wines are the top revenue driver, followed by meat products
6. **Education Correlation**: PhD holders show the highest average spend ($672)