# IITKCourse - Course 2: Marketing Campaign Analysis

This notebook performs exploratory data analysis (EDA), data cleaning, visualization, and hypothesis testing on a marketing campaign dataset.

---

### Problem Statement Summary

- Verify that key columns such as `Dt_Customer` (date) and `Income` (numeric) are parsed with the correct data types
- Impute missing incomes based on `Education` and `Marital_Status`
- Create new variables: age, total children, total spending, total purchases
- Visualize distributions and treat outliers
- Encode categorical variables
- Generate correlation heatmap
- Test hypotheses on shopping behavior and country spending
- Generate visual insights on product sales, campaign acceptance, and complaints


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import warnings
warnings.filterwarnings('ignore')

# Read the campaign CSV (decoded as 'utf-8-sig') and trim surrounding
# whitespace from every header name in the same expression.
df = pd.read_csv('marketing_data.csv', encoding='utf-8-sig').rename(columns=str.strip)


In [None]:
# Parse Dt_Customer as datetime. errors='coerce' turns values that cannot be
# parsed into NaT instead of raising, so report how many ended up missing.
df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'], errors='coerce')
print('Missing/unparseable Dt_Customer values:', df['Dt_Customer'].isnull().sum())

# Strip '$' and ',' from Income and convert to float.
# Raw string: '\$' is not a valid Python string escape, so the non-raw form
# emits an invalid-escape warning on recent Python versions.
df['Income'] = df['Income'].replace(r'[\$,]', '', regex=True).astype(float)

# Normalise the categorical labels (trim whitespace, Title Case) so that the
# same category is not split across differently formatted spellings.
df['Education'] = df['Education'].str.strip().str.title()
df['Marital_Status'] = df['Marital_Status'].str.strip().str.title()

# Impute missing Income with the median of the matching
# (Education, Marital_Status) group. fillna only touches rows whose Income is
# missing, so existing values are never overwritten.
group_median_income = (
    df.groupby(['Education', 'Marital_Status'])['Income'].transform('median')
)
df['Income'] = df['Income'].fillna(group_median_income)

# Rows that still have no Income (no usable group median was available) are
# dropped, as before.
df = df.dropna(subset=['Income'])

print('Missing Income after imputation:', df['Income'].isnull().sum())
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() would add a stray "None" line to the output.
df[['Dt_Customer', 'Income']].info()

In [None]:
# Create Age, Children, Total Spending, Total Purchases

# Anchor Age to the most recent enrolment year present in the data rather than
# to the wall-clock year: otherwise Age (and any age-based filtering) changes
# depending on when the notebook is run. Falls back to the current year if no
# enrolment date is available.
latest_enrollment = df['Dt_Customer'].max()
reference_year = (
    dt.datetime.now().year if pd.isna(latest_enrollment) else latest_enrollment.year
)
df['Age'] = reference_year - df['Year_Birth']

# Total number of children living in the household (kids + teenagers).
df['Children'] = df['Kidhome'] + df['Teenhome']

# Total amount spent across all product categories.
spend_cols = ['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
df['Total_Spending'] = df[spend_cols].sum(axis=1)

# Total number of purchases across the three channels (web, catalog, store).
channel_cols = ['NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases']
df['Total_Purchases'] = df[channel_cols].sum(axis=1)

df.head(3)

In [None]:
# Visualize Income and Age distributions (before outlier treatment)
sns.boxplot(x=df['Income'])
plt.title('Income Distribution')
plt.show()

sns.histplot(df['Age'], bins=20, kde=True)
plt.title('Age Distribution')
plt.show()

# Remove outliers in Age and Income.
# Thresholds are named so they are easy to find and tune. Rows with a missing
# Age or Income also fail these comparisons and are therefore removed.
MAX_AGE = 100
MAX_INCOME = 200000
# .copy() makes the filtered frame independent of the original, so the new
# columns added in later cells are assigned to a real frame, not a slice.
df = df[(df['Age'] < MAX_AGE) & (df['Income'] < MAX_INCOME)].copy()
print('Dataset shape after outlier removal:', df.shape)

In [None]:
# Education -> ordinal rank: 'Basic' = 1 up to 'Phd' = 5.
# Labels that are not in the mapping become NaN in Education_Ordinal.
ordinal_map = {
    level: rank
    for rank, level in enumerate(
        ['Basic', '2N Cycle', 'Graduation', 'Master', 'Phd'], start=1
    )
}
df['Education_Ordinal'] = df['Education'].map(ordinal_map)

# Dummy-encode the nominal columns, dropping the first level of each one.
nominal_cols = ['Marital_Status', 'Country']
df_encoded = pd.get_dummies(df, columns=nominal_cols, drop_first=True)
print(df_encoded.head(2))

In [None]:
# Correlation heatmap.
# df_encoded still contains non-numeric columns (e.g. the Education labels and
# the Dt_Customer dates); DataFrame.corr() on such a frame raises on
# pandas >= 2.0, so restrict the input to numeric and boolean columns first.
corr_matrix = df_encoded.select_dtypes(include=['number', 'bool']).corr()

plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# a) Age vs NumStorePurchases
sns.scatterplot(x='Age', y='NumStorePurchases', data=df)
plt.title('Age vs. Store Purchases')
plt.show()

# b) Children vs NumWebPurchases
sns.boxplot(x='Children', y='NumWebPurchases', data=df)
plt.title('Children vs. Web Purchases')
plt.show()

# c) Store vs Online purchases (online = web + catalog)
df['Total_Online'] = df['NumWebPurchases'] + df['NumCatalogPurchases']
sns.scatterplot(x='NumStorePurchases', y='Total_Online', data=df)
plt.title('Store vs. Online Purchases')
plt.show()

# d) USA vs Rest of World total spending
# Vectorised flag: tolerant of surrounding whitespace and letter case, and a
# missing Country counts as "not US" instead of raising on .upper().
df['Is_US'] = df['Country'].str.strip().str.upper().eq('US').astype(int)

# reindex guarantees both groups (0 = rest of world, 1 = USA) are present and
# in that order, so the tick labels below always match their bars.
spend_by_region = (
    df.groupby('Is_US')['Total_Spending'].sum().reindex([0, 1], fill_value=0)
)
spend_by_region.plot(kind='bar')
plt.xticks([0,1], ['Rest of World', 'USA'], rotation=0)
plt.title('Total Spending: USA vs. Rest of World')
plt.ylabel('Total Spending')
plt.show()

In [None]:
# a) Total spend per product category, largest first
product_cols = ['MntWines','MntFruits','MntMeatProducts','MntFishProducts','MntSweetProducts','MntGoldProds']
product_totals = df[product_cols].sum().sort_values(ascending=False)
product_totals.plot.bar()
plt.title('Product Spending')
plt.ylabel('Total Spend')
plt.show()

# b) Age distribution split by campaign response
sns.boxplot(data=df, x='Response', y='Age')
plt.title('Age vs. Campaign Response')
plt.show()

# c) Number of accepted campaigns (Response == 1) per country
accepted_by_country = df.loc[df['Response'] == 1, 'Country'].value_counts()
accepted_by_country.plot.bar()
plt.title('Campaign Acceptance by Country')
plt.ylabel('Count')
plt.show()

# d) Total spending split by number of children
sns.boxplot(data=df, x='Children', y='Total_Spending')
plt.title('Children vs. Total Spending')
plt.show()

# e) Sum of the Complain column per education level
complaints_by_education = df.groupby('Education')['Complain'].sum()
complaints_by_education.plot.bar()
plt.title('Complaints by Education Level')
plt.ylabel('Total Complaints')
plt.show()

# 📊 Interactive Dashboard Section using Plotly

This section provides interactive visualizations summarizing key business insights using Plotly.


In [None]:
import plotly.express as px

# Interactive bar chart: total spend per product category, largest first
product_sums = (
    df[['MntWines','MntFruits','MntMeatProducts','MntFishProducts','MntSweetProducts','MntGoldProds']]
    .sum()
    .sort_values(ascending=False)
)
fig_products = px.bar(
    x=product_sums.index,
    y=product_sums.values,
    title='Product Spending',
    labels={'x':'Product', 'y':'Total Spend'},
)
fig_products.show()

# Interactive box plot: Age split by campaign response
fig_age_resp = px.box(
    df,
    x='Response',
    y='Age',
    title='Age vs Campaign Response',
    labels={'Response':'Campaign Response', 'Age':'Age'},
)
fig_age_resp.show()

# Interactive bar chart: accepted campaigns (Response == 1) per country
country_accept = df.loc[df['Response'] == 1, 'Country'].value_counts().reset_index()
country_accept.columns = ['Country', 'Count']
fig_country = px.bar(
    country_accept,
    x='Country',
    y='Count',
    title='Campaign Acceptances by Country',
)
fig_country.show()

# Interactive box plot: total spending split by number of children
fig_children = px.box(
    df,
    x='Children',
    y='Total_Spending',
    title='Children vs Total Spending',
    labels={'Children':'Number of Children', 'Total_Spending':'Total Spending'},
)
fig_children.show()

# Interactive bar chart: sum of the Complain column per education level
edu_complain = df.groupby('Education')['Complain'].sum().reset_index()
fig_complain = px.bar(
    edu_complain,
    x='Education',
    y='Complain',
    title='Complaints by Education Level',
    labels={'Complain':'Total Complaints'},
)
fig_complain.show()
