# 📝 Sales Analysis Project Report

## 1. Data Wrangling

### 1.1 Cleaning and Preparing the Data

In [None]:
# Cleaning 'Income' and handling missing values
# Strip '$' and ',' characters, then parse the remainder as float. The pattern
# is a raw string so that '\$' reaches the regex engine unchanged instead of
# being treated as an (invalid) Python string escape.
income = marketing_data['Income'].replace(r'[\$,]', '', regex=True).astype(float)

# Fill missing incomes with the median. The result is assigned back to the
# column rather than using fillna(inplace=True) on a column selection: that is
# chained assignment, which emits a FutureWarning in pandas 2.x and leaves the
# DataFrame unmodified once Copy-on-Write is enabled.
marketing_data['Income'] = income.fillna(income.median())

### 1.2 Normalization

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every int64/float64 column of marketing_data to the scaler's default
# [0, 1] range, overwriting the original values in place.
# NOTE(review): this includes any count/identifier columns that happen to be
# int64, so later cells see scaled values for them - confirm that is intended.
numeric_columns = marketing_data.select_dtypes(include=['int64', 'float64']).columns

# Fit first, then transform; `scaler` keeps the per-column min/max it learned.
scaler = MinMaxScaler().fit(marketing_data[numeric_columns])
marketing_data[numeric_columns] = scaler.transform(marketing_data[numeric_columns])

## 2. Data Analysis

### 2.1 Descriptive Statistics

In [None]:
# Descriptive statistics (mean / median / mode / std) for the selected columns
purchase_columns = [
    'Income',
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]
purchase_frame = marketing_data[purchase_columns]

# Every value below is a Series indexed by column name, so the final frame has
# one row per entry of purchase_columns and one column per statistic.
descriptive_stats = {
    "Mean": purchase_frame.mean(),
    "Median": purchase_frame.median(),
    # .mode() can return several rows when values tie; keep only the first row.
    "Mode": purchase_frame.mode().iloc[0],
    "Standard Deviation": purchase_frame.std(),
}

pd.DataFrame(descriptive_stats)

### 2.2 Time-Based Sales Reports

In [None]:
# Parse 'Dt_Customer', derive calendar fields, and total spend per period
spend_columns = ['MntWines', 'MntFruits', 'MntMeatProducts',
                 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']

marketing_data['Dt_Customer'] = pd.to_datetime(marketing_data['Dt_Customer'], format='%m/%d/%y')
# Row-wise sum across the six 'Mnt*' columns.
marketing_data['TotalSpend'] = marketing_data[spend_columns].sum(axis=1)

# Calendar parts of the parsed date. The year is not part of any of these, so
# the same week/month/quarter number from different years lands in one group.
customer_dt = marketing_data['Dt_Customer'].dt
marketing_data['Week'] = customer_dt.isocalendar().week
marketing_data['Month'] = customer_dt.month
marketing_data['Quarter'] = customer_dt.quarter

# One TotalSpend sum per distinct Week / Month / Quarter value.
weekly, monthly, quarterly = (
    marketing_data.groupby(period)['TotalSpend'].sum()
    for period in ('Week', 'Month', 'Quarter')
)

# The three series are aligned on their integer index; months are reindexed to
# 1..12 and quarters to 1..4 so absent periods appear as NaN.
pd.DataFrame({
    'Weekly Spend': weekly,
    'Monthly Spend': monthly.reindex(range(1, 13)),
    'Quarterly Spend': quarterly.reindex(range(1, 5)),
})

## 3. Data Visualization

### 3.1 State-Wise Demographic Analysis

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")

# The normalization cell MinMax-scaled every int64/float64 column in place, so
# 'Kidhome' and 'Teenhome' may now hold fractions in [0, 1] instead of counts.
# Adding the scaled values and mapping them with integer keys would turn every
# fractional sum into NaN. Undo the scaling with the fitted scaler first so the
# sum is a real number of children.
unscaled = marketing_data[numeric_columns].copy()
unscaled[numeric_columns] = scaler.inverse_transform(unscaled)


def _raw_count(column):
    """Return `column` on its original scale (unchanged if it was never scaled)."""
    return unscaled[column] if column in unscaled.columns else marketing_data[column]


# round() removes floating-point residue left by the inverse transform
# (e.g. 0.9999999 -> 1.0) so the values match the integer keys below.
children_at_home = (_raw_count('Kidhome') + _raw_count('Teenhome')).round()

# Counts that are not a key of this mapping become NaN and are therefore
# dropped by the groupby below.
marketing_data['Demographic'] = children_at_home.map(
    {0: 'Adults Only', 1: 'Family', 2: 'Family', 3: 'Large Family'}
)

# Mean TotalSpend for every (Country, Demographic) pair, as a flat frame.
state_demo = marketing_data.groupby(['Country', 'Demographic'])['TotalSpend'].mean().reset_index()
plt.figure(figsize=(12, 6))
sns.barplot(data=state_demo, x='Country', y='TotalSpend', hue='Demographic')
plt.title('Average Total Spend by State and Demographic Group')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

### 3.2 Group-Wise Sales Breakdown

In [None]:
# Mean of each 'Mnt*' column per Demographic label, then reshaped to long form
# (one row per Demographic/Product pair) for a grouped bar chart.
product_columns = ['MntWines', 'MntFruits', 'MntMeatProducts',
                   'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']

group_sales = (
    marketing_data
    .groupby('Demographic')[product_columns]
    .mean()
    .reset_index()
)
group_sales_melted = group_sales.melt(
    id_vars='Demographic',
    var_name='Product',
    value_name='Average Spend',
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x='Product', y='Average Spend', hue='Demographic', data=group_sales_melted, ax=ax)
ax.set_title('Group-wise Average Product Spend Across Demographics')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

### 3.3 Recency-Based Engagement Analysis

In [None]:
bins = [0, 10, 30, 60, 90, 120]
labels = ['Very Recent (0-10)', 'Recent (11-30)', 'Moderate (31-60)', 'Stale (61-90)', 'Old (91-120)']

# 'Recency' was MinMax-scaled in place by the normalization cell (when it is an
# int64/float64 column). Multiplying the scaled value by a fixed 120 only
# restores the original numbers if the raw column spanned exactly 0..120, which
# nothing here guarantees. Use the fitted scaler to recover the real values.
if 'Recency' in numeric_columns:
    unscaled = marketing_data[numeric_columns].copy()
    unscaled[numeric_columns] = scaler.inverse_transform(unscaled)
    recency_days = unscaled['Recency']
else:
    recency_days = marketing_data['Recency']

# Values outside [0, 120] fall outside every bin and get a NaN range.
marketing_data['RecencyRange'] = pd.cut(recency_days, bins=bins, labels=labels, include_lowest=True)

# observed=False keeps every label in the result (empty bins show as NaN) and
# pins the behaviour explicitly, avoiding the pandas FutureWarning about the
# changing default for categorical groupers.
recency_analysis = (
    marketing_data
    .groupby('RecencyRange', observed=False)['TotalSpend']
    .mean()
    .reset_index()
)

plt.figure(figsize=(10, 5))
sns.barplot(data=recency_analysis, x='RecencyRange', y='TotalSpend', palette='viridis')
plt.title('Average Spend by Recency of Last Purchase')
plt.xticks(rotation=30)
plt.tight_layout()
plt.show()

## 4. Recommendations

- Focus on high-spending demographics and states.
- Introduce targeted programs in lower-revenue states.
- Leverage recency trends to optimize campaign timing.

## 5. Tools Used

- **Python** (pandas, numpy, seaborn, matplotlib, scikit-learn)
- **Jupyter Notebook**