In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime as dt

In [None]:
# Global plotting theme: dark grid background with fonts scaled up by 20%.
# A single sns.set call applies both the style and the font scale.
sns.set(style = 'darkgrid', font_scale = 1.2)

In [None]:
# Read the marketing CSV into a DataFrame and preview the first rows.
csv_path = '/kaggle/input/marketing-data/marketing_data.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Summary of the frame: column names, dtypes and non-null counts.
data.info()

### Cleaning Variables

In [None]:
# The income column is named with surrounding spaces (' Income ') in the CSV
# header; give it a clean name so later cells can use data['Income'].
data = data.rename(columns = {' Income ' : 'Income'})

# Income is read as text containing '$' and ',' characters; remove both and
# convert to float.
# regex=False treats '$' as a literal character. As a regular expression '$' is
# the end-of-string anchor, and the default value of `regex` differs between
# pandas versions, so being explicit keeps the result the same everywhere.
data['Income'] = (
    data['Income']
    .str.replace('$', '', regex = False)
    .str.replace(',', '', regex = False)
    .astype(float)
)

In [None]:
# Convert the enrollment date column to datetime so the .dt accessor
# (year / month) can be used in later cells.
# NOTE(review): no explicit format is given, so pandas infers it — confirm the
# day/month order against the raw CSV.
data['Dt_Customer'] = pd.to_datetime(data['Dt_Customer'])

### Null Values

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Fill missing incomes with the mean of the known incomes.
income_mean = data['Income'].mean()
data['Income'] = data['Income'].fillna(income_mean)

### Create more variables

+ Age of each customer when they enrolled with the company

In [None]:
# Age at enrollment, approximated as enrollment year minus birth year.
enroll_year = data['Dt_Customer'].dt.year
data['Customer_Age_When_Enrolled'] = enroll_year - data['Year_Birth']

+ Customers who enrolled at age 80 or older

In [None]:
# Show the rows whose computed enrollment age is 80 or more.
data[data['Customer_Age_When_Enrolled'] >= 80]

In [None]:
# Total amount spent per customer: element-wise sum of the six Mnt* columns.
data['Total Spent'] = (
    data['MntFishProducts']
    .add(data['MntWines'])
    .add(data['MntSweetProducts'])
    .add(data['MntFruits'])
    .add(data['MntMeatProducts'])
    .add(data['MntGoldProds'])
)

In [None]:
# Total purchases per customer: element-wise sum over the four purchase channels.
data['Total Purchases'] = (
    data['NumDealsPurchases']
    .add(data['NumWebPurchases'])
    .add(data['NumStorePurchases'])
    .add(data['NumCatalogPurchases'])
)

#### Reducing columns

+ Merging columns Kids at home and Teenagers at home

In [None]:
# Collapse the two household columns into a single count of children at home,
# then remove the originals.
children_at_home = data['Kidhome'] + data['Teenhome']
data['Kids_Teen_at_home'] = children_at_home
data = data.drop(columns = ['Kidhome', 'Teenhome'])

# Exploratory Data Analysis

#### Birth year

In [None]:
# Box plot of the Year_Birth column.
sns.boxplot(data = data['Year_Birth'], color = 'r')

+ Remove outliers (birth years before 1920)

In [None]:
# Keep only customers born in 1920 or later.
born_1920_or_later = data['Year_Birth'] >= 1920
data = data[born_1920_or_later]

In [None]:
# Violin plot of Year_Birth, drawn after rows with Year_Birth < 1920 were dropped.
sns.violinplot(data = data['Year_Birth'], color = 'purple')

### Education

In [None]:
# Bar chart of the number of rows at each Education level.
plt.figure(figsize = (7, 6))
sns.countplot(x = data['Education'], palette = 'rocket_r')

### Marital Status

In [None]:
# Number of rows per marital status; value_counts orders them most frequent first.
m = data['Marital_Status'].value_counts().to_frame('Count')

# Offsets that pull slices out of the pie: a small one for the first (largest)
# slice and increasingly large ones for the last (smallest) slices, presumably
# to keep their labels from overlapping.
# plt.pie requires exactly one offset per slice, so fit the offsets to the number
# of categories actually present (pad with 0 / truncate) instead of assuming
# there are always eight.
base_explode = (0.1, 0, 0, 0, 0, 1, 2.5, 4)
explode = (list(base_explode) + [0] * len(m))[:len(m)]

# set_palette changes the default colour cycle for this and later plots.
sns.set_palette('Blues_r')
plt.figure(figsize = (6, 6))
plt.pie(m['Count'], labels = m.index, explode = explode, shadow = True, autopct = '%1.1f%%')
plt.show()

### Income 

In [None]:
# Box plot of the Income column.
sns.boxplot(data = data['Income'], color = 'gold')

In [None]:
# Drop rows whose income is 200000 or more.
income_below_cap = data['Income'] < 200000
data = data[income_below_cap]

In [None]:
# Distribution of Income after rows with Income >= 200000 were dropped.
# NOTE(review): distplot is marked deprecated in recent seaborn releases —
# confirm against the installed version before upgrading.
sns.distplot(data['Income'], color = 'aqua')

### Education vs Income

In [None]:
# Income distribution for each Education level, one violin per level.
# set_palette changes the default palette for this and later plots.
sns.set_palette('rocket')
plt.figure(figsize = (12, 7))
sns.violinplot(y = data['Income'], x = data['Education'])

### Income vs Gold

In [None]:
# Scatter plot with a fitted regression line: MntGoldProds (x) against Income (y).
sns.regplot(x = data['MntGoldProds'], y = data['Income'], color = 'gold')
plt.xlabel('Amount Spent on Gold')

### Income vs Total Spent in last two years

In [None]:
# Scatter plot with a fitted regression line: Total Spent (x) against Income (y).
sns.regplot(x = data['Total Spent'], y = data['Income'], color = 'red')

### Recency

In [None]:
# Distribution of the Recency column.
# NOTE(review): distplot is marked deprecated in recent seaborn releases —
# confirm against the installed version before upgrading.
plt.figure(figsize = (7, 5))
sns.distplot(data['Recency'], color = 'm')

### Customer age when enrolled

In [None]:
# Distribution of the derived Customer_Age_When_Enrolled column.
# NOTE(review): distplot is marked deprecated in recent seaborn releases —
# confirm against the installed version before upgrading.
plt.figure(figsize = (7, 5))
sns.distplot(data['Customer_Age_When_Enrolled'], color = 'brown')
plt.xlabel('Customer Age When Enrolled')

### Number of Customers from each Country

In [None]:
# Bar chart of the number of rows (customers) per Country.
plt.figure(figsize = (7, 5))
sns.countplot(x = data['Country'], palette = 'icefire_r')
plt.ylabel('No. of Customers')

## Which country has the highest number of purchases?

In [None]:
# Distribution of per-customer Total Purchases within each Country.
plt.figure(figsize = (10, 7))
sns.violinplot(x = data['Country'], y = data['Total Purchases'], palette = 'coolwarm')

In [None]:
# Total purchases summed per country. The aggregation is named by the string
# 'sum' rather than the Python builtin: recent pandas warns when a builtin is
# passed to agg, and the resulting column label ('sum') is the same either way.
pm = data[['Total Purchases', 'Country']].groupby(['Country']).agg(['sum'])

# Offsets that pull selected slices out of the pie. plt.pie requires exactly one
# offset per slice, so fit them to the number of countries actually present
# (pad with 0 / truncate) instead of assuming there are always eight.
base_explode = (0, 0.5, 0, 0.3, 1, 0, 0, 0.5)
explode = (list(base_explode) + [0] * len(pm))[:len(pm)]

# set_palette changes the default colour cycle for this and later plots.
sns.set_palette('Spectral')
plt.figure(figsize = (7, 7))
plt.pie(pm['Total Purchases']['sum'], labels = pm.index, explode = explode,
        shadow = True, autopct = '%1.1f%%')
plt.show()

### Number of Enrollments in each month and year

In [None]:
# Number of enrollments per calendar month (month number taken from Dt_Customer).
plt.figure(figsize = (8, 5))
sns.countplot(x = data['Dt_Customer'].dt.month)
plt.xlabel('Month')
plt.ylabel('Enrollments')

In [None]:
# Number of enrollments per year (year taken from Dt_Customer).
sns.countplot(x = data['Dt_Customer'].dt.year)
plt.xlabel('Year')
plt.ylabel('Enrollments')

## Purchases made through each source

In [None]:
plt.figure(figsize = (8, 6))
# Column totals for the four purchase channels. 'sum' is passed as a string
# because recent pandas warns when the Python builtin is given to agg; the
# resulting row label ('sum') is unchanged.
tp = data[['NumDealsPurchases', 'NumWebPurchases', 'NumStorePurchases', 'NumCatalogPurchases']].agg(['sum'])

# Attach the display names to the data itself so that each bar is labelled by
# its own column, rather than overwriting tick labels by position afterwards.
channel_labels = {
    'NumDealsPurchases' : 'Deals',
    'NumWebPurchases' : 'Web',
    'NumStorePurchases' : 'Store',
    'NumCatalogPurchases' : 'Catalog',
}
channel_totals = tp.T['sum'].rename(index = channel_labels)

sns.barplot(x = channel_totals.index, y = channel_totals, palette = 'mako_r')
plt.xlabel('Purchase Through')
plt.ylabel('Purchases')
plt.show()

## Income vs Purchases

In [None]:
# Scatter plot with a fitted regression line: Total Purchases (x) against Income (y).
plt.figure(figsize = (7, 5))
sns.regplot(x = data['Total Purchases'], y = data['Income'], color = 'grey')

## Income vs Wine

In [None]:
# Scatter plot with a fitted regression line: MntWines (x) against Income (y).
plt.figure(figsize = (7, 5))
sns.regplot(x = data['MntWines'], y = data['Income'], color = 'hotpink')
plt.xlabel('Spent on Wine')

## Proportion of customers that spent almost nothing on gold

In [None]:
# Bin edges for the amount spent on gold products.
bins = [0, 10, 30, 50, 100, 200, 500]
# Readable labels for the bins; the first bin is closed on both sides because
# include_lowest=True is used below.
bin_labels = (['[{}, {}]'.format(bins[0], bins[1])]
              + ['({}, {}]'.format(lo, hi) for lo, hi in zip(bins[1:-1], bins[2:])])

b = data[['ID']].copy()
# include_lowest=True keeps customers who spent exactly 0 in the first bin.
# With the default (right-closed, lowest edge excluded) those rows fall outside
# every bin, become NaN and are silently dropped from the chart.
b['Gold Bins'] = pd.cut(data['MntGoldProds'], bins = bins, labels = bin_labels, include_lowest = True)
# observed=False keeps every bin in the result even if it has no rows, so the
# number of slices always matches the six explode offsets below.
b = b.groupby('Gold Bins', observed = False).agg([len])

# set_palette changes the default colour cycle for this and later plots.
sns.set_palette('viridis_r')
plt.figure(figsize = (10, 7))
plt.pie(b['ID']['len'], labels = b.index,  explode = (0.1, 0, 0.2, 0, 0, 0.3), shadow = True, autopct = '%1.1f%%')
plt.show()

## Amount spent on different Products

In [None]:
# Total amount spent per product column. 'sum' is passed as a string because
# recent pandas warns when the Python builtin is given to agg; the resulting
# column label ('sum') is unchanged.
# NOTE(review): MntWines is part of 'Total Spent' but is not included here —
# confirm whether leaving it out of this chart is intentional.
prod = data[['MntFruits', 'MntSweetProducts', 'MntFishProducts', 'MntMeatProducts', 'MntGoldProds']].agg(['sum']).T

# Attach the display names to the data itself so that each bar is labelled by
# its own column, rather than overwriting tick labels by position afterwards.
product_labels = {
    'MntFruits' : 'Fruits',
    'MntSweetProducts' : 'Sweet',
    'MntFishProducts' : 'Fish',
    'MntMeatProducts' : 'Meat',
    'MntGoldProds' : 'Gold',
}
product_totals = prod['sum'].rename(index = product_labels)

sns.barplot(x = product_totals.index, y = product_totals)
plt.xlabel('Products')
plt.ylabel('Amount')

# Work still in progress...


# Upvote if you like my work.....