In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the raw CSV inside the Kaggle input mount.
DATA_PATH = '/kaggle/input/marketing-data/marketing_data.csv'

# Read the file into the working DataFrame that the following cells operate on.
df = pd.read_csv(DATA_PATH)

In [None]:
# Raise the display limit to the frame's own column count so that the preview
# below shows every column instead of eliding the middle ones.
pd.set_option('display.max_columns', df.shape[1])
df.head()

# Data Cleaning

In [None]:
# The CSV header for this column is padded with spaces (' Income '); normalise
# it so the column can be addressed as df['Income'].
df = df.rename(columns={' Income ': 'Income'})

# Strip the '$' and ',' characters and convert the text to float.
# - regex=False: '$' is a regex metacharacter (end-of-string anchor) and the
#   default of `regex` differs between pandas versions, so be explicit that
#   both patterns are literal text.
# - The dtype guard keeps the cell re-runnable: once Income is numeric the
#   .str accessor would raise, so the conversion is skipped.
if not pd.api.types.is_numeric_dtype(df['Income']):
    df['Income'] = (
        df['Income']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )

In [None]:
# Count missing values per column to see which ones need imputing.
df.isna().sum()

In [None]:
# Impute missing incomes with the column mean.
# The result is assigned back rather than calling fillna(..., inplace=True) on
# the df['Income'] selection: with pandas Copy-on-Write that in-place call
# operates on a temporary object and leaves df itself unchanged.
income_mean = df['Income'].mean()
df['Income'] = df['Income'].fillna(income_mean)

# Data Preparation For Analysis

In [None]:
# Summarise the frame (column names, non-null counts, dtypes) before the
# feature-preparation steps that follow.
df.info()

In [None]:
# Convert the Dt_Customer column to datetime so the .dt accessor can be used
# on it in the next step.
# NOTE(review): no explicit format= is given, so pandas infers the date layout
# from the data — confirm the inferred day/month order matches the CSV.
dt_customer_parsed = pd.to_datetime(df['Dt_Customer'])
df['Dt_Customer'] = dt_customer_parsed

In [None]:
# Age is computed relative to the year of Dt_Customer (not the current year):
# year component of that date minus the birth year.
dt_customer_year = df['Dt_Customer'].dt.year
df['Customer_age'] = dt_customer_year - df['Year_Birth']

It will be easier to get insights if we bucket customers into age groups.

In [None]:
# Bucket ages into three right-closed intervals: (0, 35], (35, 50], (50, 150].
# Ages outside these edges become NaN in the resulting categorical column.
age_bins = [0, 35, 50, 150]
age_labels = ['Adult', 'Middle', 'Old']
df['Customer_age_grp'] = pd.cut(df['Customer_age'], bins=age_bins, labels=age_labels)

In [None]:
# Total purchases = sum of the four Num*Purchases columns (element-wise, so a
# missing value in any of them propagates to the total).
df['Total_purchases'] = (
    df['NumDealsPurchases']
    .add(df['NumWebPurchases'])
    .add(df['NumCatalogPurchases'])
    .add(df['NumStorePurchases'])
)

# Children = Kidhome + Teenhome.
df['Children'] = df['Kidhome'].add(df['Teenhome'])

# EDA + Visualization

## 1. Income of Age groups

In [None]:
# Bar height is seaborn's default aggregate of Income within each age group.
# barplot returns the Axes it drew on; label that Axes directly instead of
# going through the pyplot "current axes" state.
income_ax = sns.barplot(x='Customer_age_grp', y='Income', data=df, palette='rainbow')
income_ax.set_title('Income of Age Groups', fontsize=20)
income_ax.set_xlabel('Customer Age Group', fontsize=12)
income_ax.set_ylabel('Income', fontsize=12);

## 2. Income of Age groups country-wise

In [None]:
# Mean income per country, with one bar per age group inside each country.
fig, ax = plt.subplots(figsize=(12, 9))

sns.barplot(
    x='Country',
    y='Income',
    hue='Customer_age_grp',
    data=df,
    estimator=np.mean,
    palette='autumn',
    ax=ax,
)
ax.set_title('Income w.r.t Country', fontsize=20)
ax.set_xlabel('Country', fontsize=12)
ax.set_ylabel('Income', fontsize=12);

Deduction: We can see that in Montenegro the Adult group has a high income, while in India the Adult group has the lowest income.

## 3. Purchases w.r.t Age group

In [None]:
plt.figure(figsize=(12,9))

# Distribution of Total_purchases within each age group.
# Drawn on the current Axes (the figure is created just above this statement).
purchases_ax = sns.boxplot(x='Customer_age_grp', y='Total_purchases', data=df, palette='autumn')
purchases_ax.set_title('Purchases w.r.t Age', fontsize=20)
purchases_ax.set_xlabel('Age Group', fontsize=12)
purchases_ax.set_ylabel('Purchases', fontsize=12);

Deduction: The purchasing power of old people (measured here by total purchases) is higher than that of adults and middle-aged people.

## 4. Purchases w.r.t Country

In [None]:
plt.figure(figsize=(12,9))

# Distribution of Total_purchases within each country.
# Drawn on the current Axes (the figure is created just above this statement).
country_ax = sns.boxplot(x='Country', y='Total_purchases', data=df, palette='Blues_r')
country_ax.set_title('Purchases w.r.t Country', fontsize=20)
country_ax.set_xlabel('Country', fontsize=12)
country_ax.set_ylabel('Purchases', fontsize=12);

**Deduction: People in Spain have more purchasing power than people in other countries**

## 5. Purchases w.r.t Income

In [None]:
# Scatter of Total_purchases against Income, one point per row of df.
# No `palette` is passed: without a `hue` variable seaborn has nothing to
# colour-map, so the argument had no effect (and newer versions warn about it).
fig, ax = plt.subplots(figsize=(12, 9))

sns.scatterplot(x='Income', y='Total_purchases', data=df, ax=ax)
ax.set_title('Purchases w.r.t Income', fontsize=20)
ax.set_xlabel('Income', fontsize=12)
ax.set_ylabel('Purchases', fontsize=12);

**Deduction: Most of the purchases are made by low-income customers**

## 6. Product Purchased w.r.t Children

In [None]:
# Stacked bar chart: for each value of Children, the summed product columns
# stacked on top of each other.
products = ['MntWines', 'MntFruits',
       'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds']

color = ['purple','green','gold','blue','red','brown']

# Aggregate first: one row per Children value, one column per product.
# Passing the raw per-row columns to bar() would draw one bar per row of df,
# all overlapping at the same x positions, so nothing would be aggregated.
totals = df.groupby('Children')[products].sum()
positions = np.arange(len(totals))

fig, ax = plt.subplots(figsize=(10, 7))

# Each segment must start where the previously drawn segments end, so `bottom`
# is the running total of the products plotted so far (not the values of the
# next product in the list).
bottom = np.zeros(len(totals))
for p, c in zip(products, color):
    heights = totals[p].to_numpy()
    # Legend label: drop the 'Mnt' prefix and the 'Products'/'Prods' suffix.
    label = p.replace('Mnt','').replace('Products','').replace('Prods','')
    ax.bar(positions, heights, bottom=bottom, color=c, label=label)
    bottom = bottom + heights

ax.set_xlabel('Children')
ax.set_ylabel('No. of Products')
ax.set_title('No. of Products w.r.t Children', fontsize=15)

ax.legend()
# One tick per group, labelled with the group's Children value.
ax.set_xticks(positions)
ax.set_xticklabels(totals.index);

In [None]:
# Small-multiples view: one bar chart per product column against Children.
products = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]

row, col = 2, 3  # grid shape; row * col equals the number of products

fig, ax = plt.subplots(row, col, figsize=(17, 8))

# ax.flat walks the grid row by row, so the panels are filled in the same
# order as `products`.
for panel, product in zip(ax.flat, products):
    sns.barplot(x=df['Children'], y=df[product], palette='rainbow', ax=panel)
    panel.set_title('Children and ' + product.replace('Mnt', ''))

fig.tight_layout();

## 7. Product Purchased w.r.t Marital Status

In [None]:
# Stacked bar chart: for each Marital_Status value, the summed product columns
# stacked on top of each other.
products = ['MntWines', 'MntFruits',
       'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds']

color = ['purple','green','gold','blue','red','brown']

# Aggregate first: one row per Marital_Status value, one column per product.
# Passing the raw per-row columns to bar() would draw one bar per row of df,
# all overlapping at the same x positions, so nothing would be aggregated.
totals = df.groupby('Marital_Status')[products].sum()
positions = np.arange(len(totals))

fig, ax = plt.subplots(figsize=(10, 7))

# Each segment must start where the previously drawn segments end, so `bottom`
# is the running total of the products plotted so far (not the values of the
# next product in the list).
bottom = np.zeros(len(totals))
for p, c in zip(products, color):
    heights = totals[p].to_numpy()
    # Legend label: drop the 'Mnt' prefix and the 'Products'/'Prods' suffix.
    label = p.replace('Mnt','').replace('Products','').replace('Prods','')
    ax.bar(positions, heights, bottom=bottom, color=c, label=label)
    bottom = bottom + heights

ax.set_xlabel('Marital Status')
ax.set_ylabel('No. of Products')
ax.set_title('No. of Products w.r.t Marital Status', fontsize=15)

ax.legend()
# Bars sit at integer positions; label each one with its Marital_Status value.
ax.set_xticks(positions)
ax.set_xticklabels(totals.index);

In [None]:
# Small-multiples view: one bar chart per product column against Marital_Status.
products = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]

row, col = 2, 3  # grid shape; row * col equals the number of products

fig, ax = plt.subplots(row, col, figsize=(17, 8))

# ax.flat walks the grid row by row, so the panels are filled in the same
# order as `products`.
for panel, product in zip(ax.flat, products):
    sns.barplot(x=df['Marital_Status'], y=df[product], palette='rainbow', ax=panel)
    panel.set_title('Marital Status and ' + product.replace('Mnt', ''), fontsize=15)

fig.tight_layout();

## 8. Average Income w.r.t Education

In [None]:
# Mean income for each Education level.
fig, ax = plt.subplots(figsize=(12, 9))

sns.barplot(
    x='Education',
    y='Income',
    data=df,
    estimator=np.mean,
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Income and Education', fontsize=20)
ax.set_xlabel('Education', fontsize=12)
ax.set_ylabel('Income', fontsize=12);

## 9. Complain Count

In [None]:
# Count of rows per Complain value.
# The column is passed as x= by keyword: in seaborn >= 0.12 the only positional
# parameter of countplot is `data`, so a positional 'Complain' together with
# data=df raises a TypeError.
sns.countplot(x='Complain', data=df);

Analysis: Awesome! There are very few complaints.

# Please suggest any improvements; they will be very helpful for me and for other readers. Thanks!

# **UPVOTE if you liked it**