In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mp
from matplotlib import style
style.use('ggplot')
import seaborn as sns
import plotly as pt
import plotly.express as px
import plotly.subplots
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import cufflinks as cf
import plotly.offline as pyo
from plotly.offline import init_notebook_mode,plot,iplot
import folium as flm



%matplotlib inline

In [None]:
# Switch Plotly to offline notebook rendering so figures display inline.
pyo.init_notebook_mode(connected = True)
# Put cufflinks in offline mode as well; the DataFrame `.iplot(...)` calls
# later in the notebook rely on it.
cf.go_offline()

In [None]:
# Print the full path of every file found under the Kaggle input directory.
import os
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# Section 01: Exploratory Data Analysis

In [None]:
# Load the raw marketing dataset; `df` is the working frame that every later
# cell reads from and mutates.
df = pd.read_csv('../input/marketing-data/marketing_data.csv')

In [None]:
df.head()

In [None]:
df.shape

# Finding Null Values 

In [None]:
# Visual null check: plot the boolean null mask as a heatmap (row labels and
# colour bar hidden) so missing cells stand out per column.
sns.heatmap(df.isnull() , yticklabels = False , cmap = 'viridis' , cbar = False)

In [None]:
df.dtypes

In [None]:
# The raw header is ' Income ' (padded with spaces); give it a clean name.
df = df.rename(columns={' Income ': 'Income'})

In [None]:
# Convert Income from currency text to float by removing the '$' sign, the
# thousands separators and surrounding whitespace.
#
# regex=False matters: as a regular expression '$' is the end-of-string
# anchor, so on pandas versions where str.replace defaults to regex=True the
# dollar sign is never removed and the float conversion then fails.
#
# The numeric-dtype guard makes the cell safe to re-run after Income has
# already been converted (the .str accessor only exists on text columns).
if not pd.api.types.is_numeric_dtype(df['Income']):
    income_text = (
        df['Income']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    df['Income'] = income_text.astype(float)


In [None]:
# Cast the Dt_Customer column to pandas' datetime64[ns] dtype.
# NOTE(review): no explicit date format is given, so parsing is left to
# pandas' inference — confirm the raw values are unambiguous (day/month order).
df['Dt_Customer'] = df['Dt_Customer'].astype('datetime64[ns]')

In [None]:
# Confirm the two converted columns now carry the expected dtypes.
for converted_column in ('Income', 'Dt_Customer'):
    print(df[converted_column].dtypes)

In [None]:
df.Income.describe()

In [None]:
# Fill missing incomes with the column mean. The mean is taken over the data
# as it stands at this point, i.e. before the outlier rows are dropped in the
# "Finding Outliers" section below.
mean_income = df['Income'].mean()
df['Income'] = df['Income'].fillna(mean_income)

In [None]:
# Re-draw the null-mask heatmap after imputing Income to check that no
# missing values remain.
sns.heatmap(df.isnull() , yticklabels = False , cmap = 'viridis' , cbar = False)

# Finding Outliers

In [None]:
# Box plots for outlier inspection, one column per panel on a 3x3 grid
# (only the first seven slots are used).
mp.figure(1, figsize=(20, 20))

outlier_check_columns = [
    'ID', 'Year_Birth', 'Income', 'Kidhome', 'Teenhome', 'Recency', 'Complain',
]
for panel_number, column_name in enumerate(outlier_check_columns, start=1):
    mp.subplot(3, 3, panel_number)
    sns.boxplot(data=df, x=column_name)



In [None]:
df[df.Income > 600000]

In [None]:
# Remove the extreme income outlier shown by the previous cell.
# The rows are selected by condition instead of by a hard-coded row label, so
# the cell does not depend on the file's row order and is safe to re-run
# (dropping an empty selection is a no-op, whereas dropping a label that is
# already gone raises KeyError).
income_outlier_rows = df[df.Income > 600000].index
df.drop(income_outlier_rows, inplace=True)

In [None]:
df[df.Income > 150000]

In [None]:
df[df.Year_Birth<1910]

In [None]:
# Remove the customers with implausible birth years shown by the previous
# cell (Year_Birth < 1910). Selecting by condition rather than by hard-coded
# row labels keeps the cell independent of row order and safe to re-run.
implausible_birth_rows = df[df.Year_Birth < 1910].index
df.drop(implausible_birth_rows, inplace=True)

In [None]:
# Box plots of the amount spent per product category, on a 2x3 grid.
mp.figure(1, figsize=(20, 20))

product_amount_columns = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]
for panel_number, column_name in enumerate(product_amount_columns, start=1):
    mp.subplot(2, 3, panel_number)
    sns.boxplot(data=df, x=column_name)


In [None]:
# Histograms of the purchase/visit counts and the campaign/complaint flags,
# drawn side by side with a shared y axis; each panel is titled with its column.
histogram_columns = [
    'NumStorePurchases', 'NumWebVisitsMonth',
    'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5',
    'Response', 'Complain',
]

fig, axs = mp.subplots(1, 9, sharey=True, tight_layout=True, figsize=(20, 8))

for panel, column_name in zip(axs, histogram_columns):
    panel.hist(x=df[column_name])
    panel.title.set_text(column_name)

# Feature Engineering

In [None]:
# Total amount spent per customer: the six product-category amounts added
# together.
df['TotalMntSpent'] = (
    df['MntWines']
    .add(df['MntFruits'])
    .add(df['MntMeatProducts'])
    .add(df['MntSweetProducts'])
    .add(df['MntFishProducts'])
    .add(df['MntGoldProds'])
)

In [None]:
# Number of children in the household: kids plus teens.
df['TotalChildren'] = df['Kidhome'].add(df['Teenhome'])

In [None]:
# Per-customer purchase count: deal + web + catalog purchases.
# NOTE(review): NumStorePurchases is not part of this sum even though the
# column is named "TotalPurchases" — confirm whether in-store purchases are
# meant to be excluded. The country comparison and the map below both read
# this column.
df['TotalPurchases'] = df['NumDealsPurchases'] + df['NumWebPurchases'] + df['NumCatalogPurchases']

# Section 02: Statistical Analysis

# Factors related to the number of store purchases

In [None]:
NumStorePurchases = df.NumStorePurchases.unique()

In [None]:
# Show the distinct store-purchase counts in ascending order.
# The values are taken from the data rather than from a pasted copy of an
# earlier output, so the list stays correct if the dataset or the cleaning
# steps change.
NumStorePurchases = df.NumStorePurchases.unique()
print(np.sort(NumStorePurchases))

1. Number of Kids/Teens/Total Children at Home

In [None]:
# Total store purchases per number of kids at home.
# The column is selected before summing: aggregating the whole frame first
# also tries to sum the text and datetime columns, which is wasted work and
# fails on pandas versions that no longer drop such columns silently.
NumStorePurchases_Kidhome = df.groupby('Kidhome')['NumStorePurchases'].sum().reset_index()
NumStorePurchases_Kidhome

In [None]:
# Total store purchases per number of teens at home.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
NumStorePurchases_Teenhome = df.groupby('Teenhome')['NumStorePurchases'].sum().reset_index()
NumStorePurchases_Teenhome

In [None]:
# Total store purchases per total number of children at home.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
NumStorePurchases_TotalChildren = df.groupby('TotalChildren')['NumStorePurchases'].sum().reset_index()
NumStorePurchases_TotalChildren

In [None]:
# Three bar charts sharing one y axis: store purchases by kids at home,
# by teens at home and by total children.
fig = make_subplots(
    rows=1,
    cols=3,
    shared_yaxes=True,
    subplot_titles=("Number of Kids at home", "Number of Teens at home", "Total Children"),
)

# (aggregated frame, grouping column) for each panel, left to right.
panel_sources = [
    (NumStorePurchases_Kidhome, 'Kidhome'),
    (NumStorePurchases_Teenhome, 'Teenhome'),
    (NumStorePurchases_TotalChildren, 'TotalChildren'),
]
for column_number, (totals, group_column) in enumerate(panel_sources, start=1):
    bars = go.Bar(
        x=totals[group_column],
        y=totals['NumStorePurchases'],
        marker=dict(color='darkorange'),
    )
    fig.add_trace(bars, 1, column_number)

fig.update_layout(
    coloraxis=dict(colorscale='Bluered_r'),
    yaxis_title='Number of Store Purchases',
    title_text="How number of children at home and their age affect Number of Store Purchases",
    showlegend=False,
)
fig.show()

2. Marital Status

In [None]:
# Total store purchases per marital status.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
Marital_Status = df.groupby('Marital_Status')['NumStorePurchases'].sum().reset_index()

Marital_Status

In [None]:
# Bar chart of store purchases per marital status (title typo/grammar fixed).
px.bar(Marital_Status, x='Marital_Status', y='NumStorePurchases',
       title='How marital status affects the number of purchases')

3. Education

In [None]:
# Total store purchases per education level.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
Education = df.groupby('Education')['NumStorePurchases'].sum().reset_index()

Education

In [None]:
# Bar chart of store purchases per education level (title typo/grammar fixed).
px.bar(Education, x='Education', y='NumStorePurchases',
       title='How Education affects the number of purchases')

3. Age Group

In [None]:
df.Year_Birth.unique()

In [None]:
# Total store purchases per birth year (plotted as a bar chart further down).
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
A = df.groupby('Year_Birth')['NumStorePurchases'].sum().reset_index()

A

In [None]:
# Rank birth years by total store purchases (highest first) and show the
# table with a colour gradient.
purchases_by_birth_year = df.groupby('Year_Birth')['NumStorePurchases'].sum()
Year_Birth = purchases_by_birth_year.sort_values(ascending=False).to_frame()

Year_Birth.style.background_gradient(cmap='Pastel1_r')

In [None]:
px.bar(A , x = 'Year_Birth' , y = 'NumStorePurchases')

4. Income

In [None]:
# Total store purchases per distinct income value (input to the scatter plot
# in the next cell).
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
B = df.groupby('Income')['NumStorePurchases'].sum().reset_index()

B

In [None]:
fig = px.scatter(B, x="Income", y ='NumStorePurchases')
fig.show()

5. Country

In [None]:
# Total store purchases per country.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
Country = df.groupby('Country')['NumStorePurchases'].sum().reset_index()

Country

In [None]:
px.bar(Country , x = 'Country' , y = 'NumStorePurchases')

# US Purchases vs World Purchases

In [None]:
# Total purchases (the engineered TotalPurchases column) per country.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
Country_TotalPurchases = df.groupby('Country')['TotalPurchases'].sum().reset_index()

Country_TotalPurchases

In [None]:
px.bar(Country_TotalPurchases , x = 'Country' , y = 'TotalPurchases')

# Relationship between Buying Gold and In-Store Purchases

In [None]:
# Total store purchases for each distinct amount spent on gold products.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
MntGoldProds = df.groupby('MntGoldProds')['NumStorePurchases'].sum().reset_index()

In [None]:
MntGoldProds

In [None]:
fig = px.scatter(MntGoldProds , x = 'NumStorePurchases' , y = 'MntGoldProds')
fig.show()

# Are Married PhD customers buying more Fish Products?

In [None]:
# Total amount spent on fish products per education level.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
MntFishProducts = df.groupby('Education')['MntFishProducts'].sum().reset_index()

In [None]:
MntFishProducts

In [None]:
MntFishProducts.iplot(x = 'Education' , y = 'MntFishProducts' , kind = 'bar')

In [None]:
# Total amount spent on fish products per marital status.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
MntFishProducts_Marital_Status = df.groupby('Marital_Status')['MntFishProducts'].sum().reset_index()

In [None]:
MntFishProducts_Marital_Status

In [None]:
MntFishProducts_Marital_Status.iplot(x = 'Marital_Status' , y = 'MntFishProducts' , kind = 'bar')

# Other factors related to amount spent on fish

1. Number of Children at Home

In [None]:
# Total amount spent on fish products per number of children at home.
# The column is selected before summing so the text/datetime columns are never
# aggregated (wasteful, and an error on newer pandas versions).
TotalChildren_MntFishProducts = df.groupby('TotalChildren')['MntFishProducts'].sum().reset_index()

TotalChildren_MntFishProducts

In [None]:
px.bar(TotalChildren_MntFishProducts , x = 'TotalChildren' , y = 'MntFishProducts' )

2. Income

In [None]:
px.scatter( df ,x = 'Income' , y = 'MntFishProducts' )

# Relationship between geographical region and success of a campaign

In [None]:
df.Country.unique()

In [None]:
# Number of accepted offers per country, one frame per campaign.
# The grouping is built once and each campaign column is selected before
# summing. The previous form summed every column of the frame six times over,
# including the text and datetime columns, which is wasteful and fails on
# pandas versions that no longer drop such columns silently.
by_country = df.groupby('Country')
AcceptedCmp1 = by_country['AcceptedCmp1'].sum().reset_index()
AcceptedCmp2 = by_country['AcceptedCmp2'].sum().reset_index()
AcceptedCmp3 = by_country['AcceptedCmp3'].sum().reset_index()
AcceptedCmp4 = by_country['AcceptedCmp4'].sum().reset_index()
AcceptedCmp5 = by_country['AcceptedCmp5'].sum().reset_index()
Response = by_country['Response'].sum().reset_index()

In [None]:
# One line per campaign showing accepted offers in each country.
# Each entry: (per-country totals, campaign column / legend name, line colour).
campaign_lines = [
    (AcceptedCmp1, 'AcceptedCmp1', 'yellowgreen'),
    (AcceptedCmp2, 'AcceptedCmp2', 'red'),
    (AcceptedCmp3, 'AcceptedCmp3', 'olive'),
    (AcceptedCmp4, 'AcceptedCmp4', 'magenta'),
    (AcceptedCmp5, 'AcceptedCmp5', 'blue'),
    (Response, 'Response', 'darkorange'),
]

fig = go.Figure()
for totals, campaign, line_colour in campaign_lines:
    fig.add_trace(
        go.Scatter(
            x=totals['Country'],
            y=totals[campaign],
            mode='lines+markers',
            name=campaign,
            line=dict(color=line_colour, width=2),
        )
    )

fig.update_layout(
    title_text="Success of Campaign in different Countries",
    xaxis_title='Country',
    yaxis_title="Total Offers accepted by Customers",
)
fig.show()

# Section 03: Data Visualization

# Most Successful Marketing Campaign

In [None]:
# Overall number of accepted offers for each campaign column.
campaign_columns = [
    'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3',
    'AcceptedCmp4', 'AcceptedCmp5', 'Response',
]
ac1, ac2, ac3, ac4, ac5, rsp = (df[campaign].sum() for campaign in campaign_columns)

In [None]:
# Bar chart comparing the overall acceptance count of every campaign.
campaign_labels = ['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'Response']
accepted_counts = [ac1, ac2, ac3, ac4, ac5, rsp]

fig = px.bar(x=campaign_labels, y=accepted_counts)

fig.update_layout(title='Success of Marketing Campaigns')
fig.update_xaxes(title_text='Marketing Campaign')
fig.update_yaxes(title_text='Total Offers accepted by Customers')

fig.show()

# The average customer is from Spain, was born between 1970 and 1980, has a Graduation-level education, and is married with no kids at home.

# Best Performing Products

In [None]:
# Overall amount spent on each product category.
amount_columns = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]
Wine, Fruits, Meat, Fish, Sweet, Gold = (df[amount].sum() for amount in amount_columns)

In [None]:
# Line-and-marker chart of the total amount spent per product category.
product_labels = ['Wine', 'Fruits', 'Meat', 'Fish', 'Sweet', 'Gold']
product_amounts = [Wine, Fruits, Meat, Fish, Sweet, Gold]

product_trace = go.Scatter(
    x=product_labels,
    y=product_amounts,
    mode='lines+markers',
    marker=dict(size=30),
)
fig = go.Figure(data=product_trace)

fig.update_xaxes(title_text='Products')
fig.update_yaxes(title_text='Amount Spent')

# Last expression of the cell: update_layout returns the figure, which is
# what makes the notebook render it.
fig.update_layout(title='Product Performance')

# Countries with most Purchases depicted through a Stamen terrain map

In [None]:
TotalPurchases_Country = df.groupby('Country')['TotalPurchases'].sum().reset_index()

In [None]:
# Approximate centre coordinates for each country code in the dataset; these
# position the circle markers on the map below.
# Fix: the 'SA' latitude was missing its minus sign. 30.5595 / 22.9375 are
# South Africa's coordinates (30.5595 S, 22.9375 E); with a positive latitude
# the marker was drawn in North Africa.
# NOTE(review): 'ME' uses Montenegro's coordinates — confirm that is the
# country this code stands for in the dataset.
d = {
    'Country':   ['AUS', 'CA', 'GER', 'IND', 'ME', 'SA', 'SP', 'US'],
    'Latitude':  [-25.274398, 56.1304, 51.1657, 20.5937, 42.7087, -30.5595, 40.4637, 37.0902],
    'Longitude': [133.7751, -106.346771, 10.4515, 78.9629, 19.3744, 22.9375, -3.7492, -95.7129],
}
long_lat = pd.DataFrame(data=d)
long_lat

In [None]:
# Attach the coordinates to the per-country purchase totals (join on the
# shared 'Country' column).
Country_latlong = TotalPurchases_Country.merge(long_lat, on='Country')

Country_latlong

In [None]:
# World map with one circle per country; the circle radius is proportional to
# the country's total purchases (TotalPurchases / 200).
# NOTE(review): the name `map` shadows Python's builtin of the same name; it
# is kept because a later cell displays the map through this name.
# NOTE(review): built-in Stamen tile names may not be accepted by newer folium
# releases — confirm against the installed version.
map = flm.Map(location=[50, 50], zoom_start=1.5, tiles='Stamenterrain')

for country_row in Country_latlong.itertuples(index=False):
    popup_html = ''.join([
        '<strong>Country</strong>: ',
        str(country_row.Country).capitalize(),
        '<br>',
        '<strong>Total Purchases</strong>: ',
        str(country_row.TotalPurchases),
        '<br>',
    ])
    country_marker = flm.CircleMarker(
        [country_row.Latitude, country_row.Longitude],
        radius=country_row.TotalPurchases / 200,
        popup=popup_html,
        color='darkorange',
        fill_color='darkorange',
        fill_opacity=0.1,
    )
    country_marker.add_to(map)

# Map

In [None]:
map