# <div style="text-align: center; background-color: #191414;font-size:200%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:40px"><b>Customer Personality Analysis</b></div>

<h1 style="text-align: left;background-color: #191414; font-size:200%; font-family:Courier New; color: #56E39F; padding: 14px; line-height: 1; border-radius:10px"> <b>Table of Contents</b></h1>


<a id="top"></a>
<div class="list-group" id="list-tab" role="tablist">
    
   * [1. Imports](#1)
   * [2. Data Cleaning](#2)
   * [3. Box Plot to Detect Outliers](#3)
   * [4. Donut Charts](#4)
   * [5. Feature Distribution](#5)
   * [6. Scatter Plots](#6)
   * [7. Sunbursts](#7)
   * [8. Multivariate Histograms](#8)
   * [9. Tree Maps](#9)
   * [10. 3D Scatter Plot](#10)

<a id="1"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>1. Imports</b></div>

In [1]:
import pandas as pd
import numpy as np

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.linear_model import LinearRegression

theme_colors = ['#FF1654', '#20A4F3', '#56E39F', '#F9C846', '#4C5760']

font = 'Courier New'

pd.set_option('display.max_columns', None)

  shapely_geos_version, geos_capi_version_string


In [2]:
original_df = pd.read_csv('../input/customer-personality-analysis/marketing_campaign.csv', sep="\t")
df = original_df.copy()

In [3]:
df.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,Graduation,Single,58138.0,0,0,04-09-2012,58,635,88,546,172,88,88,3,8,10,4,7,0,0,0,0,0,0,3,11,1
1,2174,1954,Graduation,Single,46344.0,1,1,08-03-2014,38,11,1,6,2,1,6,2,1,1,2,5,0,0,0,0,0,0,3,11,0
2,4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,49,127,111,21,42,1,8,2,10,4,0,0,0,0,0,0,3,11,0
3,6182,1984,Graduation,Together,26646.0,1,0,10-02-2014,26,11,4,20,10,3,5,2,2,0,4,6,0,0,0,0,0,0,3,11,0
4,5324,1981,PhD,Married,58293.0,1,0,19-01-2014,94,173,43,118,46,27,15,5,5,3,6,5,0,0,0,0,0,0,3,11,0


<a id="2"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>2. Data Cleaning</b></div>

In [4]:
df.drop(['ID', 'Z_CostContact', 'Z_Revenue'], axis=1, inplace=True)

In [5]:
df['Income'] = df['Income'].fillna(df['Income'].mean())

In [6]:
df['Age'] = 2015 - df["Year_Birth"]

In [7]:
df['Marital_Status'] = df['Marital_Status'].replace(['Married', 'Together'],'Relationship')
df['Marital_Status'] = df['Marital_Status'].replace(['Divorced', 'Widow', 'Alone', 'YOLO', 'Absurd'],'Single')

In [8]:
df['Education'] = df['Education'].replace(['PhD','2n Cycle', 'Master'],'PG')  
df['Education'] = df['Education'].replace(['Graduation', 'Basic'], 'UG')

In [9]:
df['ChildrenHome'] = df['Kidhome'] + df['Teenhome']

df['AmountSpent'] = df['MntWines'] + df['MntFruits'] + df['MntMeatProducts'] + df['MntFishProducts'] + df['MntSweetProducts'] + df['MntGoldProds']

df['TotalAcceptedCmp'] = df['AcceptedCmp1'] + df['AcceptedCmp2'] + df['AcceptedCmp3'] + df['AcceptedCmp4'] + df['AcceptedCmp5'] + df['Response']

df['NumTotalPurchases'] = df['NumWebPurchases'] + df['NumCatalogPurchases'] + df['NumStorePurchases'] + df['NumDealsPurchases']

In [10]:
df['Dt_Customer'] = pd.to_datetime(df.Dt_Customer)
df['first_day'] = '01-01-2015'
df['first_day'] = pd.to_datetime(df.first_day)
df['DaysEngaged'] = (df['first_day'] - df['Dt_Customer']).dt.days

In [11]:
cols_to_be_deleted = ["Kidhome", "Teenhome",
                      "MntWines", "MntFruits", "MntMeatProducts", "MntFishProducts", "MntSweetProducts", "MntGoldProds",
                      "NumWebVisitsMonth", "NumWebPurchases","NumCatalogPurchases","NumStorePurchases","NumDealsPurchases",
                      "AcceptedCmp1" , "AcceptedCmp2", "AcceptedCmp3" , "AcceptedCmp4","AcceptedCmp5", "Response",
                      "Dt_Customer", "first_day", "Year_Birth", "Dt_Customer", "Recency", "Complain"]

df=df.drop(columns=cols_to_be_deleted, axis=1)

In [12]:
## Rearranging the Columns
df = df[['Age', 'Education', 'Marital_Status', 'ChildrenHome', 'Income', 'TotalAcceptedCmp', 'AmountSpent', 'NumTotalPurchases', 'DaysEngaged']]
df.head()

Unnamed: 0,Age,Education,Marital_Status,ChildrenHome,Income,TotalAcceptedCmp,AmountSpent,NumTotalPurchases,DaysEngaged
0,58,UG,Single,0,58138.0,1,1617,25,997
1,61,UG,Single,2,46344.0,0,27,6,151
2,50,UG,Relationship,0,71613.0,0,776,21,498
3,31,UG,Relationship,1,26646.0,0,53,8,91
4,34,PG,Relationship,1,58293.0,0,422,19,347


<a id="3"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>3. Box Plot to Detect Outliers</b></div>

In [13]:
fig=make_subplots(rows=2,cols=2,subplot_titles=('<i>Age', '<i>Income', '<i>Amount Spent', '<i>Days Engaged'))
fig.add_trace(go.Box(x=df['Age'],name='Age'),row=1,col=1)
fig.add_trace(go.Box(x=df['Income'],name='Income'),row=1,col=2)
fig.add_trace(go.Box(x=df['AmountSpent'],name='AmountSpent'),row=2,col=1)
fig.add_trace(go.Box(x=df['DaysEngaged'],name='DaysEngaged'),row=2,col=2)

fig.update_yaxes(visible=False, showticklabels=False)
fig.update_layout(height=600, width=1000, title_text='<b>Box Plots to Check Outliers', font_size=20)
fig.update_layout(template='plotly_dark', title_x=0.5, font_family='Courier New', showlegend=False)

In [14]:
## Filtering to remove the Outlier Values
df = df[df['Age'] < 100]
df = df[df['Income'] < 150000]

In [15]:
df_describe = df.describe().T.reset_index()
df_describe = df_describe.round(2)

fig = go.Figure(data=[go.Table(
    header=dict(values=['Columns', 'Mean', 'Standard Deviation', 'Min', '25%', '50%', '75%', 'Max'],
                line_color='#032d6b',
                fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[df_describe['index'],
                       df_describe['mean'],
                       df_describe['std'],
                       df_describe['min'],
                       df_describe['25%'],
                       df_describe['50%'],
                       df_describe['75%'],
                       df_describe['max']],
               line_color='darkslategray',
               fill_color='lavender',
               align='left'))
])

fig.update_layout(
    title = "Columns Statistical Table",
    width=1000,
    height=380
    )
fig.show()

<a id="4"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>4. Donut Charts</b></div>

In [16]:
education_count = df.groupby(['Education']).size().reset_index().rename(columns={0: 'count'})
marital_count = df.groupby(['Marital_Status']).size().reset_index().rename(columns={0: 'count'})
children_count = df.groupby(['ChildrenHome']).size().reset_index().rename(columns={0: 'count'})
campaigns_count = df.groupby(['TotalAcceptedCmp']).size().reset_index().rename(columns={0: 'count'})

fig = make_subplots(rows=2, cols=2,
                    specs=[[{'type':'domain'}, {'type':'domain'}],
                           [{'type':'domain'}, {'type':'domain'}]
                          ])

## Education Donut Chart
fig.add_trace(
    go.Pie(
        labels=education_count['Education'],
        values=education_count['count'],
        hole=.6,
        title='Education',
        titlefont={'color':None, 'size': 24},
        ),
    row=1,col=1
    )

## Marital_Status Donut Chart
fig.add_trace(
    go.Pie(
        labels=marital_count['Marital_Status'],
        values=marital_count['count'],
        hole=.6,
        title='Marital Status',
        titlefont={'color':None, 'size': 24},       
        ),
    row=1,col=2
    )

## ChildrenHome Donut Chart
fig.add_trace(
    go.Pie(
        labels=children_count['ChildrenHome'],
        values=children_count['count'],
        hole=.6,
        title='Children Home',
        titlefont={'color':None, 'size': 24},
        ),
    row=2,col=1
    )


## Total Accepted Campaigns Donut Chart
fig.add_trace(
    go.Pie(
        labels=campaigns_count['TotalAcceptedCmp'],
        values=campaigns_count['count'],
        hole=.6,
        title='Accepted Campaigns',
        titlefont={'color':None, 'size': 24},
        ),
    row=2,col=2
    )

fig.update_traces(
    hoverinfo='label+value',
    textinfo='label+percent',
    textfont_size=12,
    marker=dict(
        colors=theme_colors,
        line=dict(color='#EEEEEE',
                  width=2)
        )
    )


fig.layout.update(title="<b> Categorical Features Donut Charts <b>",
                  titlefont={'color':None, 'size': 28, 'family': 'Courier New'},
                  showlegend=False, 
                  height=800, 
                  width=1000,
                  template='plotly_dark',
                  title_x=0.5
                  )
fig.show()

<a id="5"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>5. Feature Distribution</b></div>

In [17]:
fig=make_subplots(rows=2,cols=2,subplot_titles=('<i>Age', '<i>Income', '<i>Amount Spent', '<i>Days Engaged'))
fig.add_trace(go.Histogram(x=df['Age'],name='Age'),row=1,col=1)
fig.add_trace(go.Histogram(x=df['Income'],name='Income'),row=1,col=2)
fig.add_trace(go.Histogram(x=df['AmountSpent'],name='AmountSpent'),row=2,col=1)
fig.add_trace(go.Histogram(x=df['DaysEngaged'],name='DaysEngaged'),row=2,col=2)

fig.update_layout(height=600, width=1000, title_text='<b>Histograms', font_size=20)
fig.update_layout(template='plotly_dark', title_x=0.5, font_family='Courier New', showlegend=False)

<a id="6"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>6. Scatter Plots </b></div>

In [18]:
def makeLine(df, x, y):
    model = LinearRegression().fit(np.array(df[x]).reshape(-1,1), (np.array(df[y])))
    y_pred = model.predict(np.array(df[x]).reshape(-1,1))
    return y_pred

In [19]:
fig = make_subplots(
    rows=2, cols=2, subplot_titles=("Age vs Income", "Age vs Amount Spent", "Age vs Number of Purchases", "Age vs Days Engaged")
)

# Add traces
fig.add_trace(go.Scatter(x=df['Age'], y=df['Income'], mode='markers'), row=1, col=1)
fig.add_trace(go.Scatter(x=df['Age'], y=makeLine(df, 'Age', 'Income'), mode='lines',name="Linear_reg_fit", marker_color='white'), row=1, col=1)

fig.add_trace(go.Scatter(x=df['Age'], y=df['AmountSpent'], mode='markers'), row=1, col=2)
fig.add_trace(go.Scatter(x=df['Age'], y=makeLine(df, 'Age', 'AmountSpent'), mode='lines',name="Linear_reg_fit", marker_color='white'), row=1, col=2)


fig.add_trace(go.Scatter(x=df['Age'], y=df['NumTotalPurchases'], mode='markers'), row=2, col=1)
fig.add_trace(go.Scatter(x=df['Age'], y=makeLine(df, 'Age', 'NumTotalPurchases'), mode='lines',name="Linear_reg_fit", marker_color='white'), row=2, col=1)

fig.add_trace(go.Scatter(x=df['Age'], y=df['DaysEngaged'], mode='markers'), row=2, col=2)
fig.add_trace(go.Scatter(x=df['Age'], y=makeLine(df, 'Age', 'DaysEngaged'), mode='lines',name="Linear_reg_fit", marker_color='white'), row=2, col=2)


# Update xaxis properties
fig.update_xaxes(title_text="Age", row=1, col=1)
fig.update_xaxes(title_text="Age", row=1, col=2)
fig.update_xaxes(title_text="Age", row=2, col=1)
fig.update_xaxes(title_text="Age", type="log", row=2, col=2)

# Update title and height
fig.update_layout(title_text="Scatter Plots wrt Age", title_x=0.5, height=800, template='plotly_dark', showlegend=False,
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

<a id="7"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>7. Sunbursts </b></div>

In [20]:
## Grouping Datasets
education_marital_df = df.groupby(['Education', 'Marital_Status']).size().reset_index().rename(columns={0: 'count'})
education_children_df = df.groupby(['Education', 'ChildrenHome']).size().reset_index().rename(columns={0: 'count'})

## Creating Sunburst Figures
sb1 = px.sunburst(education_marital_df, values='count', path=['Education', 'Marital_Status'])
sb2 = px.sunburst(education_children_df, values='count', path=['Education', 'ChildrenHome'])

## Subplots
fig = make_subplots(rows=2, cols=1, specs=[
    [{"type": "sunburst"},],
    [{"type": "sunburst"}]
], subplot_titles=("Education and Marital Status", "Education and Num of Children"))

## Plotting Figures
fig.add_trace(sb1.data[0], row=1, col=1)
fig.add_trace(sb2.data[0], row=2, col=1)

fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(title_text="Sunbursts", title_x=0.5, height=900, template='plotly_dark', showlegend=False,
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

<a id="8"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>8. Multivariate Histograms </b></div>

In [21]:
fig = px.histogram(df, x="Income", color="Marital_Status", marginal="violin", template='plotly_dark')

fig.update_layout(title_text="Income vs Marital Status", title_x=0.5, height=500, template='plotly_dark',
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

In [22]:
fig = px.histogram(df, x="Income", color="Education", marginal="violin", template='plotly_dark')

fig.update_layout(title_text="Income vs Education", title_x=0.5, height=500, template='plotly_dark',
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

In [23]:
fig = px.histogram(df, x="Income", color="ChildrenHome", marginal="violin", template='plotly_dark')

fig.update_layout(title_text="Income vs Marital Status", title_x=0.5, height=500, template='plotly_dark',
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

<a id="9"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>9. Treemaps </b></div>

In [24]:
education_children = df.groupby(['Education', 'ChildrenHome', 'NumTotalPurchases']).size().reset_index().rename(columns={0: 'count'})

fig = px.treemap(education_children, path=[px.Constant("Customers"), 'Education', 'ChildrenHome'], values='count',
                  color='NumTotalPurchases', hover_data=['count'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(education_children['NumTotalPurchases'], weights=education_children['count']))

fig.update_traces(textinfo="label+percent parent")

fig.update_layout(title_text="<i> Education </i> X <i> NumofChildren </i> X <i> Total Purchases </i>", title_x=0.5, height=500, template='plotly_dark',
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

<blockquote style="margin-right:auto; font-family:Courier New; margin-left:auto; color:white; background-color: #56E39F; padding: 1em; margin:24px;">
   
<ul>
<li> <font color="white" size=+1.0><b> Number of Purchases is Inversely Proportional to the Number of Children. </b></font>
</ul> 
</blockquote>
                                                                                                                                            

In [25]:
marital_education_campaigns = df.groupby(['Marital_Status', 'Education', 'TotalAcceptedCmp', 'DaysEngaged']).size().reset_index().rename(columns={0: 'count'})

fig = px.treemap(marital_education_campaigns, path=[px.Constant("Customers"), 'Marital_Status', 'Education', 'TotalAcceptedCmp'], values='count',
                  color='DaysEngaged', hover_data=['count'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(marital_education_campaigns['DaysEngaged'], weights=marital_education_campaigns['count']))

fig.update_traces(textinfo="label+percent parent")

fig.update_layout(title_text="<i> Marital Status </i> X <i> Education </i> X <i> Number of Campaigns </i> X <i> Days Engaged </i>", title_x=0.5, height=500, template='plotly_dark',
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

<a id="10"></a>
# <div style="text-align: center; background-color: #191414;font-size:120%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:50px"><b>10. 3D Scatter Plot </b></div>

In [26]:
fig = px.scatter_3d(df, x='Income', y='DaysEngaged', z='AmountSpent',
                    color='NumTotalPurchases')
fig.update_layout(title='<b> 3D Scatter Plot <b>', title_x=0.5,
                  titlefont=dict({'size':28, 'family': 'Courier New', 'color':'white'}),
                  template='plotly_dark',
                  width=900, height=500,
                 )
fig.update_layout(scene = dict(
                    xaxis = dict(
                         backgroundcolor="rgb(200, 200, 230)",
                         gridcolor="white",
                         showbackground=True,
                         zerolinecolor="black",),
                    yaxis = dict(
                        backgroundcolor="rgb(230, 200,230)",
                        gridcolor="white",
                        showbackground=True,
                        zerolinecolor="black"),
                    zaxis = dict(
                        backgroundcolor="rgb(230, 230,200)",
                        gridcolor="white",
                        showbackground=True,
                        zerolinecolor="black"),
                              ),
                  )
fig.show()

# <div style="text-align: center; background-color: #191414;font-size:200%; font-family:Courier New; color: #56E39F; padding: 20px; line-height: 1;border-radius:40px"><b>The End</b></div>