In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the marketing customer-value CSV (path is relative to the working directory).
df = pd.read_csv(
    filepath_or_buffer='WA_Fn-UseC_-Marketing-Customer-Value-Analysis.csv',
)

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Preview the first rows to get a feel for the columns and their values.
df.head()

In [None]:
# List the column labels available for grouping and segmentation below.
df.columns

In [None]:
# Number of customers (non-null 'Customer' values) in each 'Response' group.
df.groupby('Response')['Customer'].count()

In [None]:
# Bar chart of the number of customers in each 'Response' group.
ax = (
    df.groupby('Response')['Customer']
    .count()
    .plot.bar(
        figsize=(10, 7),
        title='Marketing Engagement',
        color='orchid',
        grid=True,
    )
)
ax.set(xlabel='Engaged', ylabel='Count')
plt.show()

In [None]:
# Per-'Response' customer counts as a fraction of the total number of rows.
df.groupby('Response')['Customer'].count() / len(df)

In [None]:
# Engagement rate per renewal offer type:
#   engaged ('Response' == 'Yes') customers in the offer type
#   divided by all customers in that offer type.
engaged_per_offer = (
    df.loc[df['Response'] == 'Yes']  # count only engaged customers
    .groupby('Renew Offer Type')['Customer']
    .count()
)
total_per_offer = df.groupby('Renew Offer Type')['Customer'].count()

# An offer type with no engaged customers is absent from the numerator, so the
# index-aligned division yields NaN for it; its rate is really 0.
by_offer_type_df = (engaged_per_offer / total_per_offer).fillna(0)
by_offer_type_df

In [None]:
# Show the per-offer-type engagement rates as percentages.
ax = by_offer_type_df.mul(100.0).plot.bar(
    figsize=(7, 7),
    color='dodgerblue',
    grid=True,
)
ax.set_ylabel('Engagement Rate (%)')
plt.show()

In [None]:
# Engaged customers per (renewal offer type, vehicle class) pair, expressed as
# a fraction of ALL customers in the corresponding renewal offer type.
# Note the denominator is the per-offer-type total, not the per-pair total.
by_offer_type_df = (
    df[df['Response'] == 'Yes']  # engaged customers only
    .groupby(['Renew Offer Type', 'Vehicle Class'])['Customer']
    .count()
) / df.groupby('Renew Offer Type')['Customer'].count()
by_offer_type_df

In [None]:
# Pivot the innermost index level into columns so the table reads as a grid;
# combinations that are missing after the pivot are shown as 0.
by_offer_type_df = by_offer_type_df.unstack(level=-1).fillna(value=0)
by_offer_type_df

In [None]:
# Grouped bar chart of the pivoted engagement rates, as percentages.
ax = by_offer_type_df.mul(100.0).plot.bar(figsize=(14, 8), grid=True)
ax.set(ylabel='Engagement Rate (%)')
plt.show()

In [None]:
# 2.4 - Engagement Rates by Sales Channel
# We are going to analyze how engagement rates differ across sales channels:
#   engaged ('Response' == 'Yes') customers in the channel
#   divided by all customers in that channel.
engaged_per_channel = (
    df.loc[df['Response'] == 'Yes']
    .groupby('Sales Channel')['Customer']
    .count()
)
total_per_channel = df.groupby('Sales Channel')['Customer'].count()

# A channel with no engaged customers is absent from the numerator, so the
# index-aligned division yields NaN for it; its rate is really 0.
by_sales_channel_df = (engaged_per_channel / total_per_channel).fillna(0)
by_sales_channel_df

In [None]:
# Show the per-channel engagement rates as percentages.
ax = by_sales_channel_df.mul(100.0).plot.bar(
    figsize=(7, 7),
    color='palegreen',
    grid=True,
)
ax.set(ylabel='Engagement Rate (%)')
plt.show()

In [None]:
# 2.5 - Sales Channel & Vehicle Size
# Check whether customers with different vehicle sizes respond differently
# depending on the sales channel.
# Engaged customers per (sales channel, vehicle size) pair are divided by ALL
# customers in the corresponding sales channel (not by the per-pair total).
by_sales_channel_df = (
    df[df['Response'] == 'Yes']
    .groupby(['Sales Channel', 'Vehicle Size'])['Customer']
    .count()
) / df.groupby('Sales Channel')['Customer'].count()
by_sales_channel_df

In [None]:
# Pivot the innermost index level into columns for readability;
# combinations that are missing after the pivot are shown as 0.
by_sales_channel_df = by_sales_channel_df.unstack(level=-1).fillna(value=0)
by_sales_channel_df

In [None]:
# Grouped bar chart of the pivoted per-channel engagement rates, as percentages.
ax = by_sales_channel_df.mul(100.0).plot.bar(figsize=(12, 7), grid=True)
ax.set(ylabel='Engagement Rate (%)')
plt.show()

In [None]:
# 2.6 - Engagement Rates by Months Since Policy Inception
# Percentage of engaged ('Response' == 'Yes') customers for each value of
# 'Months Since Policy Inception'.
by_months_since_inception_df = (
    df[df['Response'] == 'Yes']
    .groupby('Months Since Policy Inception')['Response']
    .count()
    / df.groupby('Months Since Policy Inception')['Response'].count()
) * 100.0
# Display only: months without any engaged customer are shown as 0
# (the stored series keeps its NaN values).
by_months_since_inception_df.fillna(0)

In [None]:
# Line chart of the engagement rate against policy age; missing rates are drawn as 0.
ax = by_months_since_inception_df.fillna(0).plot(
    title='Engagement Rates by Months Since Inception',
    figsize=(10, 7),
    color='skyblue',
    grid=True,
)
ax.set(
    xlabel='Months Since Policy Inception',
    ylabel='Engagement Rate (%)',
)
plt.show()

In [None]:
# 3. Customer Segmentation by CLV & Months Since Policy Inception
# We are going to segment our customer base by Customer Lifetime Value (CLV)
# and by Months Since Policy Inception.
# First, look at the summary statistics of the CLV distribution.
df['Customer Lifetime Value'].describe()

In [None]:
# Split customers at the median CLV: strictly above the median is 'High',
# everything else (including values equal to the median) is 'Low'.
# The median is computed once here rather than once per row inside a lambda.
clv_median = df['Customer Lifetime Value'].median()
df['CLV Segment'] = (
    df['Customer Lifetime Value']
    .gt(clv_median)
    .map({True: 'High', False: 'Low'})
)

In [None]:
# Repeat the procedure for Months Since Policy Inception:
# look at its summary statistics before choosing a split point.
df['Months Since Policy Inception'].describe()

In [None]:
# Split customers at the median policy age: strictly above the median is
# 'High', everything else (including values equal to the median) is 'Low'.
# The median is computed once here rather than once per row inside a lambda.
policy_age_median = df['Months Since Policy Inception'].median()
df['Policy Age Segment'] = (
    df['Months Since Policy Inception']
    .gt(policy_age_median)
    .map({True: 'High', False: 'Low'})
)

In [None]:
# Preview the frame again to inspect the 'CLV Segment' and 'Policy Age Segment' columns.
df.head()

In [None]:
# Visualize the four segments on a single set of axes.
# Each entry is (CLV Segment, Policy Age Segment, marker colour); driving the
# plot from this table replaces four copy-pasted scatter calls.
segment_styles = [
    ('High', 'High', 'red'),
    ('Low', 'High', 'blue'),
    ('High', 'Low', 'orange'),
    ('Low', 'Low', 'green'),
]

fig, ax = plt.subplots(figsize=(16, 10))
for clv_level, age_level, colour in segment_styles:
    in_segment = (
        (df['CLV Segment'] == clv_level)
        & (df['Policy Age Segment'] == age_level)
    )
    df.loc[in_segment].plot.scatter(
        ax=ax,
        x='Months Since Policy Inception',
        y='Customer Lifetime Value',
        # logy=True switches the y axis to a log scale, which is often used
        # for monetary values because they tend to be highly skewed.
        logy=True,
        color=colour,
        # The label feeds the legend so each colour can be read off the chart.
        label='{} CLV / {} Policy Age'.format(clv_level, age_level),
    )

ax.grid(True)
ax.legend(title='Segment')
ax.set_ylabel('CLV (in log scale)')
ax.set_xlabel('Months Since Policy Inception')
ax.set_title('Segments by CLV and Policy Age')
plt.show()

In [None]:
# See whether there is any noticeable difference in the engagement rates
# among these segments:
#   engaged ('Response' == 'Yes') customers in the segment
#   divided by all customers in that segment.
segment_keys = ['CLV Segment', 'Policy Age Segment']
engaged_per_segment = (
    df.loc[df['Response'] == 'Yes']
    .groupby(segment_keys)['Customer']
    .count()
)
total_per_segment = df.groupby(segment_keys)['Customer'].count()

# A segment with no engaged customers is absent from the numerator, so the
# index-aligned division yields NaN for it; its rate is really 0.
engagement_rates_by_segment_df = (engaged_per_segment / total_per_segment).fillna(0)
engagement_rates_by_segment_df

In [None]:
# Chart the per-segment engagement rates as percentages; unstacking turns the
# innermost index level into one bar series per value.
ax = (
    engagement_rates_by_segment_df
    .unstack()
    .mul(100.0)
    .plot.bar(figsize=(10, 7), grid=True)
)
ax.set(
    ylabel='Engagement Rate (%)',
    title='Engagement Rates by Customer Segments',
)
plt.show()

### As we can see, the High Policy Age segment has higher engagement than the Low Policy Age segment. This suggests that customers who have been insured by this company longer respond better. Moreover, the High Policy Age and Low CLV segment has the highest engagement rate among the four segments.
### By creating different customer segments based on customer attributes, we can better understand how different groups of customers behave and, consequently, use this information to customize the marketing messages.