## Case Study - Marketing Analytics

Data source: [UCI Machine Learning Repository – Bank Marketing dataset](http://archive.ics.uci.edu/ml/datasets/Bank+Marketing)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Cap DataFrame/Series output at 10 rows so displayed frames stay short.
pd.set_option('display.max_rows', 10)

In [None]:
%matplotlib inline

# Conversion Rate

In [None]:
# Download the semicolon-delimited CSV straight from its hosted URL.
url = 'http://people.bu.edu/kalathur/datasets/bank-additional-full.csv'
df = pd.read_csv(filepath_or_buffer=url, delimiter=';')

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Column labels available in the loaded frame.
df.columns

In [None]:
# Preview a subset of columns, including the raw outcome column 'y'.
df.loc[:, ['age','job','education','contact','duration','campaign','y']]

In [None]:
# Encode the outcome as a 0/1 flag: 1 where y is exactly 'yes', 0 otherwise.
# A vectorised comparison replaces the per-row Python lambda; the explicit
# int64 cast keeps the same integer dtype the lambda-based version produced.
df['conversion'] = df['y'].eq('yes').astype('int64')

In [None]:
# Same preview as before, now with the derived 0/1 'conversion' column beside 'y'.
df.loc[:, ['age','job','education','contact','duration','campaign','y','conversion']]

In [None]:
# Number of rows for each distinct value of 'campaign', most frequent first.
df['campaign'].value_counts()

### 1. Aggregate Conversion Rate

In [None]:
# Report how many rows converted out of all rows in the frame.
total_conversions = df['conversion'].sum()
total_rows = len(df)
print('Total conversions: {} out of {}'.format(total_conversions, total_rows))

In [None]:
# Overall conversion rate as a percentage of all rows.
overall_rate_pct = df['conversion'].sum() / len(df) * 100.0
print('Conversion rate: {:.2f}%'.format(overall_rate_pct))

### 2. Conversion Rates by Number of Contacts

In [None]:
# Number of conversions (sum of the 0/1 flag) for each 'campaign' value.
df.groupby('campaign')['conversion'].sum()

In [None]:
# Number of non-null 'conversion' entries for each 'campaign' value.
df.groupby('campaign')['conversion'].count()

In [None]:
# Per 'campaign' value: conversions ('sum') and number of rows ('len').
# The string 'sum' is used instead of the np.sum callable, which pandas
# deprecates inside groupby aggregations; the resulting column names
# ('sum', 'len') are unchanged, so later cells can keep using them.
conversions_by_contacts = df.groupby('campaign')['conversion'].agg(
    ['sum', len])
conversions_by_contacts

In [None]:
# Cross-tab of campaign vs. conversion with row/column totals; the
# non-conversion (0) column is dropped, leaving conversions and 'All'.
# `margins` expects a bool: the builtin function `all` only worked because
# it happens to be truthy.
pd.crosstab(df['campaign'], df['conversion'], margins=True).drop(0, axis=1)

In [None]:
# Conversion rate in percent: 'sum' divided by 'len', times 100.
conversions_by_contacts['rate'] = (
    conversions_by_contacts['sum']
    .div(conversions_by_contacts['len'])
    .mul(100.0)
)
conversions_by_contacts

In [None]:
# Line chart of the conversion rate for the first ten rows of the table.
first_ten_rates = conversions_by_contacts['rate'].iloc[:10]

ax = first_ten_rates.plot(
    title='Conversion Rates by Number of Contacts',
    xticks=first_ten_rates.index,
    figsize=(10, 7),
    grid=True,
)

# Fixed y-range of 0-15 (percent) for this chart.
ax.set_ylim([0, 15])
ax.set(xlabel='number of contacts', ylabel='conversion rate (%)')

plt.show()

### 3. Conversion Rates by Age

In [None]:
# Per age: conversions ('sum') and number of rows ('len').
# The string 'sum' replaces the np.sum callable, which pandas deprecates
# inside groupby aggregations; the column names ('sum', 'len') are unchanged.
conversions_by_age = df.groupby('age')['conversion'].agg(
    ['sum', len])

conversions_by_age

In [None]:
# Cross-tab alternative to the groupby above: conversions per age plus an
# 'All' total, with the non-conversion (0) column dropped.
# `margins` expects a bool: the builtin function `all` only worked because
# it happens to be truthy.

pd.crosstab(df['age'], df['conversion'], margins=True).drop(0, axis=1)


In [None]:
# Conversion rate in percent for each age: 'sum' / 'len' * 100.
conversions_by_age['rate'] = (
    conversions_by_age['sum'].div(conversions_by_age['len']).mul(100.0)
)

# Show the ages with the highest rates first.
conversions_by_age.sort_values('rate', ascending=False)

In [None]:
# Line chart of the per-age conversion rate.
ax = conversions_by_age['rate'].plot(
    title='Conversion Rates by Age',
    figsize=(10, 7),
    grid=True,
)
ax.set(xlabel='age', ylabel='conversion rate (%)')

plt.show()

#### Analyze by Age Groups

In [None]:
# Youngest and oldest age in the data, shown as a (min, max) tuple.
ages = df['age']
(ages.min(), ages.max())

In [None]:
def _age_bucket(age):
    """Return the age-bracket label for a single age value.

    Anything below 30 is labelled '[17, 30)'; anything that is not below 70
    falls through to '70+'.
    """
    if age < 30:
        return '[17, 30)'
    if age < 40:
        return '[30, 40)'
    if age < 50:
        return '[40, 50)'
    if age < 60:
        return '[50, 60)'
    if age < 70:
        return '[60, 70)'
    return '70+'


# Label every row with its age bracket.
df['age_group'] = df['age'].apply(_age_bucket)

In [None]:
# Preview the new 'age_group' labels next to the raw 'age' values.
df.loc[:, ['age','age_group','job','education','contact','duration','campaign','y','conversion']]

In [None]:
# Rebuild 'age_group' with pd.cut: left-closed, right-open bins between
# these edges (right=False). This overwrites the column's previous values.
age_bin_edges = [17, 30, 40, 50, 60, 70, 100]
df['age_group'] = pd.cut(df['age'], bins=age_bin_edges, right=False)

df.loc[:, ['age','age_group','job','education','contact','duration','campaign','y','conversion']]

In [None]:
# Per age group: conversions ('sum') and number of rows ('len').
# The string 'sum' replaces the np.sum callable, which pandas deprecates
# inside groupby aggregations; the column names ('sum', 'len') are unchanged.
conversions_by_age_group = df.groupby('age_group')['conversion'].agg(
    ['sum', len])
conversions_by_age_group

In [None]:
# Cross-tab alternative: counts per age group with totals, keeping only the
# conversion and 'All' columns (the non-conversion column 0 is dropped).

age_group_outcomes = pd.crosstab(df['age_group'], df['conversion'], margins=True)
age_group_outcomes.drop(columns=0)

In [None]:
# Conversion rate in percent for each age group: 'sum' / 'len' * 100.
conversions_by_age_group['rate'] = (
    conversions_by_age_group['sum']
    .div(conversions_by_age_group['len'])
    .mul(100)
)

conversions_by_age_group

In [None]:
# Age groups ordered from highest to lowest conversion rate.
conversions_by_age_group.sort_values(by='rate', ascending=False)

In [None]:
# Bar chart with one bar per age group showing its conversion rate.
ax = conversions_by_age_group['rate'].plot(
    kind='bar',
    title='Conversion Rates by Age Groups',
    figsize=(10, 7),
    color='skyblue',
    grid=True,
)
ax.set(xlabel='age', ylabel='conversion rate (%)')

plt.show()

### 4. Conversions vs. Non-Conversions

#### 4.1. Marital Status

In [None]:
# Row counts for each marital status (rows) and conversion flag (columns);
# aggfunc=len over 'y' counts how many rows fall into each cell.
conversions_by_marital_status = df.pivot_table(
    values='y',
    index='marital',
    columns='conversion',
    aggfunc=len,
)

conversions_by_marital_status

In [None]:
# Cross-tab of marital status (rows) against the conversion flag (columns).
pd.crosstab(df['marital'], df['conversion'])

In [None]:
# Positional relabel of the two outcome columns.
# NOTE(review): assumes the pivot's columns are ordered 0 then 1 - confirm.
marital_outcome_labels = ['non_conversions', 'conversions']
conversions_by_marital_status.columns = marital_outcome_labels
conversions_by_marital_status

In [None]:
# One pie per column (subplots=True), each slice labelled with its
# percentage share to one decimal place.
conversions_by_marital_status.plot(
    kind='pie',
    subplots=True,
    startangle=90,
    figsize=(15, 7),
    autopct='%0.1f%%',
)

plt.show()

#### 4.2. Education

In [None]:
# Row counts for each education level (rows) and conversion flag (columns);
# aggfunc=len over 'y' counts how many rows fall into each cell.
conversions_by_education = df.pivot_table(
    values='y',
    index='education',
    columns='conversion',
    aggfunc=len,
)

conversions_by_education

In [None]:
# Cross-tab of education level (rows) against the conversion flag (columns),
# as an alternative to the pivot table above.

pd.crosstab(df['education'], df['conversion'])

In [None]:
# Positional relabel of the two outcome columns.
# NOTE(review): assumes the pivot's columns are ordered 0 then 1 - confirm.
education_outcome_labels = ['non_conversions', 'conversions']
conversions_by_education.columns = education_outcome_labels
conversions_by_education

In [None]:
# One pie per column (subplots=True) without a legend, each slice labelled
# with its percentage share to one decimal place.
conversions_by_education.plot(
    kind='pie',
    subplots=True,
    legend=False,
    startangle=90,
    figsize=(15, 7),
    autopct='%0.1f%%',
)

plt.show()

#### 4.3. Last Contact Duration

In [None]:
# Summary statistics of 'duration', split by conversion flag (0 vs. 1).
df.groupby('conversion')['duration'].describe()

In [None]:
# 'duration' values of the converted rows, re-indexed from 0.
converted_mask = df['conversion'] == 1
duration_conversions = df.loc[converted_mask, 'duration'].reset_index(drop=True)

duration_conversions

In [None]:
# 'duration' values of the non-converted rows, re-indexed from 0.
not_converted_mask = df['conversion'] == 0
duration_nonconversions = df.loc[not_converted_mask, 'duration'].reset_index(drop=True)

duration_nonconversions

In [None]:
# Put the two duration series side by side as columns, aligned on their
# 0-based indexes.
duration_columns = {
    'conversions': duration_conversions,
    'non_conversions': duration_nonconversions,
}
durations_df = pd.DataFrame(duration_columns)

durations_df

In [None]:
# Side-by-side box plots of call duration, one box per column of durations_df.
ax = durations_df.plot(
    kind='box', 
    grid=True,
    figsize=(10,7)
)

ax.set_title('Last Contact Duration')
# The boxes are drawn vertically, so the durations run along the y-axis, and
# the dataset documentation gives `duration` in seconds rather than minutes.
ax.set_ylabel('last contact duration (seconds)')

plt.show()

### 5. Conversions by Age Groups & Marital Status

In [None]:
# Conversions per (age group, marital status), with marital status pivoted
# into columns by unstack().
df.groupby(['age_group', 'marital'])['conversion'].sum().unstack()

In [None]:
# Pivot-table alternative: conversions per age group (rows) and marital
# status (columns). The string 'sum' replaces the np.sum callable, which
# pandas deprecates as an aggregation function.

df.pivot_table(
    index='age_group', columns='marital',
    values='conversion',  aggfunc='sum')

In [None]:
# Cross-tab alternative: counts indexed by (conversion, age_group) against
# marital status; .xs(1) keeps only the conversion == 1 rows.

pd.crosstab([df['conversion'], df['age_group']], df['marital']).xs(1)


In [None]:
# Conversions per age group (rows) and marital status (columns); cells with
# no data are filled with 0. The string 'sum' replaces the np.sum callable,
# which pandas deprecates as an aggregation function.
age_marital_df = df.pivot_table(
    index='age_group', columns='marital',
    values='conversion',  aggfunc='sum').fillna(0)

age_marital_df

In [None]:
# Number of non-null 'conversion' entries in each age group.
group_counts = df.groupby('age_group')['conversion'].count()
group_counts

In [None]:
# Divide each age group's row by that group's total count and scale to
# percent, matching the other rate columns in this notebook and the
# "conversion rate (%)" axis label used when this frame is plotted.
# Note: this rebinds age_marital_df, so re-running this cell without first
# re-running the pivot cell would divide (and scale) a second time.
age_marital_df = age_marital_df.divide(group_counts,axis=0) * 100.0

age_marital_df

In [None]:
# Grouped bar chart: one cluster per age group, one bar per marital status.
ax = age_marital_df.plot(kind='bar', figsize=(10,7), grid=True)

ax.set(
    title='Conversion rates by Age & Marital Status',
    xlabel='age group',
    ylabel='conversion rate (%)',
)

plt.show()

In [None]:
# Stacked bar chart: one bar per age group, segmented by marital status.
ax = age_marital_df.plot(kind='bar', stacked=True, figsize=(10,7), grid=True)

ax.set(
    title='Conversion rates by Age & Marital Status',
    xlabel='age group',
    ylabel='conversion rate (%)',
)

plt.show()