# Connecting the Datasets

This notebook will combine our datasets and look into what impact consumer events have on mortgage state.

### Import libraries

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Load, view and merge datasets

In [5]:
# Read the three source CSVs (applicants, consumer events, mortgages) from the working directory
app_df = pd.read_csv('applicant.csv')
evt_df = pd.read_csv('event.csv')
mtg_df = pd.read_csv('mortgage.csv')

# Preview the first two rows of each frame to see which columns can serve as join keys
display(
    app_df.head(2),
    evt_df.head(2),
    mtg_df.head(2),
)

Unnamed: 0,applicant_id,applicant_mortgage_id,applicant_consumer_id
0,ccc6ff06ea696696b4297f1dbf727350,a9e9412e0f8af6080bb3fbc717796549,01ebee02ac29462580e70365033564b0
1,337e4de1829895a00c73fc37917bf066,634ede757f052342e036b0ad6d22d303,47cdf18b3fe4aa332ee352c2926783c6


Unnamed: 0,event_id,event_type,event_created_at,event_consumer_id
0,d22abb371f9c3f11346f4c7be25090e9,email_open,2020-12-01T07:51:31Z,01978065ceee79b338d479bd5eac41da
1,fa82669e074cf6238d5da6fefe4d0418,email_sent,2022-05-09T09:31:37Z,ff5a2159a4a246182a412fed5d68487d


Unnamed: 0,mortgage_id,mortgage_type,mortgage_payment_type,mortgage_property_value,mortgage_loan_amount,mortgage_initial_rate,mortgage_scheme,mortgage_state_updated
0,1ad9ba91c6541dd973cc924b6db7e311,owner_occupied,repayment,550000,290000,2.2,fixed,open
1,317ac7153428730d726b5a4b7a71122b,owner_occupied,repayment,100000,70000,2.9,fixed,not_interested


We can see that we can merge the applicant and event dataframes on applicant_consumer_id and event_consumer_id, and subsequently merge the result with the mortgage dataframe on applicant_mortgage_id and mortgage_id:

In [7]:
# Give the join keys a common name in every frame so each merge can use a single `on=` column
app_df = app_df.rename(columns={'applicant_consumer_id': 'consumer_id', 'applicant_mortgage_id': 'mortgage_id'})
evt_df = evt_df.rename(columns={'event_consumer_id': 'consumer_id'})

# Chain the joins: applicants -> events (consumer_id) -> mortgages (mortgage_id).
# Inner joins keep only rows that have a match on both sides.
# NOTE(review): a consumer linked to more than one applicant/mortgage gets each of
# its events repeated once per mortgage, which may inflate event counts downstream —
# confirm this is intended.
merge_df = app_df.merge(evt_df, on='consumer_id', how='inner')
df = merge_df.merge(mtg_df, on='mortgage_id', how='inner')

We can look at our new dataframe:

In [8]:
# Summary statistics for every column (numeric and non-numeric), one row per column
df.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
applicant_id,48565.0,1932.0,5cfad988a68337e7629afa7de36500e4,623.0,,,,,,,
mortgage_id,48565.0,1932.0,feaea1c15ad08db0762cf14196c4314c,623.0,,,,,,,
consumer_id,48565.0,1842.0,b7730d37a71e86ea762a5367a9e88bf9,623.0,,,,,,,
event_id,48565.0,45370.0,10cee16ef7f12ffaff79bbc7588d0b98,4.0,,,,,,,
event_type,48565.0,7.0,email_open,26098.0,,,,,,,
event_created_at,48565.0,31084.0,2021-09-14T08:01:34Z,918.0,,,,,,,
mortgage_type,48565.0,2.0,owner_occupied,48148.0,,,,,,,
mortgage_payment_type,48565.0,3.0,repayment,47448.0,,,,,,,
mortgage_property_value,48565.0,,,,298565.015958,207190.97707,40000.0,170000.0,250000.0,380000.0,9500000.0
mortgage_loan_amount,48565.0,,,,149373.21116,106069.950383,10000.0,90000.0,130000.0,180000.0,2280000.0


In [9]:
# Peek at the first five rows of the merged frame
df.head(n=5)

Unnamed: 0,applicant_id,mortgage_id,consumer_id,event_id,event_type,event_created_at,mortgage_type,mortgage_payment_type,mortgage_property_value,mortgage_loan_amount,mortgage_initial_rate,mortgage_scheme,mortgage_state_updated
0,ccc6ff06ea696696b4297f1dbf727350,a9e9412e0f8af6080bb3fbc717796549,01ebee02ac29462580e70365033564b0,59621e0c159c1e6889f215e24dc46154,email_sent,2021-08-30T08:01:45Z,owner_occupied,repayment,190000,100000,2.5,fixed,open
1,ccc6ff06ea696696b4297f1dbf727350,a9e9412e0f8af6080bb3fbc717796549,01ebee02ac29462580e70365033564b0,f9ad79ba968698f44113dc945970d32c,email_open,2021-08-30T08:21:43Z,owner_occupied,repayment,190000,100000,2.5,fixed,open
2,337e4de1829895a00c73fc37917bf066,634ede757f052342e036b0ad6d22d303,47cdf18b3fe4aa332ee352c2926783c6,f27046a6716ef26c6c41edc5aaeaab54,email_sent,2021-08-30T08:01:45Z,owner_occupied,repayment,420000,100000,2.1,fixed,open
3,2730b41d0474e03194055d3ebce3c33b,e2b5ea63e8b163308787d05250ce71ad,e7b37f9f4603b3b56aaf9ccba7133fb6,96efbf98d7bc994aa3956e0489382669,email_sent,2022-05-23T09:31:14Z,owner_occupied,repayment,420000,190000,1.8,fixed,open
4,2730b41d0474e03194055d3ebce3c33b,e2b5ea63e8b163308787d05250ce71ad,e7b37f9f4603b3b56aaf9ccba7133fb6,a5935711388ffdebb5adda1c3f026d3b,email_sent,2022-06-06T09:40:58Z,owner_occupied,repayment,420000,190000,1.8,fixed,open


We can group this data by mortgage state and event type to show how many instances of each combination there are:

In [10]:
# Count rows for every (mortgage state, event type) pair, then pivot the event
# types into columns; pairs that never occur are shown as 0
impact_analysis = (
    df.groupby(['mortgage_state_updated', 'event_type'])
    .size()
    .unstack(level='event_type', fill_value=0)
)

impact_analysis

event_type,account_created,blog_post_read,call_requested,email_open,email_sent,logged_in,pageview
mortgage_state_updated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
found_refinance,18,31,20,633,371,32,222
lost,59,39,48,6356,3700,34,532
not_interested,112,122,112,4733,1334,149,1950
open,163,190,119,6358,4026,111,2626
retained,236,241,373,8018,2018,264,3215


In [13]:
# Row totals: number of events counted under each mortgage state
total_counts_mtg_state = impact_analysis.sum(axis='columns')

# Divide each row by its total so every mortgage state's event mix sums to 100%
impact_analysis_percentage_mtg_state = impact_analysis.div(total_counts_mtg_state, axis='index').mul(100)

impact_analysis_percentage_mtg_state

event_type,account_created,blog_post_read,call_requested,email_open,email_sent,logged_in,pageview
mortgage_state_updated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
found_refinance,1.356443,2.336096,1.507159,47.701583,27.9578,2.411454,16.729465
lost,0.54792,0.362184,0.445765,59.026746,34.36107,0.31575,4.940565
not_interested,1.315789,1.433271,1.315789,55.603853,15.671992,1.75047,22.908835
open,1.199147,1.397778,0.875451,46.774075,29.618186,0.816597,19.318767
retained,1.642882,1.677689,2.596589,55.81622,14.048033,1.8378,22.380787


In [15]:
# Column totals: number of events counted for each event type
total_counts_evt_type = impact_analysis.sum(axis='index')

# Divide each column by its total so every event type's spread across mortgage states sums to 100%
impact_analysis_percentage_evt_type = impact_analysis.div(total_counts_evt_type, axis='columns').mul(100)

impact_analysis_percentage_evt_type

event_type,account_created,blog_post_read,call_requested,email_open,email_sent,logged_in,pageview
mortgage_state_updated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
found_refinance,3.061224,4.975923,2.97619,2.425473,3.240458,5.423729,2.598011
lost,10.034014,6.260032,7.142857,24.354357,32.317233,5.762712,6.225863
not_interested,19.047619,19.582665,16.666667,18.135489,11.651673,25.254237,22.820363
open,27.721088,30.497592,17.708333,24.36202,35.164643,18.813559,30.731422
retained,40.136054,38.683788,55.505952,30.722661,17.625994,44.745763,37.624342
