# Hypothesis - Device Activity (only step, no analysis)

## Data Cleaning

In [19]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import resample
from sklearn.model_selection import train_test_split

In [20]:
# import data
X_train = pd.read_csv('../data/processed/X_train.csv')
y_train = pd.read_csv('../data/processed/y_train.csv')
data = pd.concat([y_train, X_train], axis=1)

print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 31)


Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,bank_months_count,has_other_cards,proposed_credit_limit,foreign_request,source,session_length_in_minutes,device_os,keep_alive_session,device_distinct_emails_8w,month
0,0,0.9,0.383245,28,26,30,0.017751,-1.0,AB,1125,...,11,0,200.0,0,INTERNET,20.301149,macintosh,0,1,2
1,0,0.8,0.717703,-1,63,40,0.011542,52.68933,AA,791,...,20,0,200.0,0,INTERNET,4.309454,other,1,1,2
2,0,0.3,0.517523,-1,78,20,0.008581,-1.0,AB,1844,...,1,1,1500.0,0,INTERNET,4.491135,other,0,1,0
3,0,0.9,0.35826,-1,45,40,0.016844,-1.0,AC,1150,...,-1,0,500.0,0,INTERNET,1.507028,windows,1,2,2
4,0,0.8,0.183328,11,92,20,0.015774,-1.0,AD,525,...,1,0,500.0,0,INTERNET,2.000494,linux,1,1,1


In [21]:
data['fraud_bool'].value_counts()

fraud_bool
0    43957
1     8819
Name: count, dtype: int64

## EDA Bank Activity and Device Activity

**Hypothesis: Fraudulent Bank Accounts have unique characteristics in relation to how their device activity.**
* More likely to use a phone that has more lax security i.e. windows
* More likely to have more than 1 email in each device. --> more emails to create more fake accounts
* If a phone has more than 1 email from each device, then it keep_alive_session likely to be false (i.e. value=0) --> convienience on fraudster's end. troublesome to log in and out continually.
* Session length will also be short??

**Columns considered**

* Device Activity:
    * `session_length_in_minutes`
    * `device_os`
    * `keep_alive_session`
    * `device_distinct_emails_8w`
    * `source`
    * `foreign_request`

**Explanation of Columns**

| Column Name | Description | Link to Fraud |
|:------------|:------------|:--------------|
| device_distinct_emails_8w | No. of distinct emails in banking website from the used device in last 8 weeks. <br> Simply put, if I use my email address to log into the banking website, <br> then I effectively used 1 distinct email address. <br>If I leave the phone unattended for 8 weeks, then this value becomes 0 due to inactivity for 8 weeks. | Spare phones are commonly used to prevent banking companies from tracking them. <br> Thus, able to explain the inactivity. When the phone is actually used, <br>the fraudsters are likely to use more than 2 distinct email address for 2 accounts using the same phone.


In [22]:
total_fraud_count = data['fraud_bool'].value_counts().to_frame().loc[1, "count"]
total_non_fraud_count = data['fraud_bool'].value_counts().to_frame().loc[0, "count"]

# print total counts of fraud and non-fraud
print(f"Total Fraud Count: {total_fraud_count}")
print(f"Total Non-Fraud Count: {total_non_fraud_count}")
print(f"Total Count: {total_fraud_count + total_non_fraud_count} \t Data Shape: {data.shape}")

Total Fraud Count: 8819
Total Non-Fraud Count: 43957
Total Count: 52776 	 Data Shape: (52776, 31)


In [23]:
def getFraudInfo(group):
    # get total count of fraud within this group
    fraud_count = group['fraud_bool'].value_counts().to_frame().loc[1, "count"]
    total_count = group.shape[0]
    fraud_proportion = round(fraud_count / total_count, 4)

    group['num_fraud'] = fraud_count
    group['proportion_of_fraud_in_group'] = fraud_proportion
    return group[['num_fraud', 'proportion_of_fraud_in_group']].drop_duplicates()

## Feature 1: `FE_01_device_os_emails_prob`

In [24]:
# 5 groups for feature 1
# group01 = filter for conditions where device_os == windows and device_distinct_emails_8w.isin([0,2])
# group02 = filter for conditions where device_os == macintosh and device_distinct_emails_8w.isin([0,2])
# group03 = filter for conditions where device_os == x11 and device_distinct_emails_8w.isin([0,2])
# group04 = filter for conditions where device_os == other and device_distinct_emails_8w.isin([0,2])
# group05 = everything else

# split data into 5 groups by these conditions
group01 = data[(data['device_os'] == 'windows') & (data['device_distinct_emails_8w'].isin([0, 2]))]
group02 = data[(data['device_os'] == 'macintosh') & (data['device_distinct_emails_8w'].isin([0, 2]))]
group03 = data[(data['device_os'] == 'x11') & (data['device_distinct_emails_8w'].isin([0, 2]))]
group04 = data[(data['device_os'] == 'other') & (data['device_distinct_emails_8w'].isin([0, 2]))]
# group05 will be all the groups except those in group01, group02, group03, group04
group05 = data[~data.index.isin(group01.index) & ~data.index.isin(group02.index) & ~data.index.isin(group03.index) & ~data.index.isin(group04.index)]

# combine into 1 dataframe, add label to that dataframe, label each group 'A', 'B', 'C', 'D', 'E', all in new column 'FE_01'
group01['FE_01'] = 'A'; group02['FE_01'] = 'B'; group03['FE_01'] = 'C'; group04['FE_01'] = 'D'; group05['FE_01'] = 'E'

# add back
data = pd.concat([group01, group02, group03, group04, group05])

# change 'FE_01' to category
data['FE_01'] = data['FE_01'].astype('category')

# assign proability of fraud based on label, so if data['FE_01'] labelled 'A', then assign corresponding probability of fraud
FE_01_prob = data.groupby(['FE_01']).apply(getFraudInfo).reset_index()[['FE_01', 'proportion_of_fraud_in_group']]

FE_01_prob_mapping = {"A": 0.6047,
                      "B": 0.4529,
                      "C": 0.3846,
                      "D": 0.2978,
                      "E": 0.1564}

# map the probability of fraud to the device_acitivtiy_df, as a new column 'FE_01_device_os_emails_prob'
data['FE_01_device_os_emails_prob'] = data['FE_01'].map(FE_01_prob_mapping)

# check shape and head
print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_01'] = 'A'; group02['FE_01'] = 'B'; group03['FE_01'] = 'C'; group04['FE_01'] = 'D'; group05['FE_01'] = 'E'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_01'] = 'A'; group02['FE_01'] = 'B'; group03['FE_01'] = 'C'; group04['FE_01'] = 'D'; group05['FE_01'] = 'E'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#

Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,proposed_credit_limit,foreign_request,source,session_length_in_minutes,device_os,keep_alive_session,device_distinct_emails_8w,month,FE_01,FE_01_device_os_emails_prob
3,0,0.9,0.35826,-1,45,40,0.016844,-1.0,AC,1150,...,500.0,0,INTERNET,1.507028,windows,1,2,2,A,0.6047
19,1,0.9,0.994705,-1,187,50,0.032755,-1.0,AC,1781,...,200.0,0,INTERNET,2.909931,windows,0,2,0,A,0.6047
105,0,0.7,0.842539,-1,2,60,0.005686,-1.0,AB,1397,...,500.0,0,INTERNET,19.697471,windows,0,2,7,A,0.6047
128,0,0.5,0.549786,-1,50,40,27.293091,-1.0,AC,1298,...,1500.0,0,INTERNET,4.508612,windows,0,0,1,A,0.6047
151,1,0.9,0.315475,-1,185,40,0.031028,-1.0,AC,1451,...,2000.0,0,INTERNET,30.256072,windows,0,2,4,A,0.6047


## Feature 2: `FE_02_keep_alive_device_emails_prob`

In [25]:
# divide into 6 groups, one for each unique permutation of keep_alive_session and device_distinct_emails_8w
group01 = data[(data['keep_alive_session'] == 0) & (data['device_distinct_emails_8w'] == 0)]
group02 = data[(data['keep_alive_session'] == 0) & (data['device_distinct_emails_8w'] == 1)]
group03 = data[(data['keep_alive_session'] == 0) & (data['device_distinct_emails_8w'] == 2)]
group04 = data[(data['keep_alive_session'] == 1) & (data['device_distinct_emails_8w'] == 0)]
group05 = data[(data['keep_alive_session'] == 1) & (data['device_distinct_emails_8w'] == 1)]
group06 = data[(data['keep_alive_session'] == 1) & (data['device_distinct_emails_8w'] == 2)]

# label each group from "A" to "F"
group01['FE_02'] = "A"; group02['FE_02'] = "B"; group03['FE_02'] = "C"; group04['FE_02'] = "D"; group05['FE_02'] = "E"; group06['FE_02'] = "F"

# concatenate all the group
data = pd.concat([group01, group02, group03, group04, group05, group06])

# change 'FE_02' to category
data['FE_02'] = data['FE_02'].astype('category')

# generate mapping
FE_02_prob_mappping = {"A": 0.3520,
                       "B": 0.2230,
                       "C": 0.4732,
                       "D": 0.1591,
                       "E": 0.1016,
                       "F": 0.3131
                       }

# map the probability of fraud to the device_acitivtiy_df, as a new column 'FE_01_device_os_emails_prob'
data['FE_02_keep_alive_device_emails_prob'] = data['FE_02'].map(FE_02_prob_mappping)

# check shape and head
print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 35)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_02'] = "A"; group02['FE_02'] = "B"; group03['FE_02'] = "C"; group04['FE_02'] = "D"; group05['FE_02'] = "E"; group06['FE_02'] = "F"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_02'] = "A"; group02['FE_02'] = "B"; group03['FE_02'] = "C"; group04['FE_02'] = "D"; group05['FE_02'] = "E"; group06['FE_02'] = "F"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.

Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,source,session_length_in_minutes,device_os,keep_alive_session,device_distinct_emails_8w,month,FE_01,FE_01_device_os_emails_prob,FE_02,FE_02_keep_alive_device_emails_prob
128,0,0.5,0.549786,-1,50,40,27.293091,-1.0,AC,1298,...,INTERNET,4.508612,windows,0,0,1,A,0.6047,A,0.352
1422,1,0.7,0.174847,-1,61,70,0.003595,9.522029,AB,1486,...,INTERNET,3.560275,windows,0,0,2,A,0.6047,A,0.352
1572,0,0.8,0.849118,-1,202,40,0.010027,-1.0,AC,3229,...,INTERNET,3.535784,windows,0,0,1,A,0.6047,A,0.352
2380,0,0.7,0.366895,-1,300,50,0.017651,41.225517,AA,3009,...,INTERNET,6.786212,windows,0,0,3,A,0.6047,A,0.352
2739,1,0.7,0.359558,-1,65,40,0.011045,-1.0,AC,673,...,INTERNET,6.204049,windows,0,0,5,A,0.6047,A,0.352


## Feature 3: `FE_03_source_foreign_request_prob`

In [26]:
# divide into 4 groups, one for each unique permutation of foreign_request and source
group01 = data[(data['source'] == "INTERNET") & (data['foreign_request'] == 0)]
group02 = data[(data['source'] == "INTERNET") & (data['foreign_request'] == 1)]
group04 = data[(data['source'] == "TELEAPP") & (data['foreign_request'] == 0)]
group03 = data[(data['source'] == "TELEAPP") & (data['foreign_request'] == 1)]

# label each group from "A" to "D", label_name = "FE_03"
group01['FE_03'] = "A"; group02['FE_03'] = "B"; group03['FE_03'] = "C"; group04['FE_03'] = "D"


# concatenate all the group
data = pd.concat([group01, group02, group03, group04])

# change 'FE_02' to category
data['FE_03'] = data['FE_03'].astype('category')

# generate mapping
FE_03_prob_mappping = {"A": 0.1627,
                       "B": 0.2782,
                       "C": 0.2448,
                       "D": 0.5000
                       }

# map the probability of fraud to the device_acitivtiy_df, as a new column 'FE_01_device_os_emails_prob'
data['FE_03_source_foreign_request_prob'] = data['FE_03'].map(FE_03_prob_mappping)

# check shape and head
print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 37)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_03'] = "A"; group02['FE_03'] = "B"; group03['FE_03'] = "C"; group04['FE_03'] = "D"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_03'] = "A"; group02['FE_03'] = "B"; group03['FE_03'] = "C"; group04['FE_03'] = "D"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_03'

Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,device_os,keep_alive_session,device_distinct_emails_8w,month,FE_01,FE_01_device_os_emails_prob,FE_02,FE_02_keep_alive_device_emails_prob,FE_03,FE_03_source_foreign_request_prob
128,0,0.5,0.549786,-1,50,40,27.293091,-1.0,AC,1298,...,windows,0,0,1,A,0.6047,A,0.352,A,0.1627
1422,1,0.7,0.174847,-1,61,70,0.003595,9.522029,AB,1486,...,windows,0,0,2,A,0.6047,A,0.352,A,0.1627
1572,0,0.8,0.849118,-1,202,40,0.010027,-1.0,AC,3229,...,windows,0,0,1,A,0.6047,A,0.352,A,0.1627
2380,0,0.7,0.366895,-1,300,50,0.017651,41.225517,AA,3009,...,windows,0,0,3,A,0.6047,A,0.352,A,0.1627
2739,1,0.7,0.359558,-1,65,40,0.011045,-1.0,AC,673,...,windows,0,0,5,A,0.6047,A,0.352,A,0.1627


## Feature 4: `FE_04_device_os_foreign_request_prob`

In [27]:
# group into 4 groups
# divide into 4 groups, one for each unique permutation of foreign_request and source
group01 = data[(data['device_os'] == "windows") & (data['foreign_request'] == 0)]
group02 = data[(data['device_os'] == "windows") & (data['foreign_request'] == 1)]
group03 = data[(data['device_os'] == "macintosh") & (data['foreign_request'] == 0)]
group04 = data[~data.index.isin(group01.index) & ~data.index.isin(group02.index) & ~data.index.isin(group03.index)]

# label each group from "A" to "D", label_name = "FE_03"
group01['FE_04'] = "A"; group02['FE_04'] = "B"; group03['FE_04'] = "C"; group04['FE_04'] = "D"

# concatenate all the group
data = pd.concat([group01, group02, group03, group04])

# change 'FE_04' to category
data['FE_04'] = data['FE_04'].astype('category')

# generate mapping
FE_04_prob_mappping = {"A": 0.3027,
                       "B": 0.4605,
                       "C": 0.2010,
                       "D": 0.0916
                       }

# map the probability of fraud to the device_acitivtiy_df, as a new column 'FE_01_device_os_emails_prob'
data['FE_04_device_os_foreign_request_prob'] = data['FE_04'].map(FE_04_prob_mappping)

# check shape and head
print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 39)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_04'] = "A"; group02['FE_04'] = "B"; group03['FE_04'] = "C"; group04['FE_04'] = "D"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_04'] = "A"; group02['FE_04'] = "B"; group03['FE_04'] = "C"; group04['FE_04'] = "D"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group01['FE_04'

Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,device_distinct_emails_8w,month,FE_01,FE_01_device_os_emails_prob,FE_02,FE_02_keep_alive_device_emails_prob,FE_03,FE_03_source_foreign_request_prob,FE_04,FE_04_device_os_foreign_request_prob
128,0,0.5,0.549786,-1,50,40,27.293091,-1.0,AC,1298,...,0,1,A,0.6047,A,0.352,A,0.1627,A,0.3027
1422,1,0.7,0.174847,-1,61,70,0.003595,9.522029,AB,1486,...,0,2,A,0.6047,A,0.352,A,0.1627,A,0.3027
1572,0,0.8,0.849118,-1,202,40,0.010027,-1.0,AC,3229,...,0,1,A,0.6047,A,0.352,A,0.1627,A,0.3027
2380,0,0.7,0.366895,-1,300,50,0.017651,41.225517,AA,3009,...,0,3,A,0.6047,A,0.352,A,0.1627,A,0.3027
2739,1,0.7,0.359558,-1,65,40,0.011045,-1.0,AC,673,...,0,5,A,0.6047,A,0.352,A,0.1627,A,0.3027


## Feature 5: `FE_05_device_os_prob`

In [28]:
# generate mapping
FE_05_prob_mappping = {"windows": 0.3083,
                       "macintosh": 0.2041,
                       "linux": 0.0001,
                       "other": 0.0001,
                       "x11": 0.0001
                       }

# map the probability of fraud to the device_acitivtiy_df, as a new column 'FE_01_device_os_emails_prob'
data['FE_05_device_os_prob'] = data['device_os'].map(FE_05_prob_mappping)

# check shape and head
print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 40)


Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,month,FE_01,FE_01_device_os_emails_prob,FE_02,FE_02_keep_alive_device_emails_prob,FE_03,FE_03_source_foreign_request_prob,FE_04,FE_04_device_os_foreign_request_prob,FE_05_device_os_prob
128,0,0.5,0.549786,-1,50,40,27.293091,-1.0,AC,1298,...,1,A,0.6047,A,0.352,A,0.1627,A,0.3027,0.3083
1422,1,0.7,0.174847,-1,61,70,0.003595,9.522029,AB,1486,...,2,A,0.6047,A,0.352,A,0.1627,A,0.3027,0.3083
1572,0,0.8,0.849118,-1,202,40,0.010027,-1.0,AC,3229,...,1,A,0.6047,A,0.352,A,0.1627,A,0.3027,0.3083
2380,0,0.7,0.366895,-1,300,50,0.017651,41.225517,AA,3009,...,3,A,0.6047,A,0.352,A,0.1627,A,0.3027,0.3083
2739,1,0.7,0.359558,-1,65,40,0.011045,-1.0,AC,673,...,5,A,0.6047,A,0.352,A,0.1627,A,0.3027,0.3083


In [29]:
# drop labelled columns 'FE_01' to 'FE_05'
data.drop(columns=['FE_01','FE_02','FE_03','FE_04'], inplace=True)

# change all other feature engineered columns to float instead of category
data['FE_01_device_os_emails_prob'] = data['FE_01_device_os_emails_prob'].astype('float')
data['FE_02_keep_alive_device_emails_prob'] = data['FE_02_keep_alive_device_emails_prob'].astype('float')
data['FE_03_source_foreign_request_prob'] = data['FE_03_source_foreign_request_prob'].astype('float')
data['FE_04_device_os_foreign_request_prob'] = data['FE_04_device_os_foreign_request_prob'].astype('float')

In [30]:
# print shape and head
print(f'data shape: {data.shape}')
data.head()

data shape: (52776, 36)


Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,session_length_in_minutes,device_os,keep_alive_session,device_distinct_emails_8w,month,FE_01_device_os_emails_prob,FE_02_keep_alive_device_emails_prob,FE_03_source_foreign_request_prob,FE_04_device_os_foreign_request_prob,FE_05_device_os_prob
128,0,0.5,0.549786,-1,50,40,27.293091,-1.0,AC,1298,...,4.508612,windows,0,0,1,0.6047,0.352,0.1627,0.3027,0.3083
1422,1,0.7,0.174847,-1,61,70,0.003595,9.522029,AB,1486,...,3.560275,windows,0,0,2,0.6047,0.352,0.1627,0.3027,0.3083
1572,0,0.8,0.849118,-1,202,40,0.010027,-1.0,AC,3229,...,3.535784,windows,0,0,1,0.6047,0.352,0.1627,0.3027,0.3083
2380,0,0.7,0.366895,-1,300,50,0.017651,41.225517,AA,3009,...,6.786212,windows,0,0,3,0.6047,0.352,0.1627,0.3027,0.3083
2739,1,0.7,0.359558,-1,65,40,0.011045,-1.0,AC,673,...,6.204049,windows,0,0,5,0.6047,0.352,0.1627,0.3027,0.3083


In [31]:
# print dtypes
data.dtypes

fraud_bool                                int64
income                                  float64
name_email_similarity                   float64
prev_address_months_count                 int64
current_address_months_count              int64
customer_age                              int64
days_since_request                      float64
intended_balcon_amount                  float64
payment_type                             object
zip_count_4w                              int64
velocity_6h                             float64
velocity_24h                            float64
velocity_4w                             float64
bank_branch_count_8w                      int64
date_of_birth_distinct_emails_4w          int64
employment_status                        object
credit_risk_score                         int64
email_is_free                             int64
housing_status                           object
phone_home_valid                          int64
phone_mobile_valid                      