# Chapter 10: Deep learning in Finance

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for 'retina' figure output.
%config InlineBackend.figure_format = 'retina'

In [2]:
import matplotlib.pyplot as plt
import warnings

# Use the seaborn style sheet under whichever name this matplotlib ships it.
# It was renamed from 'seaborn' to 'seaborn-v0_8' in matplotlib 3.6 and the
# old name was later removed, so hard-coding 'seaborn' fails on new releases.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# Default figure size (16 x 9 inches) and resolution for every plot below.
plt.rcParams['figure.figsize'] = [16, 9]
plt.rcParams['figure.dpi'] = 300

# Silence FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Deep learning for tabular data 

In [3]:
from fastai import *
from fastai.tabular import *
import torch
import pandas as pd

In [4]:
from chapter_10_utils import performance_evaluation_report

In [5]:
# Load the dataset from the CSV file: the first column becomes the index and
# empty strings are read as missing values.
df = pd.read_csv(
    'credit_card_default.csv',
    na_values='',
    index_col=0,
)

In [6]:
# Distinct values present in the September payment status column
df['payment_status_sep'].unique()

array(['Payment delayed 2 months', 'Payed duly', 'Unknown', 'Payment delayed 1 month', 'Payment delayed 3 months',
       'Payment delayed 4 months', 'Payment delayed 8 months', 'Payment delayed 7 months', 'Payment delayed 5 months',
       'Payment delayed 6 months'], dtype=object)

In [7]:
# Identify the dependent variable and the numerical/categorical features
der_var = 'default_payment_next_month'

# Columns are assigned to a feature group by their pandas dtype.
cat_features = df.select_dtypes(include='object').columns.tolist()
num_features = df.select_dtypes(include='number').columns.tolist()
# The dependent variable is numeric too, so take it out of the feature list.
num_features.remove(der_var)

# Preprocessing steps handed to the tabular data pipeline.
preprocessing = [FillMissing, Categorify, Normalize]

In [8]:
# Create the TabularDataBunch from the DataFrame: declare the feature columns
# and preprocessing, split off a random 20% validation set with a fixed seed,
# label with the dependent variable and build the DataBunch.
data = (
    TabularList
    .from_df(df, cat_names=cat_features, cont_names=num_features, procs=preprocessing)
    .split_by_rand_pct(valid_pct=0.2, seed=42)
    .label_from_df(cols=der_var)
    .databunch(num_workers=0)
)

In [9]:
# Inspect rows: display 5 rows of a batch drawn from the DataBunch
data.show_batch(rows=5)

sex,education,marriage,payment_status_sep,payment_status_aug,payment_status_jul,payment_status_jun,payment_status_may,payment_status_apr,age_na,limit_bal,age,bill_statement_sep,bill_statement_aug,bill_statement_jul,bill_statement_jun,bill_statement_may,bill_statement_apr,previous_payment_sep,previous_payment_aug,previous_payment_jul,previous_payment_jun,previous_payment_may,previous_payment_apr,target
Female,University,Single,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,False,0.8693,-1.0303,-0.3398,-0.3083,-0.2692,-0.2166,-0.1698,-0.1357,-0.2266,-0.1714,-0.1896,-0.2074,-0.2125,-0.2096,0
Female,Others,Married,Unknown,Unknown,Unknown,Payed duly,Unknown,Unknown,False,-0.0557,0.7066,-0.7154,-0.646,-0.675,-0.6395,-0.6451,-0.6539,-0.0027,-0.2455,-0.1765,-0.2994,-0.3085,-0.2955,0
Male,University,Single,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,False,-0.364,-0.596,0.3608,0.3062,0.2082,0.1432,0.1513,0.0393,-0.0393,-0.1219,-0.1786,-0.1767,-0.2125,-0.2096,0
Female,University,Married,Payment delayed 2 months,Payment delayed 2 months,Payment delayed 2 months,Payment delayed 2 months,Payment delayed 2 months,Payment delayed 2 months,False,-0.7494,1.6836,-0.0956,-0.0565,-0.0142,0.0561,0.1213,0.1638,-0.2149,-0.1672,-0.1841,-0.189,-0.1869,-0.1867,0
Female,High school,Married,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,False,-0.0557,0.3809,-0.6109,-0.6562,-0.675,-0.6714,-0.6622,-0.6539,-0.1915,-0.2455,-0.2882,-0.2994,-0.3085,-0.2955,0


In [10]:
# Define the learner object: two hidden layers (1000 and 500 units) with one
# dropout probability per layer, dropout on the embeddings, and recall plus
# two F-beta scores tracked as metrics.
learn = tabular_learner(
    data,
    layers=[1000, 500],
    ps=[0.001, 0.01],
    emb_drop=0.04,
    metrics=[Recall(), FBeta(beta=1), FBeta(beta=5)],
)

In [11]:
# Display the learner's model; as the cell's last expression its repr is shown.
learn.model

TabularModel(
  (embeds): ModuleList(
    (0): Embedding(3, 3)
    (1): Embedding(5, 4)
    (2): Embedding(4, 3)
    (3): Embedding(11, 6)
    (4): Embedding(11, 6)
    (5): Embedding(11, 6)
    (6): Embedding(11, 6)
    (7): Embedding(10, 6)
    (8): Embedding(10, 6)
    (9): Embedding(3, 3)
  )
  (emb_drop): Dropout(p=0.04, inplace=False)
  (bn_cont): BatchNorm1d(14, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): Linear(in_features=63, out_features=1000, bias=True)
    (1): ReLU(inplace=True)
    (2): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.001, inplace=False)
    (4): Linear(in_features=1000, out_features=500, bias=True)
    (5): ReLU(inplace=True)
    (6): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.01, inplace=False)
    (8): Linear(in_features=500, out_features=2, bias=True)
  )
)

In [None]:
# Find suggested learning rate: run the learning-rate finder, then plot what
# the recorder collected, asking for the suggested value (suggestion=True).
learn.lr_find()
learn.recorder.plot(suggestion=True)


epoch,train_loss,valid_loss,recall,f_beta,f_beta.1,time


In [None]:
# Train the neural network for 25 epochs with a fixed lr and wd.
# NOTE(review): lr=1e-06 is presumably read off the learning-rate finder plot
# from the previous cell, and wd is presumably weight decay; confirm both.
learn.fit(epochs=25, lr=1e-06, wd=0.2)

In [None]:
# Extract predictions for the validation set.
# The second value returned by get_preds (the targets, per the fastai docs) is
# bound to a real name instead of `_`: assigning to `_` in IPython/Jupyter
# overrides the shell's "last output" variable, which then stops updating.
preds_valid, y_valid = learn.get_preds(ds_type=DatasetType.Valid)

# Predicted class = index of the largest value along the last dimension.
pred_valid = preds_valid.argmax(dim=-1)


In [None]:
# Inspect the performance: build a ClassificationInterpretation from the
# learner and plot its confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [None]:
# Inspect performance evaluation metrics with the helper imported from
# chapter_10_utils, passing it the learner.
performance_evaluation_report(learn)