In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import time
import re
import string
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, classification_report

from nltk.corpus import stopwords
stop = stopwords.words('english')

import spacy
from spacy.util import minibatch, compounding

import torch

import warnings
warnings.filterwarnings(action="ignore")

In [28]:
# now we can start loading the file from here itself
df = pd.read_csv('cleaned_consumer_complaints.csv')

In [29]:
 # only the first 1000 rows to save time during training
df = df[['product', 'clean_text']][:1500]

In [30]:
df.head()

Unnamed: 0,product,clean_text
0,Debt collection,claimed owe years despite proof payment sent c...
1,Consumer Loan,due inconsistencies amount owed told bank amou...
2,Mortgage,wages earned job decreased almost half knew tr...
3,Mortgage,open current mortgage chase bank chase reporti...
4,Mortgage,submitted time submitted complaint dealt rushm...


In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   product     1500 non-null   object
 1   clean_text  1500 non-null   object
dtypes: object(2)
memory usage: 23.6+ KB


In [32]:
# check the frequency of each category
100.0*df['product'].value_counts()/len(df)

Debt collection            25.600000
Mortgage                   25.400000
Credit reporting           14.600000
Credit card                12.466667
Bank account or service     8.133333
Consumer Loan               5.666667
Student loan                4.266667
Payday loan                 1.600000
Money transfers             1.400000
Prepaid card                0.666667
Other financial service     0.200000
Name: product, dtype: float64

### Objective
The idea here is to use the text in the 'consumer_complaint_narrative' column to categorise it to the right category 

### Prepare train/test/valid dataset

In [33]:
label_values = list(df['product'].unique())
label_values

['Debt collection',
 'Consumer Loan',
 'Mortgage',
 'Credit card',
 'Credit reporting',
 'Student loan',
 'Bank account or service',
 'Payday loan',
 'Money transfers',
 'Other financial service',
 'Prepaid card']

In [34]:
train_X, test_X, train_y, test_y = train_test_split(df['clean_text'],
                                                   df['product'],
                                                   test_size=0.15,
                                                   stratify=df['product'],
                                                    random_state=36
                                                   )

train_X, valid_X, train_y, valid_y = train_test_split(train_X,
                                                     train_y,
                                                     test_size=0.15,
                                                     stratify=train_y,
                                                      random_state=36
                                                     )

In [35]:
print('Shape of train_X:', train_X.shape)
print('Shape of train_y:', train_y.shape)

print('\nShape of test_X:', test_X.shape)
print('Shape of test_y:', test_y.shape)

print('\nShape of valid_X:', valid_X.shape)
print('Shape of valid_y:', valid_y.shape)

Shape of train_X: (1083,)
Shape of train_y: (1083,)

Shape of test_X: (225,)
Shape of test_y: (225,)

Shape of valid_X: (192,)
Shape of valid_y: (192,)


### Convert dataset to spacy compatible format

In [36]:
# one hot encode all the labels
train_y_df = pd.get_dummies(train_y)
test_y_df = pd.get_dummies(test_y)
valid_y_df = pd.get_dummies(valid_y)

In [37]:
train_y_df.head()

Unnamed: 0,Bank account or service,Consumer Loan,Credit card,Credit reporting,Debt collection,Money transfers,Mortgage,Other financial service,Payday loan,Prepaid card,Student loan
1153,0,0,0,0,0,0,1,0,0,0,0
65,0,0,1,0,0,0,0,0,0,0,0
99,0,0,0,1,0,0,0,0,0,0,0
1256,0,0,0,0,0,0,0,0,0,0,1
973,0,0,0,0,1,0,0,0,0,0,0


In [38]:
# convert data to text list and label dictionaries
train_texts = train_X.tolist()
train_cats = train_y_df.to_dict(orient='records')

test_texts = test_X.tolist()
test_cats = test_y_df.to_dict(orient='records')

valid_texts = valid_X.tolist()
valid_cats = valid_y_df.to_dict(orient='records')

In [39]:
# combine the text and labels to create data in spacy format
train_data = list(zip(train_texts, [{'cats': cats} for cats in train_cats]))
test_data = list(zip(test_texts, [{'cats': cats} for cats in test_cats]))
valid_data = list(zip(valid_texts, [{'cats': cats} for cats in valid_cats]))

In [40]:
# check
train_data[:2]

[('done multiple requests mortgage consider giving help heloc loan fulfilling requirements denied trying add mortgage xxxx deny kind help',
  {'cats': {'Bank account or service': 0,
    'Consumer Loan': 0,
    'Credit card': 0,
    'Credit reporting': 0,
    'Debt collection': 0,
    'Money transfers': 0,
    'Mortgage': 1,
    'Other financial service': 0,
    'Payday loan': 0,
    'Prepaid card': 0,
    'Student loan': 0}}),
 ('complaint regards credit card bank america payment made made insurance company purchased bank america event life changes problem made aware bank america longer conducts business result money paid insurance company well refund sent bank america behalf instead bank america sending check applied credit brought balance problem right none well however bring balance said payment credit said something paid monthly like went store purchased something returned case would credit service provided customers event unable pay bill issue bank america accepted payment charged

In [41]:
# check
test_data[:2]

[('paid back sent letter credit bureaus wife showing account satisfied full problem credit bureaus reporting account remark section settled less full amount asked remove remark account uploaded faxed documentation showing account satisfied full xxxx final process getting xxxx home remark still showing reports need remark removed',
  {'cats': {'Bank account or service': 0,
    'Consumer Loan': 0,
    'Credit card': 0,
    'Credit reporting': 1,
    'Debt collection': 0,
    'Money transfers': 0,
    'Mortgage': 0,
    'Payday loan': 0,
    'Prepaid card': 0,
    'Student loan': 0}}),
 ('mortgage company city mortgage would rather foreclose except monthly payments fighting mortgage company two years believe predatory lending fraud went modification signed modification still foreclosed',
  {'cats': {'Bank account or service': 0,
    'Consumer Loan': 0,
    'Credit card': 0,
    'Credit reporting': 0,
    'Debt collection': 0,
    'Money transfers': 0,
    'Mortgage': 1,
    'Payday loan':

In [42]:
# unpack the text and lables used for evaluation later
train_texts, train_labels = list(zip(*train_data))
test_texts, test_labels = list(zip(*test_data))
valid_texts, valid_labels = list(zip(*valid_data))

### Construct spacy model

In [43]:
def train_spacy(iterations, model_arch, dropout, learn_rate, output_dir):

    nlp = spacy.load('en_core_web_lg')

    textcat = nlp.create_pipe('textcat', config={'exclusive_classes':True, 'architecture':model_arch})
    nlp.add_pipe(textcat)

    for _, label in enumerate(label_values):
        textcat.add_label(label)

    pipe_exceptions = ['textcat']
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]

    with nlp.disable_pipes(*other_pipes):
    #     print(nlp.pipe_names)
        optimizer = nlp.begin_training()
        optimizer.learn_rate = learn_rate
        print('Training the model..')
        total_start_time = time.clock()

    for i in range(iterations):
        print('\nIteration:', str(i+1))
        start_time = time.clock()
        losses = {}
        true_labels = []
        pred_labels = []

        random.shuffle(train_data)
        batches = minibatch(train_data, size=compounding(4., 32., 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, sgd=optimizer, drop=dropout, losses=losses)

        with textcat.model.use_params(optimizer.averages):
            
            nlp.to_disk(output_dir)

            docs = [nlp.tokenizer(text) for text in valid_texts]

            for j, doc in enumerate(textcat.pipe(docs)):
                true_series = pd.Series(valid_labels[j]['cats'])
                true_label = true_series.idxmax()
                true_labels.append(true_label)

                pred_series = pd.Series(doc.cats)
                pred_label = pred_series.idxmax()
                pred_labels.append(pred_label)

            score_f1 = f1_score(true_labels, pred_labels, average='weighted')
            score_ac = accuracy_score(true_labels, pred_labels)
            
            print(classification_report(true_labels, pred_labels))
            print('\ntextcat_loss: {:.3f}\t f1_score: {:.3f}\t accuracy_score: {:.3f}'.format(losses['textcat'], score_f1, score_ac))

            print('\nElapsed time:', str(round((time.clock() - start_time)/60,2)) + ' minutes')
            
    print('\nTotal time:', str(round((time.clock() - total_start_time)/60,2)) + ' minutes')
            
    return nlp

In [44]:
# function to evaluate results on unseen test data
def evaluate(test_texts, test_labels, model):
    
    nlp = spacy.load(model)
    
    docs = [nlp.tokenizer(text) for text in test_texts]
    
    textcat = nlp.get_pipe('textcat')
    
    true_labels = []
    pred_labels = []

    for j, doc in enumerate(textcat.pipe(docs)):
        true_series = pd.Series(test_labels[j]['cats'])
        true_label = true_series.idxmax()
        true_labels.append(true_label)

        pred_series = pd.Series(doc.cats)
        pred_label = pred_series.idxmax()
        pred_labels.append(pred_label)
        
    print(classification_report(true_labels, pred_labels))

# Selecting model_architecture: 'ensemble'
## Learning_rate trials

### Learning_rate: 4e-3

In [45]:
# ensemble model architecture
train_spacy(10, 'ensemble', 0.2, 4e-3, 'ensemble_model')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.33      0.38      0.35        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.56      0.38      0.45        24
       Credit reporting       0.71      0.61      0.65        28
        Debt collection       0.51      0.84      0.64        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.85      0.94      0.89        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.62       192
              macro avg       0.30      0.31      0.30       192
           weighted avg       0.55      0.62      0.57       192


textcat_loss: 11.710	 f1_score: 0.571	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.60      0.56      0.58        16
          Consumer Loan       0.60      0.55      0.57        11
            Credit card       0.63      0.71      0.67        24
       Credit reporting       0.81      0.75      0.78        28
        Debt collection       0.83      0.82      0.82        49
        Money transfers       1.00      0.67      0.80         3
               Mortgage       0.92      0.96      0.94        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.45      0.62      0.53         8

               accuracy                           0.77       192
              macro avg       0.58      0.56      0.57       192
           weighted avg       0.76      0.77      0.76       192


textcat_loss: 2.320	 f1_score: 0.763	 accuracy_score: 0.766

Elapsed time: 5.97 minute

<spacy.lang.en.English at 0x7fef891b2a20>

In [46]:
# evaluate results on unseen test data
evaluate(test_texts, test_labels, 'ensemble_model')

                         precision    recall  f1-score   support

Bank account or service       0.65      0.61      0.63        18
          Consumer Loan       0.41      0.54      0.47        13
            Credit card       0.71      0.86      0.77        28
       Credit reporting       0.81      0.79      0.80        33
        Debt collection       0.81      0.79      0.80        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.93      0.89      0.91        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.31      0.40      0.35        10

               accuracy                           0.75       225
              macro avg       0.46      0.49      0.47       225
           weighted avg       0.74      0.75      0.74       225



### Learning_rate: 3e-3

In [47]:
train_spacy(10, 'ensemble', 0.2, 3e-3, 'ensemble_model_3e-3')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.50      0.33      0.40        24
       Credit reporting       0.59      0.36      0.44        28
        Debt collection       0.44      0.86      0.58        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.76      0.98      0.86        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.56       192
              macro avg       0.23      0.25      0.23       192
           weighted avg       0.45      0.56      0.48       192


textcat_loss: 12.257	 f1_score: 0.481	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.55      0.69      0.61        16
          Consumer Loan       0.50      0.64      0.56        11
            Credit card       0.76      0.67      0.71        24
       Credit reporting       0.81      0.75      0.78        28
        Debt collection       0.82      0.76      0.79        49
        Money transfers       0.50      0.67      0.57         3
               Mortgage       0.85      0.96      0.90        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.40      0.25      0.31         8

               accuracy                           0.74       192
              macro avg       0.52      0.54      0.52       192
           weighted avg       0.74      0.74      0.74       192


textcat_loss: 2.218	 f1_score: 0.739	 accuracy_score: 0.745

Elapsed time: 4.69 minute

<spacy.lang.en.English at 0x7fef2162bb70>

In [48]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-3')

                         precision    recall  f1-score   support

Bank account or service       0.54      0.78      0.64        18
          Consumer Loan       0.77      0.77      0.77        13
            Credit card       0.77      0.82      0.79        28
       Credit reporting       0.90      0.82      0.86        33
        Debt collection       0.81      0.81      0.81        58
        Money transfers       1.00      0.33      0.50         3
               Mortgage       0.85      0.93      0.89        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.75      0.30      0.43        10

               accuracy                           0.79       225
              macro avg       0.64      0.56      0.57       225
           weighted avg       0.79      0.79      0.78       225



### Learning_rate: 2e-3

In [49]:
train_spacy(10, 'ensemble', 0.2, 2e-3, 'ensemble_model_2e-3')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.44      0.46      0.45        24
       Credit reporting       0.25      0.04      0.06        28
        Debt collection       0.53      0.92      0.67        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.59      0.94      0.72        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.54       192
              macro avg       0.18      0.24      0.19       192
           weighted avg       0.38      0.54      0.42       192


textcat_loss: 12.434	 f1_score: 0.422	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.75      0.75      0.75        16
          Consumer Loan       0.60      0.55      0.57        11
            Credit card       0.77      0.71      0.74        24
       Credit reporting       0.74      0.71      0.73        28
        Debt collection       0.87      0.80      0.83        49
        Money transfers       0.25      0.33      0.29         3
               Mortgage       0.83      1.00      0.91        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.67      0.75      0.71         8

               accuracy                           0.78       192
              macro avg       0.55      0.56      0.55       192
           weighted avg       0.77      0.78      0.77       192


textcat_loss: 1.605	 f1_score: 0.771	 accuracy_score: 0.781

Elapsed time: 6.35 minute

<spacy.lang.en.English at 0x7fee73de74e0>

In [50]:
evaluate(test_texts, test_labels, 'ensemble_model_2e-3')

                         precision    recall  f1-score   support

Bank account or service       0.62      0.83      0.71        18
          Consumer Loan       0.75      0.69      0.72        13
            Credit card       0.80      0.86      0.83        28
       Credit reporting       0.77      0.70      0.73        33
        Debt collection       0.85      0.76      0.80        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.85      0.93      0.89        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.50      0.60      0.55        10

               accuracy                           0.77       225
              macro avg       0.51      0.54      0.52       225
           weighted avg       0.76      0.77      0.76       225



### Learning_rate: 1e-3

In [51]:
train_spacy(10, 'ensemble', 0.2, 1e-3, 'ensemble_model_1e-3')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.42      0.81      0.55        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.50      0.04      0.08        24
       Credit reporting       0.00      0.00      0.00        28
        Debt collection       0.46      0.80      0.59        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.61      0.92      0.73        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       1.00      0.12      0.22         8

               accuracy                           0.52       192
              macro avg       0.30      0.27      0.22       192
           weighted avg       0.41      0.52      0.40       192


textcat_loss: 11.694	 f1_score: 0.401	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.73      0.69      0.71        16
          Consumer Loan       0.55      0.55      0.55        11
            Credit card       0.74      0.83      0.78        24
       Credit reporting       0.76      0.79      0.77        28
        Debt collection       0.85      0.80      0.82        49
        Money transfers       1.00      1.00      1.00         3
               Mortgage       0.89      0.98      0.93        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.67      0.50      0.57         8

               accuracy                           0.80       192
              macro avg       0.62      0.61      0.61       192
           weighted avg       0.78      0.80      0.79       192


textcat_loss: 1.765	 f1_score: 0.788	 accuracy_score: 0.797

Elapsed time: 9.67 minute

<spacy.lang.en.English at 0x7fedf47e7128>

In [52]:
evaluate(test_texts, test_labels, 'ensemble_model_1e-3')

                         precision    recall  f1-score   support

Bank account or service       0.55      0.67      0.60        18
          Consumer Loan       0.58      0.85      0.69        13
            Credit card       0.75      0.75      0.75        28
       Credit reporting       0.96      0.67      0.79        33
        Debt collection       0.79      0.83      0.81        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.85      0.91      0.88        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.50      0.30      0.37        10

               accuracy                           0.75       225
              macro avg       0.50      0.50      0.49       225
           weighted avg       0.75      0.75      0.74       225



### Learning_rate: 3e-4

In [57]:
train_spacy(10, 'ensemble', 0.2, 3e-4, 'ensemble_model_3e-4')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.00      0.00      0.00        24
       Credit reporting       0.00      0.00      0.00        28
        Debt collection       0.26      1.00      0.41        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.00      0.00      0.00        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.26       192
              macro avg       0.03      0.10      0.04       192
           weighted avg       0.07      0.26      0.10       192


textcat_loss: 13.106	 f1_score: 0.104	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.55      0.69      0.61        16
          Consumer Loan       0.86      0.55      0.67        11
            Credit card       0.59      0.67      0.63        24
       Credit reporting       0.77      0.71      0.74        28
        Debt collection       0.84      0.88      0.86        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.89      0.98      0.93        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.29      0.25      0.27         8

               accuracy                           0.76       192
              macro avg       0.48      0.47      0.47       192
           weighted avg       0.74      0.76      0.74       192


textcat_loss: 4.234	 f1_score: 0.744	 accuracy_score: 0.760

Elapsed time: 2.07 minute

<spacy.lang.en.English at 0x7fec23838400>

In [58]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-4')

                         precision    recall  f1-score   support

Bank account or service       0.35      0.39      0.37        18
          Consumer Loan       1.00      0.77      0.87        13
            Credit card       0.66      0.82      0.73        28
       Credit reporting       0.77      0.70      0.73        33
        Debt collection       0.80      0.83      0.81        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.86      0.88      0.87        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.50      0.60      0.55        10

               accuracy                           0.74       225
              macro avg       0.49      0.50      0.49       225
           weighted avg       0.73      0.74      0.73       225



### Learning_rate: 3e-5

In [59]:
train_spacy(10, 'ensemble', 0.2, 3e-5, 'ensemble_model_3e-5')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.00      0.00      0.00        24
       Credit reporting       0.00      0.00      0.00        28
        Debt collection       0.00      0.00      0.00        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.26      1.00      0.41        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.26       192
              macro avg       0.03      0.10      0.04       192
           weighted avg       0.07      0.26      0.10       192


textcat_loss: 14.059	 f1_score: 0.104	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.00      0.00      0.00        24
       Credit reporting       0.00      0.00      0.00        28
        Debt collection       0.26      1.00      0.41        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.00      0.00      0.00        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.26       192
              macro avg       0.03      0.10      0.04       192
           weighted avg       0.07      0.26      0.10       192


textcat_loss: 12.872	 f1_score: 0.104	 accuracy_score: 0.255

Elapsed time: 1.45 minut

<spacy.lang.en.English at 0x7fec3ad85b00>

In [60]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-5')

                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        18
          Consumer Loan       0.00      0.00      0.00        13
            Credit card       0.00      0.00      0.00        28
       Credit reporting       0.00      0.00      0.00        33
        Debt collection       0.26      1.00      0.41        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.00      0.00      0.00        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00        10

               accuracy                           0.26       225
              macro avg       0.03      0.10      0.04       225
           weighted avg       0.07      0.26      0.11       225



### Learning_rate: 3e-2

In [61]:
train_spacy(10, 'ensemble', 0.2, 3e-2, 'ensemble_model_3e-2')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.57      0.81      0.67        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.37      0.62      0.46        24
       Credit reporting       0.61      0.71      0.66        28
        Debt collection       0.78      0.43      0.55        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.71      0.98      0.82        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.61       192
              macro avg       0.30      0.36      0.32       192
           weighted avg       0.56      0.61      0.56       192


textcat_loss: 12.660	 f1_score: 0.559	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.77      0.62      0.69        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.64      0.67      0.65        24
       Credit reporting       0.73      0.29      0.41        28
        Debt collection       0.52      0.90      0.66        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.62      0.73      0.67        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.59       192
              macro avg       0.33      0.32      0.31       192
           weighted avg       0.54      0.59      0.54       192


textcat_loss: 9.445	 f1_score: 0.538	 accuracy_score: 0.594

Elapsed time: 5.78 minute

<spacy.lang.en.English at 0x7feb73948828>

In [62]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-2')

                         precision    recall  f1-score   support

Bank account or service       0.50      0.33      0.40        18
          Consumer Loan       0.00      0.00      0.00        13
            Credit card       0.50      0.46      0.48        28
       Credit reporting       0.82      0.42      0.56        33
        Debt collection       0.56      0.93      0.70        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.55      0.72      0.63        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00        10

               accuracy                           0.57       225
              macro avg       0.29      0.29      0.28       225
           weighted avg       0.51      0.57      0.51       225



### Learning_rate: 3e-1

In [66]:
train_spacy(10, 'ensemble', 0.2, 3e-1, 'ensemble_model_3e-1')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.12      1.00      0.22        24
       Credit reporting       0.00      0.00      0.00        28
        Debt collection       0.00      0.00      0.00        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.00      0.00      0.00        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.12       192
              macro avg       0.01      0.10      0.02       192
           weighted avg       0.02      0.12      0.03       192


textcat_loss: 25.951	 f1_score: 0.028	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.14      0.09      0.11        11
            Credit card       0.00      0.00      0.00        24
       Credit reporting       0.10      0.18      0.13        28
        Debt collection       0.19      0.06      0.09        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.00      0.00      0.00        49
Other financial service       0.00      0.00      0.00         0
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.06      0.62      0.11         8

               accuracy                           0.07       192
              macro avg       0.04      0.09      0.04       192
           weighted avg       0.07      0.07      0.05       192


textcat_loss: 26.319	

<spacy.lang.en.English at 0x7fec6385f748>

In [67]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-1')

                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        18
          Consumer Loan       0.00      0.00      0.00        13
            Credit card       0.19      0.54      0.28        28
       Credit reporting       0.00      0.00      0.00        33
        Debt collection       0.30      0.14      0.19        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.26      0.54      0.35        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00        10

               accuracy                           0.24       225
              macro avg       0.07      0.12      0.08       225
           weighted avg       0.17      0.24      0.17       225



# Selecting model_architecture: 'ensemble', learning_rate: 3e-3
## Dropout trials

### Learn_rate: 3e-3, Dropout: 0.1

In [68]:
train_spacy(10, 'ensemble', 0.1, 3e-3, 'ensemble_model_3e-3_0.1')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.80      0.25      0.38        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.46      0.75      0.57        24
       Credit reporting       0.61      0.39      0.48        28
        Debt collection       0.59      0.88      0.70        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.81      0.94      0.87        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.64       192
              macro avg       0.33      0.32      0.30       192
           weighted avg       0.57      0.64      0.57       192


textcat_loss: 12.053	 f1_score: 0.574	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.59      0.62      0.61        16
          Consumer Loan       0.56      0.45      0.50        11
            Credit card       0.77      0.71      0.74        24
       Credit reporting       0.70      0.75      0.72        28
        Debt collection       0.84      0.73      0.78        49
        Money transfers       0.60      1.00      0.75         3
               Mortgage       0.87      0.94      0.90        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.56      0.62      0.59         8

               accuracy                           0.74       192
              macro avg       0.55      0.58      0.56       192
           weighted avg       0.75      0.74      0.74       192


textcat_loss: 1.526	 f1_score: 0.743	 accuracy_score: 0.745

Elapsed time: 4.78 minute

<spacy.lang.en.English at 0x7fea17c2acf8>

In [69]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-3_0.1')

                         precision    recall  f1-score   support

Bank account or service       0.55      0.67      0.60        18
          Consumer Loan       0.53      0.62      0.57        13
            Credit card       0.71      0.79      0.75        28
       Credit reporting       0.83      0.73      0.77        33
        Debt collection       0.80      0.81      0.80        58
        Money transfers       0.40      0.67      0.50         3
               Mortgage       0.95      0.91      0.93        57
            Payday loan       0.33      0.25      0.29         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.83      0.50      0.62        10

               accuracy                           0.77       225
              macro avg       0.59      0.59      0.58       225
           weighted avg       0.78      0.77      0.77       225



### Learning_rate: 3e-3, Dropout: 0.3 -- PERFORMED THE BEST YET

In [70]:
train_spacy(10, 'ensemble', 0.3, 3e-3, 'ensemble_model_3e-3_0.3')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.67      0.12      0.21        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.17      0.04      0.07        24
       Credit reporting       0.75      0.54      0.63        28
        Debt collection       0.46      0.80      0.59        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.74      1.00      0.85        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.15      0.25      0.19         8

               accuracy                           0.56       192
              macro avg       0.29      0.27      0.25       192
           weighted avg       0.50      0.56      0.49       192


textcat_loss: 12.316	 f1_score: 0.492	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.55      0.69      0.61        16
          Consumer Loan       0.67      0.55      0.60        11
            Credit card       0.63      0.71      0.67        24
       Credit reporting       0.85      0.79      0.81        28
        Debt collection       0.89      0.82      0.85        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.91      0.98      0.94        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.62      0.62      0.62         8

               accuracy                           0.78       192
              macro avg       0.51      0.51      0.51       192
           weighted avg       0.77      0.78      0.77       192


textcat_loss: 2.296	 f1_score: 0.771	 accuracy_score: 0.776

Elapsed time: 4.65 minute

<spacy.lang.en.English at 0x7fe974a824a8>

In [71]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-3_0.3')

                         precision    recall  f1-score   support

Bank account or service       0.61      0.78      0.68        18
          Consumer Loan       0.65      0.85      0.73        13
            Credit card       0.69      0.89      0.78        28
       Credit reporting       0.89      0.73      0.80        33
        Debt collection       0.87      0.83      0.85        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.90      0.91      0.90        57
            Payday loan       0.50      0.25      0.33         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.57      0.40      0.47        10

               accuracy                           0.80       225
              macro avg       0.57      0.56      0.56       225
           weighted avg       0.79      0.80      0.79       225



### Learning_rate: 3e-3, Dropout: 0.4

In [72]:
train_spacy(10, 'ensemble', 0.4, 3e-3, 'ensemble_model_3e-3_0.4')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.00      0.00      0.00        24
       Credit reporting       0.50      0.04      0.07        28
        Debt collection       0.36      0.90      0.52        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.70      0.98      0.81        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.48       192
              macro avg       0.16      0.19      0.14       192
           weighted avg       0.34      0.48      0.35       192


textcat_loss: 13.124	 f1_score: 0.349	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.54      0.88      0.67        16
          Consumer Loan       0.64      0.64      0.64        11
            Credit card       0.70      0.67      0.68        24
       Credit reporting       0.87      0.71      0.78        28
        Debt collection       0.84      0.73      0.78        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.86      0.98      0.91        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.50      0.62      0.56         8

               accuracy                           0.76       192
              macro avg       0.49      0.52      0.50       192
           weighted avg       0.75      0.76      0.75       192


textcat_loss: 4.314	 f1_score: 0.748	 accuracy_score: 0.760

Elapsed time: 5.37 minute

<spacy.lang.en.English at 0x7fe8acd35ac8>

In [74]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-3_0.4')

                         precision    recall  f1-score   support

Bank account or service       0.58      0.83      0.68        18
          Consumer Loan       0.55      0.92      0.69        13
            Credit card       0.73      0.68      0.70        28
       Credit reporting       0.92      0.73      0.81        33
        Debt collection       0.79      0.76      0.77        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.87      0.91      0.89        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.44      0.40      0.42        10

               accuracy                           0.76       225
              macro avg       0.49      0.52      0.50       225
           weighted avg       0.75      0.76      0.74       225



### Learning_rate: 3e-3, Dropout: 0.5

In [75]:
train_spacy(10, 'ensemble', 0.5, 3e-3, 'ensemble_model_3e-3_0.5')

Training the model..

Iteration: 1
                         precision    recall  f1-score   support

Bank account or service       0.00      0.00      0.00        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.00      0.00      0.00        24
       Credit reporting       0.00      0.00      0.00        28
        Debt collection       0.44      0.82      0.58        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.41      0.86      0.56        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.43       192
              macro avg       0.09      0.17      0.11       192
           weighted avg       0.22      0.43      0.29       192


textcat_loss: 13.433	 f1_score: 0.289	 accuracy_sco

                         precision    recall  f1-score   support

Bank account or service       0.44      0.75      0.56        16
          Consumer Loan       0.00      0.00      0.00        11
            Credit card       0.49      0.71      0.58        24
       Credit reporting       0.83      0.71      0.77        28
        Debt collection       0.85      0.67      0.75        49
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.73      0.96      0.83        49
            Payday loan       0.00      0.00      0.00         3
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.00      0.00      0.00         8

               accuracy                           0.67       192
              macro avg       0.33      0.38      0.35       192
           weighted avg       0.62      0.67      0.63       192


textcat_loss: 6.893	 f1_score: 0.634	 accuracy_score: 0.672

Elapsed time: 4.01 minute

<spacy.lang.en.English at 0x7fe873b1b828>

In [76]:
evaluate(test_texts, test_labels, 'ensemble_model_3e-3_0.5')

                         precision    recall  f1-score   support

Bank account or service       0.41      0.72      0.52        18
          Consumer Loan       0.00      0.00      0.00        13
            Credit card       0.49      0.64      0.55        28
       Credit reporting       0.91      0.64      0.75        33
        Debt collection       0.90      0.74      0.81        58
        Money transfers       0.00      0.00      0.00         3
               Mortgage       0.68      0.91      0.78        57
            Payday loan       0.00      0.00      0.00         4
           Prepaid card       0.00      0.00      0.00         1
           Student loan       0.11      0.10      0.11        10

               accuracy                           0.66       225
              macro avg       0.35      0.38      0.35       225
           weighted avg       0.64      0.66      0.63       225

