In [1]:
import warnings; 
warnings.filterwarnings("ignore")
warnings.simplefilter(action="ignore",category=UserWarning)
warnings.simplefilter(action="ignore",category=FutureWarning)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import matplotlib.pyplot as plt
plt.style.use('seaborn-v0_8-muted')
# plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['grid.linestyle'] = ':'   
plt.rcParams['axes.grid'] = False

%config InlineBackend.figure_formats = {'png', 'retina'}

import pandas as pd
import numpy as np


# LDA

Load financial data

In [2]:
fin_df = pd.read_excel('data/supervised-learning.xlsx', 
                       sheet_name='Financial', usecols=['Status','X1','X2','X3'], header=0)

In [3]:
fin_df.head()
fin_df.Status.value_counts()

Unnamed: 0,Status,X1,X2,X3
0,C1-Solvent,43.0,16.4,1.3
1,C1-Solvent,47.0,16.0,1.9
2,C1-Solvent,-3.3,4.0,2.7
3,C1-Solvent,35.0,20.8,1.9
4,C1-Solvent,46.7,12.6,0.9


Status
C1-Solvent     33
C2-Bankrupt    33
Name: count, dtype: int64

LDA model fitting

In [4]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

y = fin_df['Status']
X_fin = fin_df.drop(['Status'], axis=1)

lda_clf = LDA()
lda_clf.fit(X_fin, y)

Predict the target values

In [5]:
y_pred = lda_clf.predict(X_fin)
y_pred

array(['C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C2-Bankrupt', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C1-Solvent', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C1-Solvent', 'C2-Bankrupt',
       'C2-Bankrupt', 'C1-Solvent', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C1-Solvent', 'C1-Solvent', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankru

Determine the predicted probabilites

In [6]:
y_pred_probs = lda_clf.predict_proba(X_fin)
y_pred_probs.round(3)

array([[0.878, 0.122],
       [0.947, 0.053],
       [0.888, 0.112],
       [0.938, 0.062],
       [0.802, 0.198],
       [0.935, 0.065],
       [0.903, 0.097],
       [0.912, 0.088],
       [0.952, 0.048],
       [0.996, 0.004],
       [1.   , 0.   ],
       [0.963, 0.037],
       [0.806, 0.194],
       [0.94 , 0.06 ],
       [0.68 , 0.32 ],
       [0.986, 0.014],
       [0.992, 0.008],
       [0.919, 0.081],
       [0.447, 0.553],
       [0.703, 0.297],
       [0.897, 0.103],
       [0.942, 0.058],
       [0.955, 0.045],
       [0.752, 0.248],
       [0.941, 0.059],
       [0.897, 0.103],
       [0.957, 0.043],
       [0.961, 0.039],
       [0.937, 0.063],
       [0.839, 0.161],
       [0.953, 0.047],
       [0.726, 0.274],
       [0.7  , 0.3  ],
       [0.011, 0.989],
       [0.457, 0.543],
       [0.004, 0.996],
       [0.149, 0.851],
       [0.08 , 0.92 ],
       [0.039, 0.961],
       [0.182, 0.818],
       [0.001, 0.999],
       [0.538, 0.462],
       [0.03 , 0.97 ],
       [0.0

Evaluate model performance on train data

In [7]:
from sklearn.metrics import confusion_matrix, classification_report

y_pred = lda_clf.predict(X_fin)
print(confusion_matrix(y, y_pred))
print(classification_report(y, y_pred))

[[32  1]
 [ 5 28]]
              precision    recall  f1-score   support

  C1-Solvent       0.86      0.97      0.91        33
 C2-Bankrupt       0.97      0.85      0.90        33

    accuracy                           0.91        66
   macro avg       0.92      0.91      0.91        66
weighted avg       0.92      0.91      0.91        66



# QDA

Load the financial data

In [8]:
fin_df = pd.read_excel('data/supervised-learning.xlsx', 
                       sheet_name='Financial', usecols=['Status','X1','X2','X3'], header=0)

Prepare the input data

In [9]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

y = fin_df['Status']
X = fin_df.drop('Status', axis=1)

<font color='darkorange'>Fit QDA model to data</font>

In [10]:
qda_clf = QDA()
qda_clf.fit(X, y)

<font color='darkorange'>Get the predicted values</font>

In [11]:
y_pred = qda_clf.predict(X)
y_pred

array(['C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C2-Bankrupt', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C1-Solvent', 'C1-Solvent', 'C1-Solvent',
       'C1-Solvent', 'C2-Bankrupt', 'C1-Solvent', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C1-Solvent', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt', 'C2-Bankrupt',
       'C2-Bankrupt', 'C2-Ban

In [12]:
from sklearn.metrics import confusion_matrix, classification_report

print(confusion_matrix(y, y_pred))
print(classification_report(y, y_pred))

[[32  1]
 [ 2 31]]
              precision    recall  f1-score   support

  C1-Solvent       0.94      0.97      0.96        33
 C2-Bankrupt       0.97      0.94      0.95        33

    accuracy                           0.95        66
   macro avg       0.95      0.95      0.95        66
weighted avg       0.95      0.95      0.95        66



Get the probabilities of the predicted values

In [13]:
y_pred_probs = qda_clf.predict_proba(X)
y_pred_probs.round(3)

array([[0.974, 0.026],
       [0.99 , 0.01 ],
       [0.939, 0.061],
       [0.992, 0.008],
       [0.951, 0.049],
       [0.995, 0.005],
       [0.977, 0.023],
       [0.99 , 0.01 ],
       [0.913, 0.087],
       [1.   , 0.   ],
       [1.   , 0.   ],
       [0.992, 0.008],
       [0.951, 0.049],
       [0.946, 0.054],
       [0.94 , 0.06 ],
       [0.999, 0.001],
       [1.   , 0.   ],
       [0.991, 0.009],
       [0.458, 0.542],
       [0.888, 0.112],
       [0.976, 0.024],
       [0.988, 0.012],
       [0.994, 0.006],
       [0.941, 0.059],
       [0.978, 0.022],
       [0.951, 0.049],
       [0.994, 0.006],
       [0.99 , 0.01 ],
       [0.968, 0.032],
       [0.97 , 0.03 ],
       [0.954, 0.046],
       [0.859, 0.141],
       [0.765, 0.235],
       [0.   , 1.   ],
       [0.6  , 0.4  ],
       [0.   , 1.   ],
       [0.001, 0.999],
       [0.   , 1.   ],
       [0.   , 1.   ],
       [0.007, 0.993],
       [0.   , 1.   ],
       [0.831, 0.169],
       [0.   , 1.   ],
       [0. 

Make predictions from the prediction probabilities at a given decision threshold.

In [17]:
DECISION_TH = 0.5
y_pred_probs = lda_clf.predict_proba(X)[:,1]
y_pred = ["C1-Solvent" if x >= DECISION_TH else "C2-Bankrupt" for x in y_pred_probs]
y_pred

['C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C1-Solvent',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C2-Bankrupt',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C2-Bankrupt',
 'C1-Solvent',
 'C1-Solvent',
 'C2-Bankrupt',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C2-Bankrupt',
 'C2-Bankrupt',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C1-Solvent',
 'C