<a id="section10"></a>
# <font color="#004D7F" size=5> 1.0. System setup</font>

In [1]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


In [2]:
import os
import lazypredict
import pandas as pd
import numpy as np

from google.colab import drive

np.random.seed(42)

drive.mount('/content/drive')
dataset_path = "/content/drive/MyDrive/Máster UNED/TFM/Datasets/Data_for_UCI_named.csv"

Mounted at /content/drive


<a id="section11"></a>
# <font color="#004D7F" size=5> 1.1. Read the dataset</font>

In [3]:
#Read CSV
df=pd.read_csv(dataset_path, delimiter=';')

df.pop('stab')

column_to_move = df.pop('stabf')
df['stabf'] = column_to_move

class_col = df.iloc[:,-1]
df = df.iloc[: , :-1]
df['stabf'] = class_col
df

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,unstable


In [4]:
df_x = df.drop('stabf', axis = 1)
df_y = df['stabf']

In [5]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
df_y = label_encoder.fit_transform(df_y)

labels = label_encoder.classes_

for label, integer_value in zip(labels, range(len(labels))):
    print(f"Label: {label} -> Integer Value: {integer_value}")

Label: stable -> Integer Value: 0
Label: unstable -> Integer Value: 1


<a id="section11"></a>
# <font color="#004D7F" size=5> 1.2. Preparing experimentation</font>

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size = 0.30, random_state = 42,stratify=df_y)
#X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.30, random_state = 42,stratify=y_test)

In [7]:
df_train = pd.concat([X_train, pd.DataFrame({'stabf': y_train})], axis = 1)
df_test = pd.concat([X_test, pd.DataFrame({'stabf': y_test})], axis = 1)
#df_val = pd.concat([X_val, pd.DataFrame({'class': y_val})], axis = 1)

In [None]:
print(df_train['stabf'].value_counts())
print(' ')
print(' ')
print(df_test['stabf'].value_counts())
print(' ')

1.0    4466
0.0    2534
Name: stabf, dtype: int64
 
 
1.0    1914
0.0    1086
Name: stabf, dtype: int64
 


<a id="section21"></a>
# <font color="#004D7F" size=5> 2.1. Logistic Regression</font>

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

model = LogisticRegression(solver='liblinear', max_iter=1000)
model.fit(X_train, y_train)

y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9615384615384616
Test AUC: 0.9585326953748006
Test precision: 0.9473684210526315
Test recall: 0.9473684210526315
Train accuracy: 0.9899497487437185
Train AUC: 0.9878648648648649
Train precision: 0.9931506849315068
Train recall: 0.9797297297297297
Train loss: nan
Validation accuracy: 0.9663865546218487
Validation AUC: 0.9642642642642645
Validation precision: 0.9555555555555556
Validation recall: 0.9555555555555556
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))


<a id="section22"></a>
# <font color="#004D7F" size=5> 2.2. Gaussian Naive Bayes</font>

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = GaussianNB()

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9423076923076923
Test AUC: 0.9322169059011166
Test precision: 0.9444444444444444
Test recall: 0.8947368421052632
Train accuracy: 0.9346733668341709
Train AUC: 0.9231891891891892
Train precision: 0.9420289855072463
Train recall: 0.8783783783783784
Train loss: nan
Validation accuracy: 0.907563025210084
Validation AUC: 0.916966966966967
Validation precision: 0.8269230769230769
Validation recall: 0.9555555555555556
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))


<a id="section23"></a>
# <font color="#004D7F" size=5> 2.3. Decision Tree</font>

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = DecisionTreeClassifier()

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 1.0
Test AUC: 1.0
Test precision: 1.0
Test recall: 1.0
Train accuracy: 1.0
Train AUC: 1.0
Train precision: 1.0
Train recall: 1.0
Train loss: nan
Validation accuracy: 1.0
Validation AUC: 1.0
Validation precision: 1.0
Validation recall: 1.0
Validation loss: nan


<a id="section24"></a>
# <font color="#004D7F" size=5> 2.4. Random Forest</font>

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = RandomForestClassifier()

parameters = {'n_estimators': [4, 6, 9, 10, 15],
              'max_features': ['log2', 'sqrt'],
              'criterion': ['entropy', 'gini'],
              'max_depth': [2, 3, 5, 10],
              'min_samples_split': [2, 3, 5],
              'min_samples_leaf': [1, 5, 8]
             }

# Run the grid search
grid_obj = GridSearchCV(model, parameters)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the model to the best combination of parameters
model = grid_obj.best_estimator_

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9807692307692307
Test AUC: 0.9736842105263157
Test precision: 1.0
Test recall: 0.9473684210526315
Train accuracy: 0.9974874371859297
Train AUC: 0.9966216216216216
Train precision: 1.0
Train recall: 0.9932432432432432
Train loss: nan
Validation accuracy: 0.957983193277311
Validation AUC: 0.961861861861862
Validation precision: 0.9166666666666666
Validation recall: 0.9777777777777777
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  valida

<a id="section25"></a>
# <font color="#004D7F" size=5> 2.5. Support Vector Machine</font>

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

model = svm.SVC(probability=True)

parameters = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

# Run the grid search
grid_obj = GridSearchCV(model, parameters)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the model to the best combination of parameters
model = grid_obj.best_estimator_

model.fit(X_train, y_train)

y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9615384615384616
Test AUC: 0.9585326953748006
Test precision: 0.9473684210526315
Test recall: 0.9473684210526315
Train accuracy: 0.9899497487437185
Train AUC: 0.9878648648648649
Train precision: 0.9931506849315068
Train recall: 0.9797297297297297
Train loss: 0.04349174230868833
Validation accuracy: 0.957983193277311
Validation AUC: 0.9575075075075076
Validation precision: 0.9347826086956522
Validation recall: 0.9555555555555556
Validation loss: 0.14731217528706578


<a id="section26"></a>
# <font color="#004D7F" size=5> 2.6. K-Nearest Neighbours</font>

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = KNeighborsClassifier()

parameters = {'n_neighbors': [3, 4, 5, 10],
              'weights': ['uniform', 'distance'],
              'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],
              'leaf_size' : [10, 20, 30, 50]
             }

# Run the grid search
grid_obj = GridSearchCV(model, parameters)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the model to the best combination of parameters
model = grid_obj.best_estimator_

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9615384615384616
Test AUC: 0.9473684210526316
Test precision: 1.0
Test recall: 0.8947368421052632
Train accuracy: 1.0
Train AUC: 1.0
Train precision: 1.0
Train recall: 1.0
Train loss: nan
Validation accuracy: 0.9495798319327731
Validation AUC: 0.9507507507507508
Validation precision: 0.9148936170212766
Validation recall: 0.9555555555555556
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  valida

<a id="section30"></a>
# <font color="#004D7F" size=5> 3.0. Lazy Predict</font>

In [8]:
from lazypredict.Supervised import LazyClassifier

clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None, random_state=42)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

models


 97%|█████████▋| 28/29 [00:30<00:00,  1.69it/s]

[LightGBM] [Info] Number of positive: 4466, number of negative: 2534
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.638000 -> initscore=0.566694
[LightGBM] [Info] Start training from score 0.566694


100%|██████████| 29/29 [00:30<00:00,  1.06s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SVC,0.96,0.95,0.95,0.96,1.21
XGBClassifier,0.95,0.94,0.94,0.95,0.48
LGBMClassifier,0.94,0.93,0.93,0.94,0.37
RandomForestClassifier,0.93,0.92,0.92,0.93,2.2
ExtraTreesClassifier,0.93,0.91,0.91,0.93,1.57
BaggingClassifier,0.89,0.89,0.89,0.89,1.05
NuSVC,0.9,0.88,0.88,0.9,2.56
QuadraticDiscriminantAnalysis,0.88,0.86,0.86,0.88,0.05
DecisionTreeClassifier,0.85,0.84,0.84,0.85,0.16
AdaBoostClassifier,0.85,0.83,0.83,0.85,3.41
