<a id="section10"></a>
# <font color="#004D7F" size=5> 1.0. System setup</font>

In [1]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


In [2]:
import os
import lazypredict
import pandas as pd
import numpy as np

from google.colab import drive

np.random.seed(42)

drive.mount('/content/drive')
dataset_path = "/content/drive/MyDrive/Máster UNED/TFM/Datasets/winequality-red.csv"

Mounted at /content/drive


<a id="section11"></a>
# <font color="#004D7F" size=5> 1.1. Read the dataset</font>

In [3]:
#Read CSV
df=pd.read_csv(dataset_path, delimiter=';')

column_to_move = df.pop('quality')
df['quality'] = column_to_move

class_col = df.iloc[:,-1]
df = df.iloc[: , :-1]
df['quality'] = class_col
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [4]:
df_x = df.drop('quality', axis = 1)
df_y = df['quality']

<a id="section11"></a>
# <font color="#004D7F" size=5> 1.2. Preparing experimentation</font>

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size = 0.30, random_state = 42)
#X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.30, random_state = 42,stratify=y_test)

In [6]:
df_train = pd.concat([X_train, pd.DataFrame({'quality': y_train})], axis = 1)
df_test = pd.concat([X_test, pd.DataFrame({'quality': y_test})], axis = 1)
#df_val = pd.concat([X_val, pd.DataFrame({'class': y_val})], axis = 1)

<a id="section21"></a>
# <font color="#004D7F" size=5> 2.1. Logistic Regression</font>

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

model = LogisticRegression(solver='liblinear', max_iter=1000)
model.fit(X_train, y_train)

y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9615384615384616
Test AUC: 0.9585326953748006
Test precision: 0.9473684210526315
Test recall: 0.9473684210526315
Train accuracy: 0.9899497487437185
Train AUC: 0.9878648648648649
Train precision: 0.9931506849315068
Train recall: 0.9797297297297297
Train loss: nan
Validation accuracy: 0.9663865546218487
Validation AUC: 0.9642642642642645
Validation precision: 0.9555555555555556
Validation recall: 0.9555555555555556
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))


<a id="section22"></a>
# <font color="#004D7F" size=5> 2.2. Gaussian Naive Bayes</font>

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = GaussianNB()

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9423076923076923
Test AUC: 0.9322169059011166
Test precision: 0.9444444444444444
Test recall: 0.8947368421052632
Train accuracy: 0.9346733668341709
Train AUC: 0.9231891891891892
Train precision: 0.9420289855072463
Train recall: 0.8783783783783784
Train loss: nan
Validation accuracy: 0.907563025210084
Validation AUC: 0.916966966966967
Validation precision: 0.8269230769230769
Validation recall: 0.9555555555555556
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))


<a id="section23"></a>
# <font color="#004D7F" size=5> 2.3. Decision Tree</font>

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = DecisionTreeClassifier()

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 1.0
Test AUC: 1.0
Test precision: 1.0
Test recall: 1.0
Train accuracy: 1.0
Train AUC: 1.0
Train precision: 1.0
Train recall: 1.0
Train loss: nan
Validation accuracy: 1.0
Validation AUC: 1.0
Validation precision: 1.0
Validation recall: 1.0
Validation loss: nan


<a id="section24"></a>
# <font color="#004D7F" size=5> 2.4. Random Forest</font>

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = RandomForestClassifier()

parameters = {'n_estimators': [4, 6, 9, 10, 15],
              'max_features': ['log2', 'sqrt'],
              'criterion': ['entropy', 'gini'],
              'max_depth': [2, 3, 5, 10],
              'min_samples_split': [2, 3, 5],
              'min_samples_leaf': [1, 5, 8]
             }

# Run the grid search
grid_obj = GridSearchCV(model, parameters)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the model to the best combination of parameters
model = grid_obj.best_estimator_

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9807692307692307
Test AUC: 0.9736842105263157
Test precision: 1.0
Test recall: 0.9473684210526315
Train accuracy: 0.9974874371859297
Train AUC: 0.9966216216216216
Train precision: 1.0
Train recall: 0.9932432432432432
Train loss: nan
Validation accuracy: 0.957983193277311
Validation AUC: 0.961861861861862
Validation precision: 0.9166666666666666
Validation recall: 0.9777777777777777
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  valida

<a id="section25"></a>
# <font color="#004D7F" size=5> 2.5. Support Vector Machine</font>

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

model = svm.SVC(probability=True)

parameters = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

# Run the grid search
grid_obj = GridSearchCV(model, parameters)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the model to the best combination of parameters
model = grid_obj.best_estimator_

model.fit(X_train, y_train)

y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9615384615384616
Test AUC: 0.9585326953748006
Test precision: 0.9473684210526315
Test recall: 0.9473684210526315
Train accuracy: 0.9899497487437185
Train AUC: 0.9878648648648649
Train precision: 0.9931506849315068
Train recall: 0.9797297297297297
Train loss: 0.04349174230868833
Validation accuracy: 0.957983193277311
Validation AUC: 0.9575075075075076
Validation precision: 0.9347826086956522
Validation recall: 0.9555555555555556
Validation loss: 0.14731217528706578


<a id="section26"></a>
# <font color="#004D7F" size=5> 2.6. K-Nearest Neighbours</font>

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

model = KNeighborsClassifier()

parameters = {'n_neighbors': [3, 4, 5, 10],
              'weights': ['uniform', 'distance'],
              'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],
              'leaf_size' : [10, 20, 30, 50]
             }

# Run the grid search
grid_obj = GridSearchCV(model, parameters)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the model to the best combination of parameters
model = grid_obj.best_estimator_

model.fit(X_train,y_train)
y_pred_test = model.predict(X_test)

# Calculate test metrics
test_accuracy = accuracy_score(y_test, y_pred_test)
test_auc = roc_auc_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test)
test_recall = recall_score(y_test, y_pred_test)

# Predict on the training set
y_pred_train = model.predict(X_train)

# Calculate training metrics
train_accuracy = accuracy_score(y_train, y_pred_train)
train_auc = roc_auc_score(y_train, y_pred_train)
train_precision = precision_score(y_train, y_pred_train)
train_recall = recall_score(y_train, y_pred_train)

# Calculate the log-loss for the training set
train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))

# Predict on the validation set
y_pred_val = model.predict(X_val)

# Calculate validation metrics
validation_accuracy = accuracy_score(y_val, y_pred_val)
validation_auc = roc_auc_score(y_val, y_pred_val)
validation_precision = precision_score(y_val, y_pred_val)
validation_recall = recall_score(y_val, y_pred_val)

# Calculate the log-loss for the validation set
validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))

# Print the metrics
print(f"Test accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")
print(f"Test precision: {test_precision}")
print(f"Test recall: {test_recall}")
print(f"Train accuracy: {train_accuracy}")
print(f"Train AUC: {train_auc}")
print(f"Train precision: {train_precision}")
print(f"Train recall: {train_recall}")
print(f"Train loss: {train_loss}")
print(f"Validation accuracy: {validation_accuracy}")
print(f"Validation AUC: {validation_auc}")
print(f"Validation precision: {validation_precision}")
print(f"Validation recall: {validation_recall}")
print(f"Validation loss: {validation_loss}")

Test accuracy: 0.9615384615384616
Test AUC: 0.9473684210526316
Test precision: 1.0
Test recall: 0.8947368421052632
Train accuracy: 1.0
Train AUC: 1.0
Train precision: 1.0
Train recall: 1.0
Train loss: nan
Validation accuracy: 0.9495798319327731
Validation AUC: 0.9507507507507508
Validation precision: 0.9148936170212766
Validation recall: 0.9555555555555556
Validation loss: nan


  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  train_loss = -np.mean(y_train * np.log(model.predict_proba(X_train)[:, 1]) + (1 - y_train) * np.log(1 - model.predict_proba(X_train)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  validation_loss = -np.mean(y_val * np.log(model.predict_proba(X_val)[:, 1]) + (1 - y_val) * np.log(1 - model.predict_proba(X_val)[:, 1]))
  valida

<a id="section30"></a>
# <font color="#004D7F" size=5> 3.0. Lazy Predict</font>

In [7]:
from lazypredict.Supervised import LazyRegressor

reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)

print(models)


 79%|███████▊  | 33/42 [00:08<00:02,  4.05it/s]

QuantileRegressor model failed to execute
Solver interior-point is not anymore available in SciPy >= 1.11.0.


100%|██████████| 42/42 [00:10<00:00,  4.13it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 1119, number of used features: 11
[LightGBM] [Info] Start training from score 5.621984
                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
ExtraTreesRegressor                          0.48       0.49  0.57        0.50
RandomForestRegressor                        0.44       0.45  0.59        0.77
HistGradientBoostingRegressor                0.42       0.43  0.60        1.82
LGBMRegressor                                0.41       0.42  0.61        0.14
BaggingRegressor                             0.41       0.42  0.61        0.14
NuSVR                                        0.40       0.41  0.61        0.16
SVR                         




In [8]:
models

Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ExtraTreesRegressor,0.48,0.49,0.57,0.5
RandomForestRegressor,0.44,0.45,0.59,0.77
HistGradientBoostingRegressor,0.42,0.43,0.6,1.82
LGBMRegressor,0.41,0.42,0.61,0.14
BaggingRegressor,0.41,0.42,0.61,0.14
NuSVR,0.4,0.41,0.61,0.16
SVR,0.39,0.41,0.61,0.17
GradientBoostingRegressor,0.38,0.39,0.62,0.43
AdaBoostRegressor,0.36,0.37,0.63,0.29
XGBRegressor,0.34,0.36,0.64,0.25
