In [5]:
#  1. En utilisant l’API sklearn entraîner les modèles en utilisant ces algos
# « KNN, Decision Tree, ANN, Naive Bayes, SVM  selon les kernels suivants :
# Linear, polynomial  et guassain»

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, confusion_matrix, classification_report
import pandas as pd

# Charger le jeu de données depuis le fichier CSV
file_dataset = "pima-indians-diabetes.csv"

# --- En-tête des colonnes ---
features = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 
         'pedi', 'age', 'class']

# --- Lire le fichier ---
df1 = pd.read_csv(file_dataset, names=features)

x=df1[['preg', 'plas', 'pres', 'skin']] #we only take the first four features.
y=df1['class']

#Split data set into training set and test set
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.3)
#70% training and 30% test

# 1.1 KNN
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

# 1.2 Decision Tree
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# 1.3 Artificial Neural Network (ANN)
ann_model = MLPClassifier()
ann_model.fit(X_train, y_train)

# 1.4 Naive Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# 1.5 SVM with Linear kernel
svm_linear_model = SVC(kernel='linear', probability=True)
svm_linear_model.fit(X_train, y_train)

# 1.6 SVM with Polynomial kernel
svm_poly_model = SVC(kernel='poly', probability=True)
svm_poly_model.fit(X_train, y_train)

# 1.7 SVM with Gaussian kernel
svm_rbf_model = SVC(kernel='rbf', probability=True)
svm_rbf_model.fit(X_train, y_train)

In [6]:
import joblib

# Load the pre-trained models
loaded_knn_model = joblib.load('knn_model.pkl')
loaded_dt_model = joblib.load('dt_model.pkl')
loaded_ann_model = joblib.load('ann_model.pkl')
loaded_nb_model = joblib.load('nb_model.pkl')
loaded_svm_linear_model = joblib.load('svm_linear_model.pkl')
loaded_svm_poly_model = joblib.load('svm_poly_model.pkl')
loaded_svm_rbf_model = joblib.load('svm_rbf_model.pkl')

In [16]:
from prettytable import PrettyTable
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, confusion_matrix, classification_report

# Assume X_test and y_test are your testing data

models = [knn_model, dt_model, ann_model, nb_model, svm_linear_model, svm_poly_model, svm_rbf_model]

# Create a PrettyTable object for the main metrics table
main_table = PrettyTable()
main_table.field_names = ["Model", "Accuracy", "Log Loss", "AUC", "Confusion Matrix"]

# Create a PrettyTable object for the classification report table
report_table = PrettyTable()
report_table.field_names = ["Model", "Class", "Precision", "Recall", "F1-Score", "Support"]

for model in models:
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    log_loss_value = log_loss(y_test, model.predict_proba(X_test))
    auc_value = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    confusion_matrix_str = str(confusion_matrix(y_test, y_pred)).replace('\n', ' ')

    # Add a row to the main metrics table
    main_table.add_row([model.__class__.__name__, f"{accuracy:.4f}", f"{log_loss_value:.4f}", f"{auc_value:.4f}", confusion_matrix_str])

    # Create a classification report table
    report = classification_report(y_test, y_pred, output_dict=True)
    for class_label, metrics in report.items():
        if class_label not in ['micro avg', 'macro avg', 'weighted avg']:
            # Check if the metrics are dictionary-like (class entries)
            if isinstance(metrics, dict):
                precision = f"{metrics['precision']:.4f}"
                recall = f"{metrics['recall']:.4f}"
                f1_score = f"{metrics['f1-score']:.4f}"
                support = f"{metrics['support']:.0f}"

                # Add a row to the classification report table
                report_table.add_row([model.__class__.__name__, f"Class {class_label}", precision, recall, f1_score, support])

# Print the main metrics table
print("Main Metrics:")
print(main_table)

# Print the classification report table
print("\nClassification Report:")
print(report_table)


Main Metrics:
+------------------------+----------+----------+--------+------------------------+
|         Model          | Accuracy | Log Loss |  AUC   |    Confusion Matrix    |
+------------------------+----------+----------+--------+------------------------+
|  KNeighborsClassifier  |  0.7316  |  2.2845  | 0.7295 | [[136  24]  [ 38  33]] |
| DecisionTreeClassifier |  0.6537  | 12.4827  | 0.6011 | [[118  42]  [ 38  33]] |
|     MLPClassifier      |  0.7143  |  0.5531  | 0.7261 | [[134  26]  [ 40  31]] |
|       GaussianNB       |  0.7749  |  0.4903  | 0.8150 | [[142  18]  [ 34  37]] |
|          SVC           |  0.7835  |  0.4792  | 0.8194 | [[145  15]  [ 35  36]] |
|          SVC           |  0.7749  |  0.4973  | 0.8158 | [[149  11]  [ 41  30]] |
|          SVC           |  0.7792  |  0.5027  | 0.8088 | [[148  12]  [ 39  32]] |
+------------------------+----------+----------+--------+------------------------+

Classification Report:
+------------------------+---------+-----------+-

In [8]:
from prettytable import PrettyTable
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Define your models
models = [KNeighborsClassifier(), DecisionTreeClassifier(), MLPClassifier(), GaussianNB(),
          SVC(kernel='linear'), SVC(kernel='poly'), SVC(kernel='rbf')]

# Create a PrettyTable object
table = PrettyTable()
table.field_names = ["Model", "Cross-Validated Accuracy"]

for model in models:
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    avg_accuracy = scores.mean()

    # Add a row to the table
    table.add_row([model.__class__.__name__, f"{avg_accuracy:.4f}"])

# Print the table
print(table)


+------------------------+--------------------------+
|         Model          | Cross-Validated Accuracy |
+------------------------+--------------------------+
|  KNeighborsClassifier  |          0.7096          |
| DecisionTreeClassifier |          0.6666          |
|     MLPClassifier      |          0.6946          |
|       GaussianNB       |          0.7468          |
|          SVC           |          0.7356          |
|          SVC           |          0.7561          |
|          SVC           |          0.7524          |
+------------------------+--------------------------+


In [9]:
from prettytable import PrettyTable
import joblib

# List of loaded models
loaded_models = [loaded_knn_model, loaded_dt_model, loaded_ann_model, loaded_nb_model,
                 loaded_svm_linear_model, loaded_svm_poly_model, loaded_svm_rbf_model]

# Ensure the loaded models are fitted with your training data
for model in loaded_models:
    model.fit(X_train, y_train)

# Create a PrettyTable object
table = PrettyTable()
table.field_names = ["Model", "Predictions"]
table.max_width["Predictions"] = 50  # Adjust the width of the "Predictions" column

# Make predictions for each loaded model on the test set
for model in loaded_models:
    y_pred = model.predict(X_test)
    predictions_str = str(y_pred).replace('\n', ' ')

    # Add a row to the table
    table.add_row([model.__class__.__name__, predictions_str])

# Print the table
print(table)




+------------------------+----------------------------------------------------+
|         Model          |                    Predictions                     |
+------------------------+----------------------------------------------------+
|  KNeighborsClassifier  | [1 0 1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 |
|                        | 0 0 1 0 0 0 0 1 1 0 0 0  0 1 0 1 1 0 0 0 0 0 0 1 0 |
|                        | 0 0 0 0 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 0 0 0 0  0 |
|                        | 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0  |
|                        | 0 0 1 0 0 0 0 1 1 0 0  0 0 0 1 0 0 0 1 1 1 0 0 1 0 |
|                        | 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0 1 |
|                        | 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 1  |
|                        | 1 0 0 0 1 0 1 0 1 1  0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 |
|                        | 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0  0 0 0 |
|                        |              

In [10]:
# 6.1 Bagging
bagging_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)
bagging_model.fit(X_train, y_train)

# 6.2 Stacking
stacking_model = RandomForestClassifier()
stacking_model.fit(X_train, y_train)

# 6.3 Boosting
boosting_model = AdaBoostClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)
boosting_model.fit(X_train, y_train)


In [13]:
from prettytable import PrettyTable
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, confusion_matrix, classification_report

# Ensemble models
ensemble_models = [bagging_model, stacking_model, boosting_model]

# List of loaded models
loaded_models = [loaded_knn_model, loaded_dt_model, loaded_ann_model, loaded_nb_model,
                 loaded_svm_linear_model, loaded_svm_poly_model, loaded_svm_rbf_model]

all_models = loaded_models + ensemble_models

# Ensure the loaded and ensemble models are fitted with your training data
for model in all_models:
    model.fit(X_train, y_train)

# Create a PrettyTable object for the main metrics table
main_table = PrettyTable()
main_table.field_names = ["Model", "Accuracy", "Log Loss", "AUC", "Confusion Matrix"]

# Create a PrettyTable object for the classification report table
report_table = PrettyTable()
report_table.field_names = ["Model", "Precision", "Recall", "F1-Score", "Support"]

# Make predictions for each model on the test set
for model in all_models:
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    log_loss_value = log_loss(y_test, getattr(model, 'predict_proba', None)(X_test))
    auc_value = roc_auc_score(y_test, getattr(model, 'predict_proba', None)(X_test)[:, 1])
    confusion_matrix_str = str(confusion_matrix(y_test, y_pred)).replace('\n', ' ')
    
    # Add a row to the main metrics table
    main_table.add_row([model.__class__.__name__, f"{accuracy:.4f}", f"{log_loss_value:.4f}", f"{auc_value:.4f}", confusion_matrix_str])

    # Add a row to the classification report table
    report = classification_report(y_test, y_pred, output_dict=True)
    for class_label, metrics in report.items():
        if class_label not in ['micro avg', 'macro avg', 'weighted avg']:
            try:
                precision = f"{metrics['precision']:.4f}"
                recall = f"{metrics['recall']:.4f}"
                f1_score = f"{metrics['f1-score']:.4f}"
                support = f"{metrics['support']:.0f}"
            except TypeError:
                continue
            report_table.add_row([f"{model.__class__.__name__} (Class {class_label})", precision, recall, f1_score, support])

# Print the main metrics table
print("Main Metrics:")
print(main_table)

# Print the classification report table
print("\nClassification Report:")
print(report_table)


Main Metrics:
+------------------------+----------+----------+--------+------------------------+
|         Model          | Accuracy | Log Loss |  AUC   |    Confusion Matrix    |
+------------------------+----------+----------+--------+------------------------+
|  KNeighborsClassifier  |  0.7316  |  2.2845  | 0.7295 | [[136  24]  [ 38  33]] |
| DecisionTreeClassifier |  0.6277  | 13.4188  | 0.5746 | [[114  46]  [ 40  31]] |
|     MLPClassifier      |  0.7446  |  0.5391  | 0.7408 | [[148  12]  [ 47  24]] |
|       GaussianNB       |  0.7749  |  0.4903  | 0.8150 | [[142  18]  [ 34  37]] |
|          SVC           |  0.7835  |  0.4785  | 0.8194 | [[145  15]  [ 35  36]] |
|          SVC           |  0.7749  |  0.4970  | 0.8158 | [[149  11]  [ 41  30]] |
|          SVC           |  0.7792  |  0.5023  | 0.8088 | [[148  12]  [ 39  32]] |
|   BaggingClassifier    |  0.7273  |  1.2761  | 0.7525 | [[135  25]  [ 38  33]] |
| RandomForestClassifier |  0.7273  |  0.6848  | 0.7600 | [[138  22]  [ 4