# **IA**
## **Travaux pratiques avec l'algorithme Adaboost**


---
## 1. **Collecte des donnees**

### 1.1. Importation des packets necessaires

In [2]:
import numpy as np 
import pandas as pd

from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

### 1.2. Importation des donnees
Ce dataset contient les données en rapport avec l'état de sante de certains patients. <br>
Certains des patients ont été diagnostiqué d'une maladie cardiaque.

In [3]:
df=pd.read_csv("heart2.csv")
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


### 1.3. Affichage des meta-donnees

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [5]:
df.shape

(918, 12)

---
## 2. **Preparation des donnees**

### 2.1. Recherche de valeurs nuls ou manquates

In [6]:
df.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [7]:
df.isna().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

## 2.2. Recherche des valeurs répétées

In [8]:
df.Sex.unique()

array(['M', 'F'], dtype=object)

In [9]:
df.ChestPainType.unique()

array(['ATA', 'NAP', 'ASY', 'TA'], dtype=object)

In [10]:
df.RestingECG.unique()

array(['Normal', 'ST', 'LVH'], dtype=object)

In [11]:
df.ExerciseAngina.unique()

array(['N', 'Y'], dtype=object)

In [12]:
df.ST_Slope.unique()

array(['Up', 'Flat', 'Down'], dtype=object)

## 2.3. Encodage des variables qualitatives 
(avec la methode **get_dummies()** de pandas et la methode remplaçage)

In [13]:
df_Sex = pd.get_dummies(df.Sex, prefix='Sex')

df = df.drop(['Sex'], axis=1).join(df_Sex)
df.head()

Unnamed: 0,Age,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,Sex_F,Sex_M
0,40,ATA,140,289,0,Normal,172,N,0.0,Up,0,0,1
1,49,NAP,160,180,0,Normal,156,N,1.0,Flat,1,1,0
2,37,ATA,130,283,0,ST,98,N,0.0,Up,0,0,1
3,48,ASY,138,214,0,Normal,108,Y,1.5,Flat,1,1,0
4,54,NAP,150,195,0,Normal,122,N,0.0,Up,0,0,1


In [14]:
df_ChestPainType = pd.get_dummies(df.ChestPainType, prefix='ChestPainType')

df = df.drop(['ChestPainType'], axis=1).join(df_ChestPainType)
df.head()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,Sex_F,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA
0,40,140,289,0,Normal,172,N,0.0,Up,0,0,1,0,1,0,0
1,49,160,180,0,Normal,156,N,1.0,Flat,1,1,0,0,0,1,0
2,37,130,283,0,ST,98,N,0.0,Up,0,0,1,0,1,0,0
3,48,138,214,0,Normal,108,Y,1.5,Flat,1,1,0,1,0,0,0
4,54,150,195,0,Normal,122,N,0.0,Up,0,0,1,0,0,1,0


In [15]:
df_RestingECG = pd.get_dummies(df.RestingECG, prefix='RestingECG')

df = df.drop(['RestingECG'], axis=1).join(df_RestingECG)
df.head()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,Sex_F,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST
0,40,140,289,0,172,N,0.0,Up,0,0,1,0,1,0,0,0,1,0
1,49,160,180,0,156,N,1.0,Flat,1,1,0,0,0,1,0,0,1,0
2,37,130,283,0,98,N,0.0,Up,0,0,1,0,1,0,0,0,0,1
3,48,138,214,0,108,Y,1.5,Flat,1,1,0,1,0,0,0,0,1,0
4,54,150,195,0,122,N,0.0,Up,0,0,1,0,0,1,0,0,1,0


In [16]:
df.ExerciseAngina.replace(['N','Y'],[0,1], inplace=True)
df.ExerciseAngina.head()

0    0
1    0
2    0
3    1
4    0
Name: ExerciseAngina, dtype: int64

In [17]:
df.ST_Slope.replace(['Down', 'Flat', 'Up'],[0,1,2], inplace=True)
df.ST_Slope.head()

0    2
1    1
2    2
3    1
4    2
Name: ST_Slope, dtype: int64

## 2.3. Choix des variables d'entree

In [18]:
X = df.drop(['HeartDisease'], axis=1)
X.head()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,Sex_F,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST
0,40,140,289,0,172,0,0.0,2,0,1,0,1,0,0,0,1,0
1,49,160,180,0,156,0,1.0,1,1,0,0,0,1,0,0,1,0
2,37,130,283,0,98,0,0.0,2,0,1,0,1,0,0,0,0,1
3,48,138,214,0,108,1,1.5,1,1,0,1,0,0,0,0,1,0
4,54,150,195,0,122,0,0.0,2,0,1,0,0,1,0,0,1,0


## 2.4. Choix de la variable a prédire

In [19]:
Y = df.HeartDisease
Y.head()

0    0
1    1
2    0
3    1
4    0
Name: HeartDisease, dtype: int64

## 2.5. Normalisation des donnees d'entrainement

In [20]:
X = StandardScaler().fit_transform(X)
X.std(axis=0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

## 2.6. Obtention des donnees d'apprentissage et de test
(70% apprentissage et test 30%)

In [21]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y, test_size=0.3, random_state=12)

----
# 3. **Utilisation du classifieur AdaBoost**

## 3.1. Test du modèle

In [22]:
# instanciation du modele AdaboostClassifier
ada = AdaBoostClassifier(n_estimators=25, learning_rate=1)

# fitting du modele
ada_model = ada.fit(X_train, Y_train)

# prediction des donnees
Y_pred = ada_model.predict(X_test)

# calcul de l'accuracy
ada_acc = accuracy_score(Y_test, Y_pred)

print("l'accuracy est de :", ada_acc)

l'accuracy est de : 0.8405797101449275


## 3.2. Recherche du meilleur modele

In [23]:
# configuration des hyperparamètres
params = [
    {'n_estimators': np.arange(1,30)},
    {'learning_rate': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}
]

# instantiation du modele GridSearchCV
ada_best_model=GridSearchCV(ada, params, cv=10, verbose=16)

# fitting du modèle
ada_best_model.fit(X_train,Y_train)

#parametres optimaux
ada_best_model.best_params_

Fitting 10 folds for each of 39 candidates, totalling 390 fits
[CV 1/10; 1/39] START n_estimators=1............................................
[CV 1/10; 1/39] END .............n_estimators=1;, score=0.815 total time=   0.0s
[CV 2/10; 1/39] START n_estimators=1............................................
[CV 2/10; 1/39] END .............n_estimators=1;, score=0.862 total time=   0.0s
[CV 3/10; 1/39] START n_estimators=1............................................
[CV 3/10; 1/39] END .............n_estimators=1;, score=0.906 total time=   0.0s
[CV 4/10; 1/39] START n_estimators=1............................................
[CV 4/10; 1/39] END .............n_estimators=1;, score=0.750 total time=   0.0s
[CV 5/10; 1/39] START n_estimators=1............................................
[CV 5/10; 1/39] END .............n_estimators=1;, score=0.891 total time=   0.0s
[CV 6/10; 1/39] START n_estimators=1............................................
[CV 6/10; 1/39] END .............n_estimators=

{'n_estimators': 16}

## 3.3. Prédiction et test des performances avec le meilleur modèle

In [24]:
# prediction 
ada_best_pred = ada_best_model.best_estimator_.predict(X_test)

# calcul du score
print("le score du meilleur modèle est de ", ada_best_model.best_score_)

# calcul de  l'accuracy
ada_best_acc = accuracy_score(Y_test, ada_best_pred)
print("l'accuracy du meilleur modèle est de ", ada_best_acc)

le score du meilleur modèle est de  0.8690144230769231
l'accuracy du meilleur modèle est de  0.8478260869565217


----

# 4. **Comparison avec d'autres modèles**

## 4.1. Modèle KNN

### 4.1.1. Test du modèle

In [25]:
# instanciation du modèle 
knn=KNeighborsClassifier(n_neighbors=10)

# fitting du model
knn.fit(X_train,Y_train)

# prediction
knn_pred = knn.predict(X_test)

# calcul de l'accuracy
knn_acc = accuracy_score(Y_test, knn_pred)
 
print("l'accuracy de ce modèle est ", knn_acc)

l'accuracy de ce modèle est  0.8297101449275363


### 4.1.2. Recherche et test du meilleur modèle

In [26]:
# configuration des hyperparamètres
params=[
    {'n_neighbors':np.arange(1,30)},
    {'leaf_size':np.arange(1,50)}
]

# instantiation du modele GridSearchCV
knn_best_model=GridSearchCV(knn, params, cv=5, verbose=16)

# fitting du modèle
knn_best_model.fit(X_train,Y_train)

Fitting 5 folds for each of 78 candidates, totalling 390 fits
[CV 1/5; 1/78] START n_neighbors=1..............................................
[CV 1/5; 1/78] END ...............n_neighbors=1;, score=0.798 total time=   0.0s
[CV 2/5; 1/78] START n_neighbors=1..............................................
[CV 2/5; 1/78] END ...............n_neighbors=1;, score=0.829 total time=   0.0s
[CV 3/5; 1/78] START n_neighbors=1..............................................
[CV 3/5; 1/78] END ...............n_neighbors=1;, score=0.773 total time=   0.0s
[CV 4/5; 1/78] START n_neighbors=1..............................................
[CV 4/5; 1/78] END ...............n_neighbors=1;, score=0.797 total time=   0.0s
[CV 5/5; 1/78] START n_neighbors=1..............................................
[CV 5/5; 1/78] END ...............n_neighbors=1;, score=0.789 total time=   0.0s
[CV 1/5; 2/78] START n_neighbors=2..............................................
[CV 1/5; 2/78] END ...............n_neighbors=2

GridSearchCV(cv=5, estimator=KNeighborsClassifier(n_neighbors=10),
             param_grid=[{'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])},
                         {'leaf_size': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])}],
             verbose=16)

In [27]:
# calcul des parametres optimaux
knn_best_model.best_params_

{'n_neighbors': 9}

In [28]:
# prediction
knn_best_pred = knn_best_model.best_estimator_.predict(X_test)

# calcul du score
print("le score est de ", knn_best_model.best_score_)

# calcul de l'accuracy
knn_best_acc = accuracy_score(Y_test, knn_best_pred)
print("l'accuracy est de ", knn_best_acc)

le score est de  0.8473110465116278
l'accuracy est de  0.855072463768116


## 4.2. Modèle LogisticRegression

### 4.2.1. Test du modèle

In [29]:
# instanciation du modèle 
lr = LogisticRegression()

#fitting du model
lr.fit(X_train,Y_train)

#prediction
lr_pred = lr.predict(X_test)

# calcul de l'accuracy
lr_acc = accuracy_score(Y_test, lr_pred)
 
print("l'accuracy de ce modèle est ", lr_acc)


l'accuracy de ce modèle est  0.8369565217391305


### 4.2.2. Recherche et test du meilleur modèle

In [30]:
# configuration des hyperparamètres
params=[
    {'penalty':['l1', 'l2', 'elasticnet', 'none']},
    {'solver':['newton-cg', 'liblinear', 'sag', 'saga']},
    {'max_iter':[1000,100,10,34,55,78]}
]

# instantiation du modele GridSearchCV
lr_best_model=GridSearchCV(lr, params, cv=5, verbose=16)

# fitting du modèle
lr_best_model.fit(X_train,Y_train)

Fitting 5 folds for each of 14 candidates, totalling 70 fits
[CV 1/5; 1/14] START penalty=l1.................................................
[CV 1/5; 1/14] END ....................penalty=l1;, score=nan total time=   0.0s
[CV 2/5; 1/14] START penalty=l1.................................................
[CV 2/5; 1/14] END ....................penalty=l1;, score=nan total time=   0.0s
[CV 3/5; 1/14] START penalty=l1.................................................
[CV 3/5; 1/14] END ....................penalty=l1;, score=nan total time=   0.0s
[CV 4/5; 1/14] START penalty=l1.................................................
[CV 4/5; 1/14] END ....................penalty=l1;, score=nan total time=   0.0s
[CV 5/5; 1/14] START penalty=l1.................................................
[CV 5/5; 1/14] END ....................penalty=l1;, score=nan total time=   0.0s
[CV 1/5; 2/14] START penalty=l2.................................................
[CV 1/5; 2/14] END ..................penalty=l2;

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 5/5; 14/14] END ................max_iter=78;, score=0.828 total time=   0.0s


GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid=[{'penalty': ['l1', 'l2', 'elasticnet', 'none']},
                         {'solver': ['newton-cg', 'liblinear', 'sag', 'saga']},
                         {'max_iter': [1000, 100, 10, 34, 55, 78]}],
             verbose=16)

In [31]:
# calcul des parametres optimaux
lr_best_model.best_params_

{'penalty': 'l2'}

In [32]:
# prediction
lr_best_pred = lr_best_model.best_estimator_.predict(X_test)

# calcul du score
print("le score est de ", lr_best_model.best_score_)

# calcul de l'accuracy
lr_best_acc = accuracy_score(Y_test, lr_best_pred)
print("l'accuracy est de ", lr_best_acc)

le score est de  0.8410731589147286
l'accuracy est de  0.8369565217391305


## 4.3. Modèle Naive Bayesian

### 4.3.1. Test du modèle

In [33]:
# instanciation du modèle 
gnb = GaussianNB()

#fitting du model
gnb.fit(X_train,Y_train)

#prediction
gnb_pred = gnb.predict(X_test)

# calcul de l'accuracy
gnb_acc = accuracy_score(Y_test, gnb_pred)
 
print("l'accuracy de ce modèle est ", gnb_acc)

l'accuracy de ce modèle est  0.8188405797101449


### 4.3.2. Recherche et test du meilleur modèle

In [34]:
# configuration des hyperparamètres
params=[
    {'priors':[None,'']}
]

# instantiation du modele GridSearchCV
gnb_best_model=GridSearchCV(gnb, params, cv=5, verbose=16)

# fitting du modèle
gnb_best_model.fit(X_train,Y_train)

Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV 1/5; 1/2] START priors=None.................................................
[CV 1/5; 1/2] END ..................priors=None;, score=0.845 total time=   0.0s
[CV 2/5; 1/2] START priors=None.................................................
[CV 2/5; 1/2] END ..................priors=None;, score=0.829 total time=   0.0s
[CV 3/5; 1/2] START priors=None.................................................
[CV 3/5; 1/2] END ..................priors=None;, score=0.859 total time=   0.0s
[CV 4/5; 1/2] START priors=None.................................................
[CV 4/5; 1/2] END ..................priors=None;, score=0.836 total time=   0.0s
[CV 5/5; 1/2] START priors=None.................................................
[CV 5/5; 1/2] END ..................priors=None;, score=0.766 total time=   0.0s
[CV 1/5; 2/2] START priors=.....................................................
[CV 1/5; 2/2] END ........................priors=

5 fits failed out of a total of 10.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/home/serigne/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/serigne/anaconda3/lib/python3.9/site-packages/sklearn/naive_bayes.py", line 245, in fit
    return self._partial_fit(
  File "/home/serigne/anaconda3/lib/python3.9/site-packages/sklearn/naive_bayes.py", line 427, in _partial_fit
    if len(priors) != n_classes:
TypeError: len() of unsized object



GridSearchCV(cv=5, estimator=GaussianNB(), param_grid=[{'priors': [None, '']}],
             verbose=16)

In [35]:
# calcul des parametres optimaux
gnb_best_model.best_params_

{'priors': None}

In [36]:
# prediction
gnb_best_pred = gnb_best_model.best_estimator_.predict(X_test)

# calcul du score
print("le score est de ", gnb_best_model.best_score_)

# calcul de l'accuracy
gnb_best_acc = accuracy_score(Y_test, lr_best_pred)
print("l'accuracy est de ", gnb_best_acc)

le score est de  0.8270712209302324
l'accuracy est de  0.8369565217391305


## 4.4. Modèle Arbre de décision

### 4.4.1. Test du modèle

In [37]:
# instanciation du modèle
dtc = DecisionTreeClassifier()

#fitting du model
dtc.fit(X_train, Y_train)

#prediction
dtc_pred = gnb.predict(X_test)

# calcul de l'accuracy
dtc_acc = accuracy_score(Y_test, dtc_pred)

print("l'accuracy de ce modèle est ", dtc_acc)

l'accuracy de ce modèle est  0.8188405797101449


### 4.4.2. Recherche et test du meilleur modèle

In [38]:
# configuration des hyperparamètres
params = [
    {'criterion': ["gini", "entropy"]},
    {'max_depth': [10, 15, 25, 33, 5, None]},
    {'min_samples_leaf': [1, 5, 4, 3, 2]},
    {'max_leaf_nodes': [None, 3, 50, 100, 23, 10, 56]}
]

# instantiation du modele GridSearchCV
dtc_best_model = GridSearchCV(dtc, params, cv=15, verbose=16)

# fitting du modèle
dtc_best_model.fit(X_train, Y_train)

Fitting 15 folds for each of 20 candidates, totalling 300 fits
[CV 1/15; 1/20] START criterion=gini............................................
[CV 1/15; 1/20] END .............criterion=gini;, score=0.767 total time=   0.0s
[CV 2/15; 1/20] START criterion=gini............................................
[CV 2/15; 1/20] END .............criterion=gini;, score=0.744 total time=   0.0s
[CV 3/15; 1/20] START criterion=gini............................................
[CV 3/15; 1/20] END .............criterion=gini;, score=0.837 total time=   0.0s
[CV 4/15; 1/20] START criterion=gini............................................
[CV 4/15; 1/20] END .............criterion=gini;, score=0.814 total time=   0.0s
[CV 5/15; 1/20] START criterion=gini............................................
[CV 5/15; 1/20] END .............criterion=gini;, score=0.860 total time=   0.0s
[CV 6/15; 1/20] START criterion=gini............................................
[CV 6/15; 1/20] END .............criterion=gin

GridSearchCV(cv=15, estimator=DecisionTreeClassifier(),
             param_grid=[{'criterion': ['gini', 'entropy']},
                         {'max_depth': [10, 15, 25, 33, 5, None]},
                         {'min_samples_leaf': [1, 5, 4, 3, 2]},
                         {'max_leaf_nodes': [None, 3, 50, 100, 23, 10, 56]}],
             verbose=16)

In [39]:
# calcul des parametres optimaux
dtc_best_model.best_params_

{'max_leaf_nodes': 10}

In [40]:
# prediction
dtc_best_pred = dtc_best_model.best_estimator_.predict(X_test)

# calcul du score
print("le score est de ", dtc_best_model.best_score_)

# calcul de l'accuracy
dtc_best_acc = accuracy_score(Y_test, dtc_best_pred)
print("l'accuracy est de ", dtc_best_acc)


le score est de  0.8318198597268365
l'accuracy est de  0.8152173913043478


## 4.5. Comparaison entre les modèles

In [43]:
print("AdaBoost : ", ada_best_acc)
print("KNN : ", knn_best_acc)
print("Naive Bayes : ", gnb_best_acc)
print("LogisticRegression : ", lr_best_acc)
print("Arbre de décision : ", dtc_best_acc)

models = [
    "AdaBoost",
    "KNN",
    "Naive Bayes",
    "LogisticRegression",
    "Arbre de décision"
]

accs = [
    ada_best_acc,
    knn_best_acc,
    gnb_best_acc,
    lr_best_acc,
    dtc_best_acc
]

best_model = ""
best_acc = 0
for i in range(0,4):
    if accs[i] > best_acc :
        best_acc = accs[i]
        best_model = models[i]

print()
print("Pour notre jeu de données, le meilleur modèle est ", best_model)

AdaBoost :  0.8478260869565217
KNN :  0.855072463768116
Naive Bayes :  0.8369565217391305
LogisticRegression :  0.8369565217391305
Arbre de décision :  0.8152173913043478

Pour notre jeu de données, le meilleur modèle est  KNN
