# Modèle Naive Bayes

## Importation des packages nécessaires

In [17]:
import numpy as np
import pandas as pd
import pickle
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

## Chargement du jeu de données

In [2]:
# Restore the cross-validation fold indices and the dataset saved together
# as a 2-item pickle.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('variables.pkl', mode='rb') as f:
    saved_objects = pickle.load(f)

fold_indices, df = saved_objects

In [3]:
# List the column names of the loaded dataset.
print(df.columns)

Index(['comportement_risqueSexuel', 'comportement_alimentation',
       'comportement_hygienePersonnelle', 'intention_aggrégation',
       'intention_engagement', 'attitude_cohérence', 'attitude_spontanéité',
       'norme_personneImportante', 'norme_accomplissement',
       'perception_vulnérabilité', 'perception_sévérité', 'motivation_force',
       'motivation_volonté', 'soutienSocial_émotionnel',
       'soutienSocial_appréciation', 'soutienSocial_instrumental',
       'autonomisation_connaissance', 'autonomisation_compétences',
       'autonomisation_désirs', 'cancer_colUterus'],
      dtype='object')


In [14]:
# Show the dataset dimensions as (rows, columns).
print(df.shape)

(72, 20)


## Importer les folds (training set, test set)

In [8]:
# Inspect the first fold: a dict with 'train' and 'test' index arrays.
print(fold_indices['fold_1'])

{'train': array([ 1,  2,  3,  6,  7,  8, 11, 13, 14, 15, 16, 17, 19, 20, 21, 23, 24,
       25, 26, 27, 29, 30, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
       46, 48, 51, 52, 53, 55, 57, 58, 59, 60, 61, 63, 64, 65, 66, 67, 68,
       69, 70, 71]), 'test': array([ 0,  4,  5,  9, 10, 12, 18, 22, 28, 31, 34, 45, 47, 49, 50, 54, 56,
       62])}


In [10]:
# Bind each fold to its own name (later cells rely on fold_1 .. fold_4),
# then display the train/test row positions of every fold.
fold_1, fold_2, fold_3, fold_4 = (
    fold_indices[f"fold_{k}"] for k in range(1, 5)
)

for k, fold in enumerate((fold_1, fold_2, fold_3, fold_4), start=1):
    if k > 1:
        # Blank separator between consecutive folds (none after the last one).
        print("\n")
    print(f"Train indices Fold {k}:", fold['train'])
    print(f"Test indices Fold {k}:", fold['test'])

Train indices Fold 1: [ 1  2  3  6  7  8 11 13 14 15 16 17 19 20 21 23 24 25 26 27 29 30 32 33
 35 36 37 38 39 40 41 42 43 44 46 48 51 52 53 55 57 58 59 60 61 63 64 65
 66 67 68 69 70 71]
Test indices Fold 1: [ 0  4  5  9 10 12 18 22 28 31 34 45 47 49 50 54 56 62]


Train indices Fold 2: [ 0  1  2  3  4  5  6  8  9 10 11 12 14 15 17 18 20 21 22 23 24 26 27 28
 29 31 32 34 36 37 38 41 43 45 47 48 49 50 51 52 53 54 56 57 60 61 62 63
 64 65 66 67 68 70]
Test indices Fold 2: [ 7 13 16 19 25 30 33 35 39 40 42 44 46 55 58 59 69 71]


Train indices Fold 3: [ 0  1  2  4  5  7  9 10 11 12 13 14 16 18 19 20 21 22 23 25 28 29 30 31
 32 33 34 35 37 39 40 42 43 44 45 46 47 49 50 51 52 54 55 56 58 59 60 62
 63 65 67 68 69 71]
Test indices Fold 3: [ 3  6  8 15 17 24 26 27 36 38 41 48 53 57 61 64 66 70]


Train indices Fold 4: [ 0  3  4  5  6  7  8  9 10 12 13 15 16 17 18 19 22 24 25 26 27 28 30 31
 33 34 35 36 38 39 40 41 42 44 45 46 47 48 49 50 53 54 55 56 57 58 59 61
 62 64 66 69 70 71]
Test indice

## Prétraitement des données

### Le jeu de données pour chaque fold

In [11]:
# Training subset of each fold: rows of df selected by the positional
# indices stored under the fold's 'train' key (all columns kept).
train_1, train_2, train_3, train_4 = (
    df.iloc[fold['train'], :] for fold in (fold_1, fold_2, fold_3, fold_4)
)

In [20]:
# Test subset of each fold: rows of df selected by the positional
# indices stored under the fold's 'test' key (all columns kept).
test_1, test_2, test_3, test_4 = (
    df.iloc[fold['test'], :] for fold in (fold_1, fold_2, fold_3, fold_4)
)

### Matrice des attributs et variable cible

#### Fold 1

In [25]:
# Fold 1: separate the features from the target column.
# Features are converted to plain NumPy arrays (dropping the column names);
# the targets stay as pandas Series.
target_col = "cancer_colUterus"

# Training split
X_train_1 = train_1.drop(columns=[target_col]).to_numpy()
Y_train_1 = train_1[target_col]
print(f"Dimensions de X_train_1 : {X_train_1.shape}")
print(f"Dimensions de Y_train_1 : {Y_train_1.shape}")

# Test split
X_test_1 = test_1.drop(columns=[target_col]).to_numpy()
Y_test_1 = test_1[target_col]
print(f"Dimensions de X_test_1 : {X_test_1.shape}")
print(f"Dimensions de Y_test_1 : {Y_test_1.shape}")

Dimensions de X_train_1 : (54, 19)
Dimensions de Y_train_1 : (54,)
Dimensions de X_test_1 : (18, 19)
Dimensions de Y_test_1 : (18,)


In [26]:
# Fold 2: separate the features from the target column.
# Features are converted to plain NumPy arrays (dropping the column names);
# the targets stay as pandas Series.
target_col = "cancer_colUterus"

# Training split
X_train_2 = train_2.drop(columns=[target_col]).to_numpy()
Y_train_2 = train_2[target_col]
print(f"Dimensions de X_train_2 : {X_train_2.shape}")
print(f"Dimensions de Y_train_2 : {Y_train_2.shape}")

# Test split
X_test_2 = test_2.drop(columns=[target_col]).to_numpy()
Y_test_2 = test_2[target_col]
print(f"Dimensions de X_test_2 : {X_test_2.shape}")
print(f"Dimensions de Y_test_2 : {Y_test_2.shape}")

Dimensions de X_train_2 : (54, 19)
Dimensions de Y_train_2 : (54,)
Dimensions de X_test_2 : (18, 19)
Dimensions de Y_test_2 : (18,)


In [27]:
# Fold 3: separate the features from the target column.
# Features are converted to plain NumPy arrays (dropping the column names);
# the targets stay as pandas Series.
target_col = "cancer_colUterus"

# Training split
X_train_3 = train_3.drop(columns=[target_col]).to_numpy()
Y_train_3 = train_3[target_col]
print(f"Dimensions de X_train_3 : {X_train_3.shape}")
print(f"Dimensions de Y_train_3 : {Y_train_3.shape}")

# Test split
X_test_3 = test_3.drop(columns=[target_col]).to_numpy()
Y_test_3 = test_3[target_col]
print(f"Dimensions de X_test_3 : {X_test_3.shape}")
print(f"Dimensions de Y_test_3 : {Y_test_3.shape}")

Dimensions de X_train_3 : (54, 19)
Dimensions de Y_train_3 : (54,)
Dimensions de X_test_3 : (18, 19)
Dimensions de Y_test_3 : (18,)


In [28]:
# Fold 4: separate the features from the target column.
# Features are converted to plain NumPy arrays (dropping the column names);
# the targets stay as pandas Series.
target_col = "cancer_colUterus"

# Training split
X_train_4 = train_4.drop(columns=[target_col]).to_numpy()
Y_train_4 = train_4[target_col]
print(f"Dimensions de X_train_4 : {X_train_4.shape}")
print(f"Dimensions de Y_train_4 : {Y_train_4.shape}")

# Test split
X_test_4 = test_4.drop(columns=[target_col]).to_numpy()
Y_test_4 = test_4[target_col]
print(f"Dimensions de X_test_4 : {X_test_4.shape}")
print(f"Dimensions de Y_test_4 : {Y_test_4.shape}")

Dimensions de X_train_4 : (54, 19)
Dimensions de Y_train_4 : (54,)
Dimensions de X_test_4 : (18, 19)
Dimensions de Y_test_4 : (18,)
