### 3.Traitement des données

#### 3.1. Importer les bibliothèques adéquates

#### 3.2. Importer le jeu de données (data-set)

In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
#afficher fichier credit_immo csv
dataimmo = pd.read_csv("./Data/credit_immo.csv")
dataimmo.head()

Unnamed: 0,ID_NOM,Niv_Etude_Bac,age,contrat_de_travail,Salaire,dette_anterieure,etat_civile,apport,enfant_a_Charge,Solvable
0,jean,3.0,45,CDI,40000,4000,M,0.3,3.0,OUI
1,VANESSA,5.0,28,CDI,30500,1320,M,0.1,0.0,OUI
2,TARCISSE,0.0,55,CDI,28000,40000,C,0.0,0.0,NON
3,TIBAULT,4.0,23,CDD,15000,0,M,0.1,,OUI
4,GILES,0.0,33,CDD,27000,3000,C,0.1,2.0,NON


#### 3.3.Transformer les valeurs manquantes en moyenne (SimpleImputer)

In [3]:
#Les entrées ne prennent pas en compte la solvabilité et les noms
X=dataimmo.iloc[:,-9:-1].values

#### 3.4.1 Transformer les valeurs manquantes

In [4]:
#Transformation des valeurs manquantes (nan) en moyenne
imptr=SimpleImputer(missing_values=np.nan, strategy='mean')

#Adapter le model aux données
imptr.fit(X[:,0:1])
imptr.fit(X[:,7:8])

X[:,0:1] = imptr.transform(X[:,0:1])
X[:,7:8] = imptr.transform(X[:,7:8])


#### 3.4.2 Encoder les valeurs catégoriques (LabelEncoder)

In [5]:
from sklearn.preprocessing import LabelEncoder

labelencoder = LabelEncoder ()

#Encodage de la colonne 0 du tableau qui est catégorielle --> prenom
X[:, 2] = labelencoder.fit_transform (X [:, 2])
X[:, 2]

array([1, 1, 1, 0, 0, 3, 1, 3, 1, 1, 1, 0, 3, 1, 0, 2, 1, 1, 1],
      dtype=object)

In [6]:
#Avec LabelEncoder
labelencoder = LabelEncoder()

#Encodage de la colonne 5 du tableau qui est catégorielle -->  etat civil
X[:,5]=labelencoder.fit_transform(X[:,5])

#### La sortie correspond à la solvabilité des clients

In [7]:
y=dataimmo.iloc[:,-1].values
y=y.reshape(y.shape[0],1)
print(y.shape)
print(X.shape)

(19, 1)
(19, 8)


#### Normalisation des valeurs de X

In [8]:
scaler = StandardScaler()
X= scaler.fit_transform(X)
print(X)

[[ 0.58695071  0.69964472 -0.16929979 -0.15939982 -0.34223088  0.67936622
   1.24354001  1.32637071]
 [ 1.33877521 -0.77738303 -0.16929979 -0.61392855 -0.45846885  0.67936622
  -0.33161067 -1.06109657]
 [-0.54078605  1.56848458 -0.16929979 -0.73354137  1.21917467 -1.47196014
  -1.11918601 -1.06109657]
 [ 0.96286296 -1.21180295 -1.24153183 -1.35552804 -0.51572038  0.67936622
  -0.33161067  0.        ]
 [-0.54078605 -0.3429631  -1.24153183 -0.7813865  -0.38560325 -1.47196014
  -0.33161067  0.53054828]
 [-0.54078605  0.35210878  1.97516427 -0.15939982 -0.47234801  0.67936622
  -1.11918601  2.12219314]
 [-0.9166983  -0.69049904 -0.16929979  0.31905146 -0.38560325 -1.47196014
  -1.11918601 -1.06109657]
 [-1.6685228  -1.12491897  1.97516427 -0.68569624 -0.2988585  -1.47196014
  -1.11918601 -1.06109657]
 [-0.03956971 -0.951151   -0.16929979  0.03198069 -0.51572038  0.67936622
  -0.33161067 -0.26527414]
 [ 0.58695071  0.00457284 -0.16929979  0.11810192 -0.42897563  0.67936622
   0.45596467  0.

#### 3.5. Fractionner le jeu de données pour l’entrainement et le test (Training and Test set)

In [9]:
X_app, X_test, y_app, y_test = train_test_split(X,y,test_size=0.2,random_state=3)
print(X_test)

[[-1.6685228   0.17834081  1.97516427 -0.68569624 -0.30319574  0.67936622
  -1.11918601 -0.26527414]
 [-0.54078605  1.56848458 -0.16929979 -0.73354137  1.21917467 -1.47196014
  -1.11918601 -1.06109657]
 [ 1.33877521 -0.77738303 -0.16929979 -0.61392855 -0.45846885  0.67936622
  -0.33161067 -1.06109657]
 [ 2.46651197  0.78652871 -0.16929979  1.75440532 -0.16874137  0.67936622
  -1.11918601  1.32637071]]


In [10]:
print(X_app.shape, X_test.shape, y_app.shape, y_test.shape)

(15, 8) (4, 8) (15, 1) (4, 1)


#### 3.6. mise à l’échelle des features : StandardScaler

In [11]:
#Normalisation des valeurs de X
scaler = StandardScaler()
print(X_test)
#print(X_app)
print('===========================')
X_test2 = scaler.fit_transform(X_test)
print(X_test2)

[[-1.6685228   0.17834081  1.97516427 -0.68569624 -0.30319574  0.67936622
  -1.11918601 -0.26527414]
 [-0.54078605  1.56848458 -0.16929979 -0.73354137  1.21917467 -1.47196014
  -1.11918601 -1.06109657]
 [ 1.33877521 -0.77738303 -0.16929979 -0.61392855 -0.45846885  0.67936622
  -0.33161067 -1.06109657]
 [ 2.46651197  0.78652871 -0.16929979  1.75440532 -0.16874137  0.67936622
  -1.11918601  1.32637071]]
[[-1.28745262 -0.30382181  1.73205081 -0.58444479 -0.56019702  0.57735027
  -0.57735027  0.        ]
 [-0.58520574  1.31656118 -0.57735027 -0.62983856  1.71165918 -1.73205081
  -0.57735027 -0.81649658]
 [ 0.58520574 -1.41783511 -0.57735027 -0.51635413 -0.79191341  0.57735027
   1.73205081 -0.81649658]
 [ 1.28745262  0.40509575 -0.57735027  1.73063748 -0.35954875  0.57735027
  -0.57735027  1.63299316]]
