#### Importation des packakges 

In [131]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.decomposition import PCA 
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from  sklearn.linear_model import LogisticRegression

#### Chargement du jeu de donnees 

In [18]:
data = sns.load_dataset("iris")
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [12]:
data.species.value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

#### Separation du dataset en une matrice de features( X) et target (y )

In [15]:
X = data.drop(columns=["species"])
y = data.loc[:, "species"]

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=49)

#### Normaliser le jeu de donnees 

In [108]:
target_names = list(X_train.columns)
scaler = StandardScaler()
X_train_scaler = scaler.fit_transform(X_train)
X_train_scaler = pd.DataFrame(X_train_scaler, columns=target_names )
X_train_scaler


array([0.82536654, 0.39744764, 1.74900399, 0.76407192])

In [103]:
# Etape 1 : instanciation du model 
model = LogisticRegression()

# Eatape 2 : Entrainement du model 
model.fit(X_train_scaler, y_train)


LogisticRegression()

In [104]:
new_fleurs = X_train_scaler.iloc[0,:]

In [105]:
new_fleurs_data = pd.DataFrame(data=new_fleurs.values.reshape(-1, 4), columns = new_fleurs.index)
new_fleurs_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,-0.384245,-1.682162,0.060443,-0.102209


In [106]:
model.predict(new_fleurs_data)

array(['versicolor'], dtype=object)

In [111]:
X_test_scaler = scaler.transform(X_test)
X_test_scaler = pd.DataFrame(X_test_scaler, columns=target_names )

In [112]:
model.score(X_test_scaler, y_test)

0.9555555555555556

In [66]:
def score(X, y): 
    pred = model.predict(X)

    score = (pred == y).sum() / y.shape[0] 

    return score 

In [67]:
score(X_test, y_test)

0.9333333333333333

#### Reglage des hyperparametres 

In [72]:
model.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [84]:
param = { 
    'max_iter': [100, 400],
    'tol': [0.0001, 0.002]

}

In [85]:
grid = GridSearchCV(model, param_grid=param)

In [86]:
grid.fit(X_train, y_train)

GridSearchCV(estimator=LogisticRegression(),
             param_grid={'max_iter': array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
       113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
       126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
       139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
       152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
       165, 166, 167, 168,...
       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,
       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,
       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,
       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,
       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,
       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,
       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])})

In [87]:
grid.best_params_

{'max_iter': 100, 'tol': 0.8163265306122448}

In [88]:
model = grid.best_estimator_

In [90]:
model.score(X_test , y_test)

0.9333333333333333

In [96]:
X_train 

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
80,5.5,2.4,3.8,1.1
146,6.3,2.5,5.0,1.9
24,4.8,3.4,1.9,0.2
129,7.2,3.0,5.8,1.6
145,6.7,3.0,5.2,2.3
...,...,...,...,...
124,6.7,3.3,5.7,2.1
126,6.2,2.8,4.8,1.8
133,6.3,2.8,5.1,1.5
140,6.7,3.1,5.6,2.4


In [98]:
list(X_train.columns)

['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

In [101]:
X_train 

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
80,5.5,2.4,3.8,1.1
146,6.3,2.5,5.0,1.9
24,4.8,3.4,1.9,0.2
129,7.2,3.0,5.8,1.6
145,6.7,3.0,5.2,2.3
...,...,...,...,...
124,6.7,3.3,5.7,2.1
126,6.2,2.8,4.8,1.8
133,6.3,2.8,5.1,1.5
140,6.7,3.1,5.6,2.4


#### Utilisation des pipline 

In [127]:
model_pipe =  make_pipeline( StandardScaler(),VarianceThreshold(),LogisticRegression())

In [128]:
model_pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('variancethreshold', VarianceThreshold(threshold=0.9)),
                ('logisticregression', LogisticRegression())])

In [129]:
model_pipe.score(X_test, y_test)

0.9555555555555556