In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np

In [None]:
# Generate a synthetic binary-classification dataset.
#   n_samples    : number of observations (rows) to generate
#   n_features   : total number of explanatory variables (columns)
#   n_redundant  : number of redundant features, i.e. random linear combinations
#                  of the informative ones; 0 means none are generated
#   n_classes    : number of classes (labels) of the classification problem
#   random_state : seed of the random generator, so every run yields the same data
X, y = make_classification(
    n_samples=500,
    n_features=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

In [None]:
# Report the dimensions of the feature matrix and of the label vector.
print(
    f"le nombres de lignes et de colonnes de X est {X.shape}",
    f"le nombres de lignes et de colonnes de y est {y.shape}",
    sep="\n",
)

---
# 2. Découpons les données en données d’apprentissage et données de test
---

In [None]:
# Hold out 20 % of the observations for testing; the fixed seed keeps the
# split identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Report the dimensions of the training and test splits.
print(
    f"X_train est sous la forme de {X_train.shape} et y_train est sous forme de {y_train.shape}",
    f"X_test est sous la forme de {X_test.shape} et y_test est sous forme de {y_test.shape}",
    sep="\n",
)

---
# **3**
---

# > Programmer l’algorithme de Gradient Descent sur la régression logistique

### Etape 1 : redimensionnement des variables sous forme matricielle

In [None]:
# Put the data in the layout used by the gradient-descent code below:
# features as (n_features, n_samples) and labels as (1, n_samples).
#
# reshape(1, -1) is a no-op on labels that are already a row vector, and the
# transpose is applied only while the rows of X still are the samples, so
# running this cell a second time leaves the arrays untouched instead of
# flipping them back and raising on the label reshape.
y_train = y_train.reshape(1, -1)
y_test = y_test.reshape(1, -1)

if X_train.shape[0] == y_train.shape[1]:
    X_train = X_train.T
if X_test.shape[0] == y_test.shape[1]:
    X_test = X_test.T


In [None]:
# Show the shape of each array after the reshaping step, one per line.
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

### Etape 2 : Definir la fonction Sigmoid

La fonction sigmoïde est définie par : $f(x) = \frac{1}{1+\exp(-x)}$

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of ``x``.

    ``x`` may be a scalar or a NumPy array; with an array the function is
    applied element-wise and the result has the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

### Etape 3 : Définir la fonction de coût et la descente de gradient

$cost = - \frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})\right]$

$dW = \frac{\partial cost}{\partial W} = \frac{1}{m}(A - Y)X^{T}$, de dimension $(1 \times n)$

$dB = \frac{\partial cost}{\partial B} = \frac{1}{m}\sum_{i=1}^{m}(a^{(i)} - y^{(i)})$

$W = W - \alpha*dW^{T}$

$B = B-\alpha*dB$

In [None]:
def gradient_descent(X, Y, learning_rate, iterations):
    """Train a logistic-regression model with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix laid out as (n_features, n_samples).
    Y : numpy.ndarray
        Labels, broadcastable against the (1, n_samples) activations
        (the notebook passes a (1, n_samples) array of 0/1 values).
    learning_rate : float
        Step size applied to both gradients.
    iterations : int
        Number of gradient-descent updates to perform.

    Returns
    -------
    W : numpy.ndarray
        Learned weights, shape (n_features, 1).
    B : float
        Learned bias (still 0 when ``iterations`` is 0).
    cost_list : list
        Cross-entropy cost measured at every iteration, before the update.
    """
    # Read the dimensions from the data actually passed in; relying on the
    # notebook-level X_train would silently break for any other dataset.
    n, m = X.shape

    W = np.zeros((n, 1))
    B = 0

    cost_list = []

    # Print progress about every tenth of the run. An integer step of at
    # least 1 keeps the modulo test exact (no float remainder) and
    # well-defined when iterations < 10.
    report_every = max(1, iterations // 10)

    # Probabilities are clipped inside the logarithms only, so a saturated
    # sigmoid cannot turn the cost into inf/nan; gradients use the raw values.
    eps = 1e-15

    for i in range(iterations):
        # Forward pass: linear score, then probability of class 1.
        Z = np.dot(W.T, X) + B
        A = sigmoid(Z)

        # Cross-entropy cost averaged over the samples.
        A_safe = np.clip(A, eps, 1 - eps)
        cost = -(1/m)*np.sum(Y*np.log(A_safe) + (1-Y)*np.log(1-A_safe))

        # Gradients of the cost: dW has shape (1, n_features), dB is a scalar.
        dW = (1/m)*np.dot(A - Y, X.T)
        dB = (1/m)*np.sum(A - Y)

        # Parameter update; dW is transposed to match W's column layout.
        W = W - learning_rate*dW.T
        B = B - learning_rate*dB

        # Keep the history of the cost for later inspection.
        cost_list.append(cost)

        if i % report_every == 0:
            print("Le cout avant ", i, "iteration est : ", cost)

    return W, B, cost_list

In [None]:
# Hyper-parameters of the training run.
learning_rate = 1e-3
iterations = 100_000

# Fit the weights and bias on the training split, keeping the cost history.
W, B, cost_list = gradient_descent(
    X_train,
    y_train,
    learning_rate=learning_rate,
    iterations=iterations,
)

# > Prédiction des données de test

In [None]:
def accuracy(X, Y, W, B):
    """Print the percentage of samples whose predicted label equals ``Y``.

    The prediction is sigmoid(W.T @ X + B) thresholded at 0.5 (strictly
    greater means class 1). The number of samples is taken from
    ``Y.shape[1]``, so ``Y`` must be two-dimensional. Nothing is returned.
    """
    probabilities = sigmoid(np.dot(W.T, X) + B)

    # Boolean mask turned into 0/1 integer labels.
    predictions = (probabilities > 0.5).astype('int64')

    # With 0/1 labels, |prediction - truth| is 1 exactly on the errors.
    n_samples = Y.shape[1]
    n_errors = np.sum(np.absolute(predictions - Y))
    acc = (1 - n_errors/n_samples)*100

    print("Accuracy of the model is : ", round(acc, 2), "%")


# > Score

In [None]:
# Score the hand-written model on the held-out test split.
accuracy(X=X_test, Y=y_test, W=W, B=B)

# Verification

**NB**

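*scikit-learn attend `X` sous la forme (n_observations, n_variables) et `y` sous la forme d'un vecteur 1D : il faut donc ré-exécuter le notebook depuis le début, mais sans exécuter la cellule de redimensionnement de l'étape 1, avant de lancer cette vérification.*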

In [None]:
# scikit-learn expects X as (n_samples, n_features) and y as a 1-D vector,
# whereas the gradient-descent section stores them as (n_features, n_samples)
# and (1, n_samples). Normalise the layout here so this cell runs whether or
# not the arrays have been reshaped earlier in the notebook.
y_train_sk = y_train.reshape(-1)
X_train_sk = X_train if X_train.shape[0] == y_train_sk.shape[0] else X_train.T

model = LogisticRegression()
result = model.fit(X_train_sk, y_train_sk)

In [None]:
from sklearn import metrics

# scikit-learn needs one row per sample and 1-D labels; the test arrays may
# have been stored as (n_features, n_samples) and (1, n_samples) earlier in
# the notebook, so bring them back to the expected layout first.
y_test_sk = y_test.reshape(-1)
X_test_sk = X_test if X_test.shape[0] == y_test_sk.shape[0] else X_test.T

prediction_test = model.predict(X_test_sk)
# Print the prediction accuracy (fraction of correctly classified samples)
print (metrics.accuracy_score(y_test_sk, prediction_test))