In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV,train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,roc_curve,auc
import xgboost as xgb
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [3]:
# Folder that holds the input datasets (machine-specific; adjust when running elsewhere).
path = r'C:\Users\karl\Documents\datasets'
# Build the CSV location from `path` so the folder is declared in exactly one place.
df_seg = pd.read_csv(rf"{path}\Segmentation.csv")
# Preview the first ten rows.
df_seg.head(10)

Unnamed: 0,CustomerID,Recency,Tenure,Frequency,MonetaryValue,Segment
0,12346,326,326,2,0.0,Fence Sitter
1,12349,19,19,73,1757.55,Potential
2,12350,310,310,17,334.4,Fence Sitter
3,12353,204,204,4,89.0,Fence Sitter
4,12354,232,232,58,1079.4,Fence Sitter
5,12355,214,214,13,459.4,Fence Sitter
6,12357,33,33,131,6207.67,Potential
7,12358,2,151,19,1168.06,Potential
8,12361,287,287,10,189.9,Fence Sitter
9,12364,8,112,85,1313.1,Potential


In [4]:
# Number of distinct values in the target column "Segment".
labels = df_seg["Segment"].nunique()
labels

3

In [7]:
# Absolute number of rows per segment, sorted from the rarest to the most frequent.
df_seg["Segment"].value_counts(ascending=True)

Loyal            429
Fence Sitter    1030
Potential       1587
Name: Segment, dtype: int64

In [8]:
# Same breakdown expressed as relative frequencies (normalize=True) to expose class imbalance.
df_seg["Segment"].value_counts(ascending=True,normalize=True)

Loyal           0.140840
Fence Sitter    0.338148
Potential       0.521011
Name: Segment, dtype: float64

In [9]:
# Summary of the frame: column dtypes, non-null counts and memory usage.
df_seg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3046 entries, 0 to 3045
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   CustomerID     3046 non-null   int64  
 1   Recency        3046 non-null   int64  
 2   Tenure         3046 non-null   int64  
 3   Frequency      3046 non-null   int64  
 4   MonetaryValue  3046 non-null   float64
 5   Segment        3046 non-null   object 
dtypes: float64(1), int64(4), object(1)
memory usage: 142.9+ KB


In [10]:
# Target: the segment label. Features: every column except the identifier and the target.
y = df_seg["Segment"]
X = df_seg.drop(columns=["CustomerID", "Segment"])

# 80/20 split with a fixed seed, stratified on the target so both parts
# keep the same segment proportions.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
    stratify=y,
)

In [13]:
# One-vs-rest wrapper around a logistic regression using the lbfgs solver.
# NOTE(review): features are passed unscaled — confirm lbfgs converges on them.
ovr = OneVsRestClassifier(estimator=LogisticRegression(solver="lbfgs"))
ovr.fit(X=x_train, y=y_train)

In [18]:
def _safe_ratio(numerator, denominator):
    """Element-wise numerator / denominator, yielding NaN where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    return np.divide(
        numerator,
        denominator,
        out=np.full_like(denominator, np.nan),
        where=denominator != 0,
    )


def _summarise(per_class_values):
    """Collapse per-class rates into the single number that gets printed.

    With exactly two classes the value of the class at index 1 (the positive
    class in scikit-learn's label ordering) is returned; otherwise the
    macro-average over the classes whose rate is defined (not NaN).
    """
    if per_class_values.shape[0] == 2:
        return per_class_values[1]
    defined = per_class_values[~np.isnan(per_class_values)]
    return defined.mean() if defined.size else float("nan")


def one_stat_result(model, x_train, y_train, x_test, y_test,b_seuil_label=False):
    """Print evaluation statistics of a fitted classifier on the test split.

    Prints the test predictions, the confusion matrix, the classification
    report, then accuracy, error rate, recall, false-positive rate and
    specificity. The rates are derived from the whole confusion matrix using
    one-vs-rest counts per class, so they are valid for any number of classes:
    for a binary problem they describe the positive class (index 1), for more
    classes they are macro-averaged.

    Parameters
    ----------
    model : object exposing ``predict``.
    x_train, y_train : unused; kept so existing calls keep working.
    x_test : test features (DataFrame or array-like).
    y_test : true test labels.
    b_seuil_label : bool, default False
        When true, the model output is thresholded at 0.5 and converted to
        integer labels before scoring.

    Returns
    -------
    None
    """
    # The model is fed a plain array as before; inputs that are already arrays are accepted too.
    test_features = x_test.to_numpy() if hasattr(x_test, "to_numpy") else np.asarray(x_test)
    test_predictions = model.predict(test_features)
    if b_seuil_label:
        test_predictions = (np.asarray(test_predictions) >= 0.5).astype(int)
    print("test_predictions:")
    print(test_predictions)
    cm = confusion_matrix(y_test, test_predictions)
    print(cm)
    print(">> resultat de la classification:")
    print(classification_report(y_test, test_predictions))

    # confusion_matrix rows are true labels and columns are predicted labels,
    # so for class k: TP = cm[k, k], FP = rest of column k, FN = rest of row k.
    total = float(cm.sum())
    TP = np.diag(cm).astype(float)
    FP = cm.sum(axis=0) - TP
    FN = cm.sum(axis=1) - TP
    TN = total - (TP + FP + FN)

    n_classes = cm.shape[0]
    if n_classes > 2:
        print('Multiclasse ({0} classes) : moyennes macro un-contre-tous'.format(n_classes))

    # Overall accuracy is the share of the diagonal, whatever the number of classes.
    classification_accuracy = TP.sum() / total if total else float("nan")
    print('Justesse de la Classification (accuracy) : {0:0.4f}'.format(classification_accuracy))
    classification_error = (total - TP.sum()) / total if total else float("nan")
    print('Erreurs de Classification : {0:0.4f}'.format(classification_error))
    recall = _summarise(_safe_ratio(TP, TP + FN))
    print('Recall ou Sensitivity : {0:0.4f}'.format(recall))
    print("taux de faux positive:")
    false_positive_rate = _summarise(_safe_ratio(FP, FP + TN))
    print(false_positive_rate)
    print('False Positive Rate : {0:0.4f}'.format(false_positive_rate))
    specificity = _summarise(_safe_ratio(TN, TN + FP))
    print('Specificity : {0:0.4f}'.format(specificity))

In [19]:
# Evaluate the lbfgs one-vs-rest model on the held-out split (predictions are already labels).
one_stat_result(
    model=ovr,
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    b_seuil_label=False,
)

test_predictions:
['Potential' 'Potential' 'Fence Sitter' 'Potential' 'Fence Sitter'
 'Potential' 'Loyal' 'Potential' 'Potential' 'Fence Sitter' 'Fence Sitter'
 'Potential' 'Loyal' 'Potential' 'Potential' 'Potential' 'Potential'
 'Fence Sitter' 'Fence Sitter' 'Potential' 'Potential' 'Fence Sitter'
 'Potential' 'Potential' 'Potential' 'Fence Sitter' 'Potential'
 'Fence Sitter' 'Potential' 'Potential' 'Potential' 'Potential' 'Loyal'
 'Fence Sitter' 'Potential' 'Potential' 'Loyal' 'Fence Sitter' 'Potential'
 'Potential' 'Potential' 'Fence Sitter' 'Potential' 'Fence Sitter'
 'Fence Sitter' 'Fence Sitter' 'Potential' 'Fence Sitter' 'Fence Sitter'
 'Potential' 'Fence Sitter' 'Fence Sitter' 'Loyal' 'Potential'
 'Fence Sitter' 'Potential' 'Fence Sitter' 'Fence Sitter' 'Potential'
 'Potential' 'Potential' 'Potential' 'Fence Sitter' 'Potential'
 'Potential' 'Potential' 'Loyal' 'Fence Sitter' 'Potential' 'Fence Sitter'
 'Potential' 'Fence Sitter' 'Potential' 'Fence Sitter' 'Loyal' 'Potential'
 'P



In [21]:
# Second one-vs-rest model: L1-penalised logistic regression with the liblinear solver.
ovr2 = OneVsRestClassifier(
    estimator=LogisticRegression(penalty='l1', solver="liblinear")
)
ovr2.fit(X=x_train, y=y_train)
# Same evaluation as for the first model; the threshold flag keeps its default (False).
one_stat_result(ovr2, x_train, y_train, x_test, y_test)

test_predictions:
['Potential' 'Potential' 'Fence Sitter' 'Potential' 'Fence Sitter'
 'Potential' 'Loyal' 'Potential' 'Potential' 'Fence Sitter' 'Fence Sitter'
 'Potential' 'Loyal' 'Potential' 'Potential' 'Potential' 'Potential'
 'Fence Sitter' 'Fence Sitter' 'Potential' 'Potential' 'Fence Sitter'
 'Potential' 'Potential' 'Potential' 'Fence Sitter' 'Potential'
 'Fence Sitter' 'Potential' 'Potential' 'Potential' 'Potential' 'Loyal'
 'Fence Sitter' 'Potential' 'Potential' 'Loyal' 'Fence Sitter' 'Potential'
 'Potential' 'Potential' 'Fence Sitter' 'Potential' 'Fence Sitter'
 'Fence Sitter' 'Fence Sitter' 'Potential' 'Fence Sitter' 'Fence Sitter'
 'Potential' 'Fence Sitter' 'Fence Sitter' 'Loyal' 'Potential'
 'Fence Sitter' 'Potential' 'Fence Sitter' 'Fence Sitter' 'Potential'
 'Potential' 'Potential' 'Potential' 'Fence Sitter' 'Potential'
 'Potential' 'Potential' 'Loyal' 'Fence Sitter' 'Potential' 'Fence Sitter'
 'Potential' 'Fence Sitter' 'Potential' 'Fence Sitter' 'Loyal' 'Potential'
 'P

