# Clasificación multiclase

![mlr](../../images/mlr.png)


![telecom](../../images/telecom.jpeg)

In [None]:
import pandas as pd                                # panel data, for handling dataframes
pd.set_option('display.max_columns', None)         # show all columns of the dataframe

import numpy as np                                 # numerical python, linear algebra library

import pylab as plt                                # plotting library
import seaborn as sns                              # plotting library
sns.set(style='white')                             # seaborn style


from sklearn.linear_model import LogisticRegression, SGDClassifier # logistic regression model & Gradient Descent                  
from sklearn.preprocessing import MinMaxScaler               # min-max scaling of features to the [0, 1] range
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import f1_score, confusion_matrix

from sklearn.model_selection import train_test_split as tts    # split data into train and test sets

## Cargar Datos

In [None]:
# Read the churn CSV (relative path) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../../../data/churn.csv')

# Preview the first rows to check the load.
df.head(n=5)

In [None]:
# Absolute number of rows per contract type.
df['Contract'].value_counts()

In [None]:
# Fraction of all rows that falls in each contract type, drawn as a bar chart.
contract_share = df['Contract'].value_counts() / len(df)
contract_share.plot.bar(
    title='Contract type',
    color=['b', 'r', 'g'],
    figsize=(10, 6),
    fontsize=12,
    rot=0,
);

## Transformación

In [None]:
# Numeric (and boolean) columns, selected once through the public pandas API
# instead of calling the private ``DataFrame._get_numeric_data`` twice.
df_num = df.select_dtypes(include=['number', 'bool'])

# Rescale every numeric column to the [0, 1] range (min-max scaling).
# NOTE: the scaler is fitted on the full dataset, i.e. before the train/test
# split performed later in the notebook, so test rows influence the scaling.
data_num = pd.DataFrame(MinMaxScaler().fit_transform(df_num),
                        columns=df_num.columns)

# Categorical (object-dtype) columns, without the customer identifier.
data_obj = df.select_dtypes(include='object').drop('customerID', axis=1)

# Numeric and categorical columns side by side.
data = pd.concat([data_num, data_obj], axis=1)

In [None]:
# One-hot encode (dummy variables) every categorical feature; the target
# column 'Contract' is left out so it is not expanded into dummies.
categorical_features = data_obj.drop(columns='Contract')
dum = pd.get_dummies(categorical_features)

In [None]:
# Final table: scaled numeric columns, one-hot features, and the raw target.
data = pd.concat([data_num, dum, data_obj['Contract']], axis='columns')

In [None]:
# Preview the assembled table.
data.head(n=5)

In [None]:
# The dependent variable has to be encoded too: map each contract type
# to an integer class id.
le = LabelEncoder()
data['Contract'] = le.fit_transform(data['Contract'])

# Preview the table with the encoded target.
data.head(n=5)

In [None]:
# Same class-share bar chart as before, now over the encoded target ids.
encoded_share = data['Contract'].value_counts() / len(data)
encoded_share.plot.bar(
    title='Contract type',
    color=['b', 'r', 'g'],
    figsize=(10, 6),
    fontsize=12,
    rot=0,
);

## Modelado

In [None]:
# Feature matrix: every column except the target ('Contract') and
# 'ChurnBinary', which the original author notes duplicates Churn.
X = data.drop(columns=['Contract', 'ChurnBinary'])

# Target vector: the encoded contract type.
y = data['Contract']

In [None]:
# Hold out a test set, stratifying on the target so both parts keep the same
# class proportions, and fixing the seed so the split is repeatable.
X_train, X_test, y_train, y_test = tts(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [None]:
# Candidate classifiers. A fixed seed is set on the stochastic estimators so
# that runs are repeatable, matching the seeded train/test split.
lr = LogisticRegression(max_iter=2000)
rf = RandomForestClassifier(random_state=42)
gd = SGDClassifier(random_state=42)
xgb = XGBClassifier(random_state=42)

# The models are deliberately left unfitted here: the evaluation loop in the
# next cell calls ``fit`` on each of them, so fitting here as well would only
# train every model twice.
models = [lr, rf, gd, xgb]

In [None]:
def _plot_confusion(matrix, title):
    """Draw a confusion matrix as an annotated heatmap with the given title."""
    plt.figure(figsize=(12, 8))
    # fmt='d' prints the integer counts as-is instead of seaborn's default
    # '.2g' format, which renders large counts in scientific notation.
    sns.heatmap(matrix, annot=True, fmt='d')
    plt.title(title)
    plt.show()


# Maps each model's class name to its macro-averaged F1 on train and test.
res = {}
for model in models:
    # Class name of the estimator, used as label in prints, plots and results.
    name = type(model).__name__

    print(f'Entrenando {name} .....\n')
    model.fit(X_train, y_train)
    print(f'{name} Entrenado \n')

    print('Realizando predicciones .... \n')
    pred = model.predict(X_train)
    pred_test = model.predict(X_test)

    print(f'Evaluando {name} .....\n')
    # Macro average: unweighted mean of the per-class F1 scores.
    f1 = f1_score(y_train, pred, average='macro')
    f1_test = f1_score(y_test, pred_test, average='macro')
    cm = confusion_matrix(y_train, pred)
    cm_test = confusion_matrix(y_test, pred_test)

    res[name] = {'f1_train': f1,
                 'f1_test': f1_test}

    print(f'Resultados {name} \n')
    print(res[name])

    _plot_confusion(cm, f'{name} Confusion Matrix Train')
    _plot_confusion(cm_test, f'{name} Confusion Matrix Test')
    

In [None]:
# Summary table: one column per model, one row per F1 score (train / test).
pd.DataFrame.from_dict(res, orient='columns')