### Despliegue de modelos de aprendizaje automático

Reutilizamos el cuaderno anterior para experimentos de despliegue.

In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

In [None]:
# Load the raw dataset from the CSV file.
df = pd.read_csv('data.csv')

# Entries of TotalCharges that cannot be parsed as numbers become NaN
# (errors='coerce') and are then replaced by 0.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

# Normalise the column names: lower case, spaces replaced by underscores.
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Apply the same normalisation to the values of every object-dtype column.
cadena_columnas = list(df.dtypes[df.dtypes == 'object'].index)
for col in cadena_columnas:
    df[col] = df[col].str.lower().str.replace(' ', '_')

# Encode the target as integers: 1 where the normalised value is 'yes', else 0.
df['churn'] = (df['churn'] == 'yes').astype(int)

Modelo completo anterior

In [None]:
# Hold out 20% of the rows as the test set; both parts get a fresh 0..n-1 index.
df_train_completo, df_test = (
    parte.reset_index(drop=True)
    for parte in train_test_split(df, test_size=0.2, random_state=1)
)

# Split the remaining rows again: 33% for validation, the rest for training.
df_train, df_val = (
    parte.reset_index(drop=True)
    for parte in train_test_split(df_train_completo, test_size=0.33, random_state=11)
)

# Take the target out of the train/validation frames; pop() returns the
# column and removes it from the frame in one step.
y_train = df_train.pop('churn').values
y_val = df_val.pop('churn').values

In [None]:
# Feature columns selected by the training and prediction helpers.
categoricas = [
    'gender',
    'seniorcitizen',
    'partner',
    'dependents',
    'phoneservice',
    'multiplelines',
    'internetservice',
    'onlinesecurity',
    'onlinebackup',
    'deviceprotection',
    'techsupport',
    'streamingtv',
    'streamingmovies',
    'contract',
    'paperlessbilling',
    'paymentmethod',
]
numericas = [
    'tenure',
    'monthlycharges',
    'totalcharges',
]


In [None]:
# Install a global "ignore" filter: every warning raised after this cell is
# suppressed (presumably to keep the cell outputs uncluttered).
# NOTE(review): this also hides deprecation and convergence warnings;
# consider narrowing the filter to a specific category or module.
import warnings
warnings.filterwarnings("ignore")

#### Mejores parámetros


In [None]:
def entrenamiento(df, y, C=1.0):
    """Fit a DictVectorizer and a logistic regression on the given frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns listed in ``categoricas`` and ``numericas``;
        any other column is ignored.
    y : array-like
        Target values, one per row of ``df``.
    C : float, default 1.0
        Inverse regularisation strength forwarded to ``LogisticRegression``.

    Returns
    -------
    tuple
        ``(dv, modelRL)``: the fitted ``DictVectorizer`` and the fitted
        ``LogisticRegression``.
    """
    # One dict per row, restricted to the selected feature columns.
    registros = df[categoricas + numericas].to_dict(orient='records')

    dv = DictVectorizer(sparse=False)
    X = dv.fit_transform(registros)

    # C must be passed explicitly; otherwise the caller's value is silently
    # ignored and the estimator always trains with its default strength.
    modelRL = LogisticRegression(solver='liblinear', C=C)
    modelRL.fit(X, y)

    return dv, modelRL

def predict(df, dv, modelo):
    """Score every row of ``df`` with an already fitted vectorizer and model.

    Only the columns listed in ``categoricas`` and ``numericas`` are used.
    Returns column 1 of ``modelo.predict_proba`` for the transformed rows.
    """
    columnas = categoricas + numericas
    registros = df[columnas].to_dict(orient='records')
    probabilidades = modelo.predict_proba(dv.transform(registros))
    return probabilidades[:, 1]

Ahora entrenamos el modelo con todos los datos de entrenamiento y validación, y lo aplicamos al conjunto de datos de prueba para verificar que realmente funciona bien.


In [None]:
# Targets for the full training frame (train + validation rows) and the test set.
y_train = df_train_completo['churn'].values
y_test = df_test['churn'].values

# Fit on the full training frame, then score the held-out test rows.
dv, modelo = entrenamiento(df_train_completo, y_train, C=0.5)
y_pred = predict(df_test, dv, modelo)

# Report the ROC AUC on the test set, rounded to three decimals.
auc = roc_auc_score(y_test, y_pred)
print('auc = %.3f' % auc)

In [None]:
# Example customer record used to try out the trained model. Besides the
# identifier, the keys are the feature names listed in categoricas/numericas.
cliente = {
    'customerid': '8879-zkjof',
    'gender': 'female', 'seniorcitizen': 0,
    'partner': 'no', 'dependents': 'no',
    'tenure': 41,
    'phoneservice': 'yes', 'multiplelines': 'no',
    'internetservice': 'dsl',
    'onlinesecurity': 'yes', 'onlinebackup': 'no',
    'deviceprotection': 'yes', 'techsupport': 'yes',
    'streamingtv': 'yes', 'streamingmovies': 'yes',
    'contract': 'one_year',
    'paperlessbilling': 'yes',
    'paymentmethod': 'bank_transfer_(automatic)',
    'monthlycharges': 79.85, 'totalcharges': 3320.75,
}

In [None]:
# Wrap the single customer in a one-row frame so it can go through predict().
# A dedicated name is used so the dataset frame `df` is not overwritten;
# earlier cells that read `df` therefore stay re-runnable.
df_cliente = pd.DataFrame([cliente])
y_pred = predict(df_cliente, dv, modelo)
y_pred[0]

In [None]:
def predict_cliente(cliente, dv, modelo):
    """Score a single customer given as a feature dict.

    The dict is wrapped in a one-element list, transformed with ``dv`` and
    passed to ``modelo.predict_proba``; the column-1 value of the only row
    is returned.
    """
    matriz = dv.transform([cliente])
    probabilidades = modelo.predict_proba(matriz)
    return probabilidades[:, 1][0]

In [None]:
# Score the example customer straight from its dict, without building a DataFrame.
predict_cliente(cliente, dv, modelo)