# Live 07 - Deploy de Modelos com Flask

Os templates HTML foram modificados para apresentação com base nesse [artigo](https://medium.com/star-gazers/building-churn-predictor-with-python-flask-html-and-css-fbab760e8441)

Os templates utilizados são do [Multi Boostrap Template](https://bootstrapmade.com/multi-responsive-bootstrap-template/)

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
df_treino = pd.read_csv('./data/train.csv')
df_teste = pd.read_csv('./data/test.csv')

In [3]:
df_treino.dtypes

id                    int64
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure              float64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                 int64
dtype: object

In [4]:
df_treino.columns

Index(['id', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [5]:
df_treino['Churn'].value_counts()

0    4139
1    1495
Name: Churn, dtype: int64

In [6]:
df_treino['Churn'].value_counts()[0] / len(df_treino['Churn'])

0.7346467873624423

In [7]:
df_treino.head()

Unnamed: 0,id,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,4030,Female,0,No,No,56.0,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,Two year,No,,45.05,2560.1,0
1,6731,Male,0,Yes,Yes,,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,One year,No,Mailed check,19.65,332.65,0
2,6479,Female,0,Yes,No,60.0,Yes,Yes,Fiber optic,No,...,Yes,Yes,Yes,Yes,Two year,Yes,Credit card (automatic),110.8,6640.7,0
3,6861,Female,0,No,No,37.0,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),101.9,3545.35,1
4,3266,Male,0,Yes,Yes,29.0,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.75,1974.8,1


In [8]:
df_treino['TotalCharges'].dtype

dtype('O')

In [9]:
df_treino['TotalCharges'] = pd.to_numeric(df_treino['TotalCharges'], errors='coerce')
df_treino['TotalCharges'] = pd.to_numeric(df_treino['TotalCharges'], errors='coerce')


In [10]:
df_treino.dtypes

id                    int64
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure              float64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
Churn                 int64
dtype: object

In [11]:
df_treino.isna().sum()

id                    0
gender                0
SeniorCitizen         0
Partner               0
Dependents          218
tenure              461
PhoneService          0
MultipleLines         0
InternetService       0
OnlineSecurity        0
OnlineBackup          0
DeviceProtection      0
TechSupport           0
StreamingTV           0
StreamingMovies       0
Contract              0
PaperlessBilling      0
PaymentMethod        99
MonthlyCharges        0
TotalCharges          8
Churn                 0
dtype: int64

In [12]:
df_treino = df_treino.dropna()
df_treino.isna().sum()

id                  0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [13]:
X = df_treino.drop(['id', 'Churn'], axis=1)
y = df_treino[['Churn']]

In [14]:
X_treino, X_valid, y_treino, y_valid = train_test_split(X, y)

In [15]:
X_treino.shape, X_valid.shape, y_treino.shape, y_valid.shape

((3657, 19), (1219, 19), (3657, 1), (1219, 1))

In [16]:
X_treino.isna().sum()

gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
dtype: int64

In [17]:
X_treino.dtypes

gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure              float64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
dtype: object

In [18]:
colunas_categoricas = X_treino.select_dtypes(include="object").columns
colunas_numericas = X_treino.select_dtypes(exclude="object").columns

In [19]:
colunas_categoricas

Index(['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
       'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
       'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
       'PaperlessBilling', 'PaymentMethod'],
      dtype='object')

In [20]:
colunas_numericas

Index(['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges'], dtype='object')

In [21]:
OHE = OneHotEncoder()
scaler = StandardScaler()
RFC = RandomForestClassifier()


transformer = ColumnTransformer([('cat_cols', OHE, colunas_categoricas),
                                ('num_cols', scaler, colunas_numericas)])

pipe = Pipeline([("preprocessing", transformer),
                ("classifier", RFC)])

pipe.fit(X_treino, y_treino)

  self._final_estimator.fit(Xt, y, **fit_params)


Pipeline(memory=None,
         steps=[('preprocessing',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('cat_cols',
                                                  OneHotEncoder(categories='auto',
                                                                drop=None,
                                                                dtype=<class 'numpy.float64'>,
                                                                handle_unknown='error',
                                                                sparse=True),
                                                  Index(['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
       'Internet...
                 RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                        class_weight=None, crit

In [22]:
X_valid.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges'],
      dtype='object')

In [23]:
predicoes = pipe.predict(X_valid)
predicoes[:10]

array([0, 0, 0, 0, 0, 1, 1, 0, 0, 0], dtype=int64)

In [24]:
y_valid.values[:10].flatten()

array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0], dtype=int64)

In [25]:
accuracy_score(y_valid, predicoes)

0.8039376538146021

In [26]:
# Exportando o modelo final para implantação
import pickle

pickle.dump(pipe, open('./models/pipe.pkl', 'wb'))
