In [78]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as pyplot
%matplotlib inline

import pickle
from sklearn.feature_extraction import DictVectorizer

In [79]:
def read_dataframe(filename: str):
    """Load a CSV file into a pandas DataFrame.

    Args:
        filename: Location of the CSV file; forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` using its default options.
    """
    return pd.read_csv(filename)

In [80]:
def preprocessor(df: pd.DataFrame):
    """Return a cleaned copy of ``df`` with normalised headers and text values.

    Steps, in order:
      1. Column names are lower-cased and spaces replaced by underscores.
      2. ``totalcharges`` is converted to numeric; values that cannot be
         parsed become 0.
      3. ``seniorcitizen`` values 0 / 1 are replaced by 'no' / 'yes'.
      4. Every text column is lower-cased and has spaces replaced by
         underscores.

    Args:
        df: Input frame. It must contain columns that normalise to
            ``totalcharges`` and ``seniorcitizen``.

    Returns:
        A new DataFrame; the frame passed in is not modified.

    Raises:
        KeyError: If ``totalcharges`` or ``seniorcitizen`` is missing after
            the header normalisation.
    """
    # Work on a copy so the caller's frame stays intact and re-running the
    # cell on the same raw frame always yields the same result.
    df = df.copy()

    # Headers must be normalised BEFORE the lookups below, which use the
    # lower-case names; otherwise capitalised headers raise KeyError.
    df.columns = df.columns.str.lower().str.replace(' ', '_', regex=False)

    df['totalcharges'] = pd.to_numeric(df['totalcharges'], errors='coerce').fillna(0)
    df['seniorcitizen'] = df['seniorcitizen'].replace({0: 'no', 1: 'yes'})

    # Accept both the classic object dtype and pandas' dedicated string dtype.
    string_columns = [
        col
        for col, dtype in df.dtypes.items()
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)
    ]

    for col in string_columns:
        df[col] = df[col].str.lower().str.replace(' ', '_', regex=False)

    return df

In [81]:
# Load the pickled model from disk and bind it to the global `model`,
# which a later cell uses to call `model.predict`.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# this path must only ever point at a trusted artifact.
with open('../notebooks/tracked/model.pkl', "rb") as f_in:
    model = pickle.load(f_in)

In [82]:
def prepareDictionaries(df: pd.DataFrame):
    """Convert the feature columns of ``df`` into a list of per-row dicts.

    Only the fixed categorical and numerical feature columns listed below are
    kept, in that order; any other column in ``df`` is ignored.

    Args:
        df: Frame containing every feature column listed below.

    Returns:
        A list with one ``{column: value}`` dict per row of ``df``.

    Raises:
        KeyError: If any listed feature column is absent from ``df``.
    """
    categorical_features = [
        'gender',
        'seniorcitizen',
        'partner',
        'dependents',
        'phoneservice',
        'multiplelines',
        'internetservice',
        'onlinesecurity',
        'onlinebackup',
        'deviceprotection',
        'techsupport',
        'streamingtv',
        'streamingmovies',
        'contract',
        'paperlessbilling',
        'paymentmethod',
    ]
    numeric_features = ['tenure', 'monthlycharges', 'totalcharges']

    feature_frame = df[[*categorical_features, *numeric_features]]
    return feature_frame.to_dict(orient='records')

In [83]:
def dictionaryVectorizer(dictDF: list, dv=None):
    """Turn a list of feature dicts into a dense feature matrix.

    Args:
        dictDF: List of ``{feature: value}`` dicts, one per row (for example
            the output of ``prepareDictionaries``).
        dv: Optional, already-fitted vectorizer exposing ``transform``. When
            given it is used as-is, so the output columns follow that
            vectorizer's learned feature mapping. When omitted, a new
            ``DictVectorizer(sparse=False)`` is fitted on ``dictDF`` itself.

    Returns:
        The result of ``dv.transform(dictDF)``.

    Note:
        With the default ``dv=None`` the feature columns are derived from
        ``dictDF`` alone. If the matrix is fed to a model that was fitted on
        features from another vectorizer, pass that vectorizer as ``dv`` so
        the column layout matches.
    """
    if dv is None:
        # Legacy behaviour: learn the feature mapping from the data at hand.
        dv = DictVectorizer(sparse=False)
        dv.fit(dictDF)

    return dv.transform(dictDF)

In [84]:
# Scoring pipeline: read the test CSV, clean it, convert rows to feature
# dicts, then vectorize them into the matrix passed to the model.
df = read_dataframe('./tracked/test.csv')
df = preprocessor(df)

dicts = prepareDictionaries(df)

# NOTE(review): dictionaryVectorizer fits a fresh DictVectorizer on this
# data, so the column layout comes from test.csv alone. Presumably the model
# was fitted with a different vectorizer instance — confirm the feature
# order/width matches what `model` expects.
model_input = dictionaryVectorizer(dicts)

In [85]:
# Run the loaded model on the vectorized features; predictions are kept in `y_pred`.
y_pred = model.predict(model_input)