In [108]:
import os
import pickle
import random

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as pyplot
from sklearn.feature_extraction import DictVectorizer

%matplotlib inline

In [109]:
# Paths used by the batch-scoring cells, both relative to the notebook's working directory.
# input_file is the CSV handed to read_dataframe(); output_file is presumably where the
# scored results should land (no cell shown here writes to it yet).
input_file = './tracked/test.csv'
output_file = 'output/test.csv'

In [110]:
def read_dataframe(filename: str):
    """Load a CSV file into a DataFrame using pandas' default parsing.

    Parameters
    ----------
    filename : str
        Location of the CSV (anything ``pd.read_csv`` accepts).

    Returns
    -------
    pd.DataFrame
        The parsed table, returned without any further processing.
    """
    return pd.read_csv(filename)

In [111]:
def preprocessor(df: pd.DataFrame):
    """Normalise a raw customer table before it is turned into model features.

    Steps, in order:

    1. Column names are lower-cased and spaces become underscores.
    2. ``totalcharges`` is converted to numbers; values that cannot be parsed
       (for example blank strings) become 0.
    3. ``seniorcitizen`` values 0/1 are replaced by ``'no'``/``'yes'``.
    4. Every object-dtype column is lower-cased with spaces turned into
       underscores.

    Parameters
    ----------
    df : pd.DataFrame
        Table containing at least the ``totalcharges`` and ``seniorcitizen``
        columns (any capitalisation, since headers are normalised first).

    Returns
    -------
    pd.DataFrame
        A cleaned copy; the frame passed in is left untouched so the cell can
        be re-run safely.

    Raises
    ------
    KeyError
        If ``totalcharges`` or ``seniorcitizen`` is missing after header
        normalisation.
    """
    # Work on a copy so the caller's frame is never mutated behind its back.
    df = df.copy()

    # Headers must be normalised *before* the column lookups below; otherwise
    # a raw header such as 'TotalCharges' would raise KeyError.
    df.columns = df.columns.str.lower().str.replace(' ', '_')

    # errors='coerce' turns unparsable entries into NaN, which are then zeroed.
    df['totalcharges'] = pd.to_numeric(df['totalcharges'], errors='coerce').fillna(0)
    df['seniorcitizen'] = df['seniorcitizen'].replace({0: 'no', 1: 'yes'})

    # Computed after the replacement above so 'seniorcitizen' is included.
    string_columns = list(df.dtypes[df.dtypes == 'object'].index)
    for col in string_columns:
        df[col] = df[col].str.lower().str.replace(' ', '_')

    return df

In [112]:
# Unpickle the model from disk into the notebook-wide `model` variable
# (the path is resolved relative to the current working directory).
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load artifacts that come from a trusted source.
with open('../notebooks/tracked/model.pkl', "rb") as f_in:
    model = pickle.load(f_in)

In [121]:
def prepareDictionaries(df: pd.DataFrame):
    """Convert the feature columns of ``df`` into one dictionary per row.

    Only the columns listed below are kept, in this order (categorical
    features first, numerical features last); any other column is dropped.

    Parameters
    ----------
    df : pd.DataFrame
        Table that contains every feature column named below.

    Returns
    -------
    list of dict
        ``{column: value}`` records, one per row of ``df``.

    Raises
    ------
    KeyError
        If any of the feature columns is missing from ``df``.
    """
    feature_columns = [
        # categorical features
        'gender', 'seniorcitizen', 'partner', 'dependents',
        'phoneservice', 'multiplelines', 'internetservice',
        'onlinesecurity', 'onlinebackup', 'deviceprotection',
        'techsupport', 'streamingtv', 'streamingmovies',
        'contract', 'paperlessbilling', 'paymentmethod',
        # numerical features
        'tenure', 'monthlycharges', 'totalcharges',
    ]
    return df[feature_columns].to_dict(orient='records')

In [114]:
def dictionaryVectorizer(dictDF: list, dv=None):
    """Turn per-row feature dictionaries into the matrix fed to the model.

    Parameters
    ----------
    dictDF : list of dict
        One feature dictionary per row, as produced by ``prepareDictionaries``.
    dv : fitted vectorizer, optional
        An already-fitted object exposing ``transform`` (normally the
        ``DictVectorizer`` fitted on the training data). When supplied it is
        used as-is and never refitted, so the output columns match the layout
        the model was trained on.

    Returns
    -------
    The value returned by the vectorizer's ``transform`` / ``fit_transform``;
    with the default vectorizer this is a dense array (``sparse=False``).
    """
    if dv is not None:
        return dv.transform(dictDF)

    # Backward-compatible fallback: fit a fresh vectorizer on the data given.
    # NOTE(review): the resulting columns depend on the categories present in
    # this batch, so they can differ from the training layout. Prefer passing
    # the training-time vectorizer through `dv`.
    dv = DictVectorizer(sparse=False)
    return dv.fit_transform(dictDF)

In [116]:
# Feature pipeline: load the CSV and clean it, turn the feature columns into
# per-row dictionaries, then vectorize those into the model's input matrix.
df = preprocessor(read_dataframe(input_file))
dicts = prepareDictionaries(df)
model_input = dictionaryVectorizer(dicts)

In [117]:
# Run the unpickled model on the vectorized features built in the previous cell.
y_pred = model.predict(model_input)

In [118]:
def generate_msisdn(n):
    """Generate ``n`` random placeholder MSISDNs.

    Each value is the prefix ``'2517'`` followed by a random nine-digit number
    between 100000000 and 999999999, giving a 13-digit string. Values are
    drawn independently, so duplicates are possible.

    Parameters
    ----------
    n : int
        How many numbers to generate. The argument is honoured as given;
        earlier versions overwrote it with ``len(y_pred)``, which silently
        ignored the caller and tied the function to a notebook global.

    Returns
    -------
    list of str
        ``n`` MSISDN strings (an empty list when ``n`` is 0 or negative).
    """
    return ['2517' + str(random.randint(100000000, 999999999)) for _ in range(n)]

In [119]:
# Empty frame, presumably meant to collect the scored results;
# none of the cells visible here fills it yet.
df_result = pd.DataFrame()

In [120]:
# Create the results directory. exist_ok=True keeps the cell re-runnable: the
# shell `mkdir` it replaces reports an error once the folder already exists,
# and its behaviour differs between operating systems.
os.makedirs('output', exist_ok=True)

A subdirectory or file output already exists.


In [107]:
# The model version must also be delivered with the data that is shipped to the database.

In [None]:
def load_model(run_id):
    """Draft loader for a tracked model, given its run id.

    NOTE(review): not runnable as written; see the inline notes below.
    """
    # Placeholder text rather than a real model location; `run_id` is never
    # used to build it.
    logged_model = 'Path for the specific ML model and RUNID while assuming experiments will be tracked by MLflow'
    # NOTE(review): `model` is assigned in this function, which makes it a
    # local name, so reading `model.pyfunc` on the right-hand side raises
    # UnboundLocalError. Presumably `mlflow.pyfunc.load_model(...)` was
    # intended (mlflow is not imported in the cells shown) - confirm.
    model = model.pyfunc.load_model(logged_model)
    return model

In [None]:
def apply_model(input_file, run_id, output_file):
    """Score the CSV at ``input_file`` with the model loaded for ``run_id``.

    Follows the same steps as the interactive cells of this notebook:
    read -> preprocess -> feature dictionaries -> vectorize -> predict.

    Parameters
    ----------
    input_file : str
        Path of the CSV to score.
    run_id
        Identifier handed to ``load_model`` to select the model.
    output_file : str
        Intended destination for the predictions. Nothing is written yet.
        TODO: persist the results here once the output schema is decided.

    Returns
    -------
    Whatever the loaded model's ``predict`` returns for the vectorized rows.
    """
    # The raw CSV must go through the same cleaning as the interactive path;
    # without it the feature values would not be normalised.
    df = preprocessor(read_dataframe(input_file))
    dicts = prepareDictionaries(df)

    # NOTE(review): called this way, dictionaryVectorizer fits a new
    # vectorizer on this batch. The model needs the training-time feature
    # layout, so confirm this before relying on the output.
    model_input = dictionaryVectorizer(dicts)

    model = load_model(run_id)

    # `predict` matches the call used in the executed cells; the previous
    # `model.predictions(df)` also bypassed the features built above.
    y_pred = model.predict(model_input)

    return y_pred