In [30]:
import joblib
import pickle
import json
import pandas as pd 
import numpy as np
from production_training import ProductionTraining
import re
import inspect

# Helper object from the project's production_training module; the cells below use it
# to read the configuration (read_json_to_class) and to clean feature values (remove_characters).
pt = ProductionTraining()

### Read configs

In [31]:
# Location of the production configuration file.
# NOTE(review): presumably JSON content despite the .txt extension, judging by the reader's name — confirm.
config_path = './production/configurations/production_configuration.txt'
# Parse the configuration into an object whose settings are read as attributes in later cells
# (e.g. configs.grid_search.last_model_path, configs.sanity.sanity_check_path).
configs = pt.read_json_to_class(config_path)

### Load model

In [32]:
# Display the model path stored in the configuration under grid_search.last_model_path.
configs.grid_search.last_model_path

'./production/grid_search_models/gread_search_2021-04-24.pkl'

In [33]:
# Deserialize the trained model from the configured path.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load artifacts that come from a trusted location.
model=joblib.load(configs.grid_search.last_model_path)

### Load samples

In [34]:
# Load the sanity-check samples: a JSON object mapping each request id to its feature dict.
path = configs.sanity.sanity_check_path
# Read with an explicit encoding so the result does not depend on the platform's locale default.
with open(path, 'r', encoding='utf-8') as samples_file:
    samples = json.load(samples_file)

# Show the first sample as an example of the expected structure.
request_id = next(iter(samples))
print(f"Request id {request_id} for example:\n\n{samples[request_id]}")

Request id -9.11278e+18_-2.86173e+17 for example:

{'state': 'TX', 'product': 'ho4', 'square_ft': 1099.0, 'has_fire_alarm': False, 'has_burglar_alarm': False, 'portable_electronics': False, 'coast': 4, 'fire_housing_proximity': 3, 'previous_policies': 1, 'user_age': 34.0, 'card_type': 'debit', 'number_of_na': 2, 'grouped_postal_code': 'generall_postal_code', 'median_household_income': 35449.0}


### Transform data

###### In a real production system this section would be much larger.
###### Every piece of logic applied in the research section must also be applied here before generating predictions, for example:
######   - Bucketing/mapping values 
######   - 'None' counter - (one of our features)
######   - Convert values from str to numeric and back
######   - etc.
###### Here, the data we predict on is already transformed.
###### Still, to illustrate the point, let's build and apply one transformer: removing non-digit characters from the "fire_housing_proximity" feature.

#### Function logic 

In [35]:
# Fetch and print the source code of the transformer so the reader can see exactly
# which logic is applied to the feature in the next cell.
lines = inspect.getsource(pt.remove_characters)
print(lines)

    @staticmethod
    def remove_characters(feature_value: str):
        '''
        The following function clean characters from strings
        :param feature_value: str
        :return: int

        example :remove_characters(feature_value ='8x') --> 8
        '''
        if isinstance(feature_value, str):
            value = int(re.search(r'\d+', feature_value).group())
            return value
        if isinstance(feature_value, int):
            return feature_value



In [36]:
# Clean the chosen feature in place for every sample.
# Note: as the printed source shows, remove_characters only handles str and int inputs;
# any other type falls through and yields None.
feature = 'fire_housing_proximity'
for sample in samples.values():
    raw_value = sample[feature]
    sample[feature] = pt.remove_characters(feature_value=raw_value)

### Get prediction per sample-id

###### There are many "True" predictions because I sampled from the resampled data set :]

In [37]:
# Score each sample on its own: wrap its feature dict in a one-row DataFrame,
# run the model on it and report the prediction next to the request id.
for sample_id, features in samples.items():
    # Multiplication by 1 is kept from the original;
    # presumably it casts boolean flags to 0/1 — TODO confirm.
    single_row = pd.DataFrame([features]) * 1
    predicted = model.predict(single_row)
    print(f"id {sample_id}, predict = {predicted[0]}")

id -9.11278e+18_-2.86173e+17, predict = False
id -9.02622e+18_1.45668e+18, predict = True
id -9.17817e+18_-6.39372e+18, predict = True
id -9.12043e+18_-5.90665e+18, predict = False
id -8.85995e+18_7.46945e+18, predict = True
