# Simulate app form prediction process

## Load pipeline artifact

In [16]:
import pickle

# Relative path to the serialized pipeline artifact.
path = '../../artifacts/pipeline_mpg.pkl'

# SECURITY: pickle.load can execute arbitrary code while deserializing, so only
# load artifacts that come from a trusted source.
with open(path, 'rb') as file:
    pipeline = pickle.load(file)

In [17]:
# Column names recorded on the loaded pipeline; presumably the inputs the app
# form has to collect — confirm against the training notebook.
pipeline.feature_names_in_

array(['cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin'], dtype=object)

## Structure categorical data options

### Feature names and categories

In [18]:
# Estimator of the first pipeline step; presumably a ColumnTransformer, since it
# exposes `transformers_` — confirm.
processor = pipeline.steps[0][1]
# Fitted transformer at position 0; assumed to be the categorical encoder because
# it exposes `categories_`. Positional lookup: breaks if the transformer order changes.
encoder = processor.transformers_[0][1]

# Column names and per-column category arrays stored on the encoder.
feature_names = encoder.feature_names_in_
feature_categories = encoder.categories_

In [19]:
# Display the column names stored on the encoder.
feature_names

array(['cylinders', 'origin'], dtype=object)

In [20]:
# Display the category arrays stored on the encoder, one per column.
feature_categories

[array(['3', '4', '5', '6', '8'], dtype=object),
 array(['europe', 'japan', 'usa'], dtype=object)]

In [21]:
# Pair every categorical column with its categories as a plain list;
# `.tolist()` turns each array into a list that the JSON export can serialize.
options = {
    name: values.tolist()
    for name, values in zip(feature_names, feature_categories)
}

options

{'cylinders': ['3', '4', '5', '6', '8'], 'origin': ['europe', 'japan', 'usa']}

### Export options to file

In [22]:
import json

with open('../../artifacts/options_categorical.json', 'w') as file:
    # Render the options as 4-space-indented JSON text, then write it in one call.
    file.write(json.dumps(options, indent=4))

## Structure numerical data options

### Feature names

In [23]:
# Estimator of the first pipeline step (same lookup as for the categorical options).
processor = pipeline.steps[0][1]
# Fitted transformer at position 1; assumed to handle the numerical columns — confirm.
# Positional lookup: breaks if the transformer order changes.
encoder = processor.transformers_[1][1]

# Convert the stored array of column names into a plain Python list.
feature_names = encoder.feature_names_in_.tolist()
feature_names

['displacement', 'horsepower', 'weight', 'acceleration', 'model_year']

### Average value by default

In [24]:
import pandas as pd

# index_col=0 makes the first CSV column the row index instead of a data column.
df = pd.read_csv('../../data/cars.csv', index_col=0)
df

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,70,usa
buick skylark 320,15.0,8,350.0,165.0,3693,11.5,70,usa
plymouth satellite,18.0,8,318.0,150.0,3436,11.0,70,usa
amc rebel sst,16.0,8,304.0,150.0,3433,12.0,70,usa
ford torino,17.0,8,302.0,140.0,3449,10.5,70,usa
...,...,...,...,...,...,...,...,...
ford mustang gl,27.0,4,140.0,86.0,2790,15.6,82,usa
vw pickup,44.0,4,97.0,52.0,2130,24.6,82,europe
dodge rampage,32.0,4,135.0,84.0,2295,11.6,82,usa
ford ranger,28.0,4,120.0,79.0,2625,18.6,82,usa


In [25]:
# Default value for every numerical form field: the mean of its column in `df`
# (pandas skips missing values when averaging by default).
# float() converts the NumPy scalar returned by `.mean()` into a builtin float, so
# the JSON export below does not depend on the column dtype: json only accepts
# NumPy scalars that happen to subclass float (float64), not e.g. float32.
options = {feature: float(df[feature].mean()) for feature in feature_names}

In [26]:
# Display the per-column default values computed in the previous cell.
options

{'displacement': 194.41198979591837,
 'horsepower': 104.46938775510205,
 'weight': 2977.5841836734694,
 'acceleration': 15.541326530612244,
 'model_year': 75.9795918367347}

### Export options to file

In [27]:
import json

with open('../../artifacts/options_numerical.json', 'w') as file:
    # Render the options as 4-space-indented JSON text, then write it in one call.
    file.write(json.dumps(options, indent=4))