In [None]:
import cPickle as pickle
import pandas as pd

## Load the fitted model and other relevant data

In [None]:
# Read in the fitted model and the preprocessing objects that were saved with it
modelFname = 'enet.pyobj'
dataFname = '/home/jovyan/work/data/autot4.7.csv'

# NOTE: unpickling can execute arbitrary code, so only load model files from a
# trusted source. The with-block closes the file handle as soon as the pickle
# has been read (the bare open() call left it open).
with open(modelFname, 'rb') as modelFile:
    inputs = pickle.load(modelFile)

# Unpack the stored objects into separate names
model = inputs['model']
imputer = inputs['imputer']
scaler = inputs['scaler']
trainColumns = inputs['trainColumns']
factorVars = inputs['factorVars']
numericVars = inputs['numericVars']
dateVars = inputs['dateVars']

# Remove the target variables from the numeric variables.
# The list is mutated in place, so inputs['numericVars'] (the same list object)
# reflects the removal too.
numericVars.remove('matkamittarilukema')
numericVars.remove('mileagePerDay')

In [None]:
# read some original data
#data = pd.read_csv(dataFname, sep=';', nrows=10)
#for col in ['ensirekisterointipvm', 'kayttoonottopvm', 'max_date', 'kayttoonotto']:
#    data[col] = pd.to_datetime(data[col])    
#data['usageDays'] = (data.max_date - data.kayttoonottopvm).map(lambda x: x.days)

### Create test case data

In [None]:
# Display the factor variable names loaded with the model; these are the
# inputVector keys that get dummy-encoded in the next cell
factorVars

In [None]:
# One test observation for the model (same information as in the original data)
inputVector = {
    'omamassa': 1340,
    'iskutilavuus': 1900,
    'suurinNettoteho': 193,
    'kayttoonottoVuosi': 2010,
    'ensirekVuosi': 2001,
    'ajoneuvonkaytto': 'Yksityinen',
    'max_date': '2016-09-30',
    'kayttoonottopvm': '2010-06-15',
    'ajoneuvoryhma': 'Maastoauto',
    'kayttovoima': 'Bensiini',
    'vaihteisto': 'Käsivalintainen',
}

# Derive the same variable that was used in modeling:
# number of days between kayttoonottopvm and max_date
endDate = pd.to_datetime(inputVector['max_date'])
startDate = pd.to_datetime(inputVector['kayttoonottopvm'])
inputVector['usageDays'] = (endDate - startDate).days

# Dummy-encode the factor variables: the raw value is taken out of the dict
# ('NA' when the key is absent) and a '<variable>_<value>' flag is set to 1,
# but only when that dummy column is one of the training columns
for factorName in factorVars:
    factorLevel = inputVector.pop(factorName, 'NA')
    dummyName = '{}_{}'.format(factorName, factorLevel)
    if dummyName in trainColumns:
        inputVector[dummyName] = 1

inputVector

### Preprocess the input data the same way as in the model fitting phase

In [None]:
# Formulate the input vector in the same way as in model fitting.
# Note that the variables must be in the same order as in model fitting, which
# is why the Series is indexed by trainColumns.
# The dtype is given explicitly: without it, recent pandas versions create an
# empty Series with object dtype instead of float64.
x = pd.Series(index=trainColumns, dtype=float)

# Dummy columns are named '<factor variable>_<value>'. Matching on the whole
# '<factor variable>_' prefix (instead of the text before the first underscore)
# also works for factor variable names that themselves contain underscores.
factorPrefixes = tuple('{}_'.format(name) for name in factorVars)

for v in trainColumns:
    if v in factorVars or v.startswith(factorPrefixes):
        # if a factor type variable is not in the input, its value should be 0
        x[v] = inputVector.get(v, 0)
    else:
        # a value missing from the input is left as NaN;
        # the numeric variables are imputed below
        x[v] = inputVector.get(v)
print(x)

# Impute missing values exactly the same way as in model fitting.
# The input to the imputer must be a 2-D numpy array (=> .values.reshape(1, -1))
xtmp = imputer.transform(x[numericVars].values.reshape(1, -1))

# Scale the numeric variables exactly the same way as in model fitting
xtmp = scaler.transform(xtmp)

# Put the imputed and scaled variables back into the input vector. The same
# numericVars list that selected the values is used for writing them back,
# so the columns are guaranteed to line up.
x[numericVars] = xtmp[0]
print(x)

### Make a prediction

In [None]:
# Make a prediction with the fitted model.
# The input to the model must be a 2-D numpy array holding one row
# (=> .values.reshape(1, -1)); [0] picks the prediction for that single row.
featureRow = x.values.reshape(1, -1)
model.predict(featureRow)[0]