In [2]:
import math
from pathlib import Path

import pandas as pd
import torch

from common import FieldType, PreprocessingType, MODEL_CKPT_NAME, TRANSMISSON_MAPPING
from dataset import get_additional_fields_json
from model import MLP


# Every tensor and the model in this notebook are placed on the CPU.
device = torch.device("cpu")

In [3]:
# Load the best checkpoint of the selected training run and rebuild the model.
#
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True; if this checkpoint stores non-tensor Python objects
# (presumably the enum values compared against below -- confirm), loading may
# require an explicit weights_only=False for trusted files.
model_name = '20240513_012306'
model_path = Path(f'ckpts/{model_name}/best/{MODEL_CKPT_NAME}')
# map storages straight onto the device used by the rest of the notebook
ckpt = torch.load(model_path, map_location=device)
# the layer sizes stored next to the weights are passed to the MLP constructor
model = MLP(ckpt['shapes'])
model.load_state_dict(ckpt['sd'])
model = model.to(device)
# This notebook only runs inference: switch to evaluation mode so that layers
# such as dropout / batch norm (if the MLP has any) behave deterministically.
model.eval()
# per-feature metadata saved at training time; the inference loop reads
# 'type' for every feature and 'mean' / 'std' for numerical ones
feature_medata = ckpt['features']

In [5]:
# Run the trained model over the test listings and print the predicted price
# next to the actual one.
df = pd.read_csv('data/ML_zadatak_auti.csv')
# load only test data
test_series = df[df['condition_id'] == 40]
# field in dataframe with all additional useful information
afj = get_additional_fields_json(test_series)
# NOTE(review): the loop assumes afj yields one mapping per row of
# test_series, in the same order -- the actual price is looked up by position.
for idx, row in enumerate(afj):
    # encoded feature values, in the feature order stored in the checkpoint
    pieces = []
    skip_row = False
    for feat_name, feat in feature_medata.items():
        # value from the dataframe
        raw_val = row[feat_name]
        if feat['type'] == FieldType.CATEGORICAL:
            if feat_name != 'transmissionTypeId':
                raise ValueError('Categorical field not yet supported')
            # A missing transmission cannot be one-hot encoded, so the whole
            # row is skipped (maybe replace it with the most common one?).
            # isinstance also catches float subclasses such as numpy.float64.
            if isinstance(raw_val, float) and math.isnan(raw_val):
                skip_row = True
                break
            val = TRANSMISSON_MAPPING[raw_val]
            val = torch.nn.functional.one_hot(torch.tensor(val), num_classes=len(TRANSMISSON_MAPPING))
            pieces.append(val.reshape(-1, 1))
        elif feat['type'] == FieldType.NUMERICAL:
            if ckpt['preprocessing_type'] != PreprocessingType.STANDARDIZATION:
                raise ValueError('Preprocesing not yet supported')
            # use mean and std from training set
            val = (raw_val - feat['mean']) / feat['std']
            pieces.append(torch.tensor(val).reshape(1, -1))
        else:
            raise ValueError('Field type not yet supported')

    # Skip explicitly instead of concatenating a partial (possibly empty)
    # feature list, which would make torch.cat raise.
    if skip_row:
        continue

    # column pieces are stacked and flattened into a single (1, n_inputs) row
    tensor = torch.cat(pieces).reshape(1, -1).to(torch.float32).to(device)

    # safety net: never feed the model an input of the wrong width
    if tensor.shape[1] != ckpt['shapes'][0]:
        continue

    # inference only -- no autograd graph is needed
    with torch.no_grad():
        predicted_price = model(tensor)

    print('predicted price: ', round(predicted_price.item(), 2))
    print('actual price:    ', test_series.iloc[idx]['price'])
    print('---')

predicted price:  10175.82
actual price:     7800.0
---
predicted price:  18124.86
actual price:     25217.33
---
predicted price:  14972.43
actual price:     13126.29
---
predicted price:  16008.41
actual price:     19231.53
---
predicted price:  38879.59
actual price:     34494.66
---
predicted price:  40604.86
actual price:     39816.71
---
predicted price:  18472.77
actual price:     23744.11
---
predicted price:  14989.31
actual price:     21102.93
---
predicted price:  17301.77
actual price:     25734.95
---
predicted price:  58225.75
actual price:     93500.0
---
predicted price:  16778.01
actual price:     22695.6
---
predicted price:  21508.27
actual price:     26133.12
---
predicted price:  63066.19
actual price:     46439.71
---
predicted price:  42481.53
actual price:     33831.04
---
predicted price:  32830.56
actual price:     24540.45
---
predicted price:  13112.87
actual price:     14950.0
---
predicted price:  34110.36
actual price:     25867.68
---
predicted price:  1