In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
import json
import dataset
import algorithm

In [2]:
CONFIG = json.load(open('config.json'))

In [3]:
Dataset = dataset.download_dataset(CONFIG['data']['filename'])

In [4]:
data = dataset.get_data(Dataset)
data.head()

Unnamed: 0,area,material,condicion,anio_construccion,anio_remodelacion,sotano,calefaccion,aire_acondicionado,area_construida_piso_1,area_construida,banios,banios_sin_ducha,dormitorios,chimeneas,area_garage,area_piscina,precio
0,8450,7,5,2003,2003,0,1,1,856,1710,2,1,3,0,548,0,208500
1,9600,6,8,1976,1976,0,1,1,1262,1262,2,0,3,1,460,0,181500
2,11250,7,5,2001,2002,0,1,1,920,1786,2,1,3,1,608,0,223500
3,9550,7,5,1915,1970,1,1,1,961,1717,1,0,3,1,642,0,140000
4,14260,8,5,2000,2000,0,1,1,1145,2198,2,1,4,1,836,0,250000


In [5]:
normalize_data = dataset.Normalizer.transform(data)

In [6]:
data_x, data_y = dataset.split_data_x_y(normalize_data)

In [7]:
data_x_train, data_x_test = dataset.split_data_train_test(data_x, CONFIG["data"]["train_split_size"])
data_y_train, data_y_test = dataset.split_data_train_test(data_y, CONFIG["data"]["train_split_size"])

In [8]:
dataset_train = dataset.TimeSeriesDataset(data_x_train, data_y_train)
dataset_test = dataset.TimeSeriesDataset(data_x_test, data_y_test)

In [9]:
# DataLoader => Devuelve un conjunto de iteraciones, cada iteracion contiene un arreglo de 'batch_size' registros
train_dataloader = DataLoader(dataset_train, CONFIG["training"]["batch_size"], shuffle=True)
test_dataloader = DataLoader(dataset_test, CONFIG["training"]["batch_size"], shuffle=True)

In [10]:
model = algorithm.Model(config_model=CONFIG["model"], config_training=CONFIG["training"])

In [11]:
# Entrenamiento del modelo
for epoch in range(CONFIG["training"]["num_epoch"]):
    loss_train, lr_train = model.run_epoch(train_dataloader, is_training=True)
    loss_test, lr_test = model.run_epoch(test_dataloader)
    if epoch == 0 or ((epoch+1)%10 == 0):
        print('Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}'
              .format(epoch+1, CONFIG["training"]["num_epoch"], loss_train, loss_test, lr_train))

Epoch[1/100] | loss train:0.004967, test:0.000780 | lr:0.010000
Epoch[10/100] | loss train:0.000860, test:0.000410 | lr:0.010000
Epoch[20/100] | loss train:0.000893, test:0.000352 | lr:0.010000
Epoch[30/100] | loss train:0.000457, test:0.000329 | lr:0.010000
Epoch[40/100] | loss train:0.000527, test:0.000314 | lr:0.010000
Epoch[50/100] | loss train:0.000495, test:0.000332 | lr:0.010000
Epoch[60/100] | loss train:0.000636, test:0.000614 | lr:0.010000
Epoch[70/100] | loss train:0.000505, test:0.000977 | lr:0.010000
Epoch[80/100] | loss train:0.000396, test:0.000382 | lr:0.010000
Epoch[90/100] | loss train:0.000445, test:0.000307 | lr:0.010000
Epoch[100/100] | loss train:0.000424, test:0.000440 | lr:0.010000


In [13]:
# Verificacion de la prediccion del modelo
error = 0
i=0

for idx, (x, y) in enumerate(test_dataloader):
    i+=1
    x = x.to(CONFIG["training"]["device"])
    out = model(x)
    out_vector = out.detach().numpy()
    y_vector = y.numpy()
    for i in range(len(y_vector)):
        price_predict = dataset.Normalizer.inverse_transform(out_vector[i], CONFIG["model"]["input_size"]+1)
        price_real = dataset.Normalizer.inverse_transform(y_vector[i], CONFIG["model"]["input_size"]+1)
        porcentage = price_predict/price_real
        error += abs(1-porcentage)
        print('Price predict: {0}, Price real: {1}, Diferencia: {2} ({3})'.format(price_predict, price_real, price_predict-price_real, (porcentage)))
    
    print("margen de error {}%".format(round(100 * error / i, 2)))
    break

Price predict: 133000, Price real: 127500, Diferencia: 5500 (1.0431372549019609)
Price predict: 98342, Price real: 90000, Diferencia: 8342 (1.0926888888888888)
Price predict: 257453, Price real: 250000, Diferencia: 7453 (1.029812)
Price predict: 137618, Price real: 143750, Diferencia: -6132 (0.9573426086956521)
Price predict: 193077, Price real: 174000, Diferencia: 19077 (1.1096379310344828)
Price predict: 211960, Price real: 200000, Diferencia: 11960 (1.0598)
Price predict: 208088, Price real: 224900, Diferencia: -16812 (0.9252467763450423)
Price predict: 133398, Price real: 153500, Diferencia: -20102 (0.869042345276873)
Price predict: 103134, Price real: 107000, Diferencia: -3866 (0.9638691588785047)
Price predict: 129964, Price real: 147500, Diferencia: -17536 (0.8811118644067797)
Price predict: 89723, Price real: 125500, Diferencia: -35777 (0.7149243027888447)
Price predict: 205878, Price real: 197900, Diferencia: 7978 (1.0403132895401719)
Price predict: 279161, Price real: 392500,

In [24]:
import torch
d = np.array([[8635, 5, 5, 1948, 2001, 'TA', 'GasA', 'Y', 1072, 1285,1, 0, 2, 0, 240, 0, 0]])
d = pd.DataFrame(d, columns=['area', 'material', 'condicion', 'anio_construccion', 'anio_remodelacion', 'sotano', 'calefaccion',  'aire_acondicionado', 'area_construida_piso_1', 'area_construida', 'banios', 'banios_sin_ducha', 'dormitorios', 'chimeneas', 'area_garage', 'area_piscina', 'precio'])
d = dataset.get_data(d)

d = dataset.Normalizer.transform(d)
info_x, _ = dataset.split_data_x_y(d)
x = torch.tensor(info_x).float().to(CONFIG["training"]["device"])
out = model(x)
out_vector = out.detach().numpy()
price_predict = dataset.Normalizer.inverse_transform(out_vector[0], CONFIG["model"]["input_size"]+1)
print(price_predict)

0
