In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
import json

In [2]:
import dataset
import algorithm

In [3]:
# Load the run configuration from config.json. The context manager closes the
# file handle as soon as the JSON has been parsed instead of leaving it open
# for the lifetime of the kernel.
with open('config.json') as config_file:
    CONFIG = json.load(config_file)

In [4]:
# Obtain the raw dataset through dataset.download_dataset, using the filename
# stored in the "data" section of the configuration.
data_filename = CONFIG['data']['filename']
Dataset = dataset.download_dataset(data_filename)

In [5]:
# Preview the first rows of the raw dataset returned by download_dataset.
Dataset.head()

Unnamed: 0,area,material,condicion,anio_construccion,anio_remodelacion,sotano,calefaccion,aire_acondicionado,area_construida_piso_1,area_construida_piso_2,...,banios,banios_sin_ducha,dormitorios,cocinas,chimeneas,area_garage,area_piscina,mes_venta,anio_venta,precio
0,8450,7,5,2003,2003,Gd,GasA,Y,856,854,...,2,1,3,1,0,548,0,2,2008,208500
1,9600,6,8,1976,1976,Gd,GasA,Y,1262,0,...,2,0,3,1,1,460,0,5,2007,181500
2,11250,7,5,2001,2002,Gd,GasA,Y,920,866,...,2,1,3,1,1,608,0,9,2008,223500
3,9550,7,5,1915,1970,TA,GasA,Y,961,756,...,1,0,3,1,1,642,0,2,2006,140000
4,14260,8,5,2000,2000,Gd,GasA,Y,1145,1053,...,2,1,4,1,1,836,0,12,2008,250000


In [6]:
# Pass the raw dataset through dataset.get_data and preview the result.
# In the preview that follows, the sotano / calefaccion / aire_acondicionado
# columns appear as numeric codes instead of the string labels shown for the
# raw dataset.
data = dataset.get_data(Dataset)
data.head()

Unnamed: 0,area,material,condicion,anio_construccion,anio_remodelacion,sotano,calefaccion,aire_acondicionado,area_construida_piso_1,area_construida_piso_2,...,banios,banios_sin_ducha,dormitorios,cocinas,chimeneas,area_garage,area_piscina,mes_venta,anio_venta,precio
0,8450,7,5,2003,2003,0,1,1,856,854,...,2,1,3,1,0,548,0,2,2008,208500
1,9600,6,8,1976,1976,0,1,1,1262,0,...,2,0,3,1,1,460,0,5,2007,181500
2,11250,7,5,2001,2002,0,1,1,920,866,...,2,1,3,1,1,608,0,9,2008,223500
3,9550,7,5,1915,1970,1,1,1,961,756,...,1,0,3,1,1,642,0,2,2006,140000
4,14260,8,5,2000,2000,0,1,1,1145,1053,...,2,1,4,1,1,836,0,12,2008,250000


In [7]:
# Normalize the prepared data with the project's Normalizer.
# NOTE(review): this runs on the full dataset, before the train/test split.
# If Normalizer.transform derives its scaling statistics from the data it is
# given, the test rows influence them -- confirm against dataset.Normalizer.
normalize_data = dataset.Normalizer.transform(data)

In [8]:
# Separate the normalized data into model inputs (data_x) and targets (data_y).
data_x, data_y = dataset.split_data_x_y(normalize_data)

In [9]:
# Split inputs and targets with the same configured split size.
# NOTE(review): presumably split_data_train_test is deterministic, so the x and
# y partitions stay row-aligned -- confirm against the dataset module.
train_split_size = CONFIG["data"]["train_split_size"]
data_x_train, data_x_test = dataset.split_data_train_test(data_x, train_split_size)
data_y_train, data_y_test = dataset.split_data_train_test(data_y, train_split_size)

In [10]:
# Wrap each input/target pair in the project's TimeSeriesDataset so that it can
# be handed to a DataLoader.
dataset_train = dataset.TimeSeriesDataset(data_x_train, data_y_train)
dataset_test = dataset.TimeSeriesDataset(data_x_test, data_y_test)

In [11]:
# A DataLoader yields a sequence of batches; each batch holds up to
# 'batch_size' records.
# Training batches are reshuffled on every pass over the data.
train_dataloader = DataLoader(
    dataset_train, batch_size=CONFIG["training"]["batch_size"], shuffle=True
)
# The test loader keeps the dataset order: shuffling brings nothing to
# evaluation and would make any "first batch" inspection differ between runs.
test_dataloader = DataLoader(
    dataset_test, batch_size=CONFIG["training"]["batch_size"], shuffle=False
)

In [12]:
# Build the model from the "model" and "training" sections of the configuration.
model = algorithm.Model(config_model=CONFIG["model"], config_training=CONFIG["training"])

In [13]:
# Model training: per epoch, one pass over the training loader (is_training=True),
# one pass over the test loader (is_training left at its default), then a
# scheduler step.
num_epochs = CONFIG["training"]["num_epoch"]
for epoch in range(num_epochs):
    loss_train, lr_train = model.run_epoch(train_dataloader, is_training=True)
    loss_test, lr_test = model.run_epoch(test_dataloader)
    model.scheduler.step()

    # Report on the first epoch and on every tenth epoch after that.
    epoch_number = epoch + 1
    if epoch_number == 1 or epoch_number % 10 == 0:
        report = 'Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}'.format(
            epoch_number, num_epochs, loss_train, loss_test, lr_train)
        print(report)

Epoch[1/100] | loss train:0.004339, test:0.000697 | lr:0.010000
Epoch[10/100] | loss train:0.000639, test:0.000360 | lr:0.010000
Epoch[20/100] | loss train:0.000508, test:0.000377 | lr:0.010000
Epoch[30/100] | loss train:0.000511, test:0.000413 | lr:0.010000
Epoch[40/100] | loss train:0.000433, test:0.000446 | lr:0.010000
Epoch[50/100] | loss train:0.000380, test:0.000449 | lr:0.001000
Epoch[60/100] | loss train:0.000381, test:0.000442 | lr:0.001000
Epoch[70/100] | loss train:0.000378, test:0.000448 | lr:0.001000
Epoch[80/100] | loss train:0.000405, test:0.001100 | lr:0.001000
Epoch[90/100] | loss train:0.000340, test:0.000473 | lr:0.000100
Epoch[100/100] | loss train:0.000364, test:0.000464 | lr:0.000100


In [14]:
# Check the model's predictions on the first batch of the test loader.
# NOTE(review): 21 is the column index handed to Normalizer.inverse_transform;
# presumably the position of the `precio` column -- confirm against
# dataset.Normalizer.
PRICE_COLUMN_INDEX = 21
error = 0
# Samples are counted with a dedicated counter (not a loop index) so that the
# mean below divides by exactly the number of samples accumulated in `error`.
n_samples = 0
for x, y in test_dataloader:
    x = x.to(CONFIG["training"]["device"])
    out = model(x)
    # x was moved to the configured device, so bring the predictions back to
    # the CPU first: Tensor.numpy() only works on CPU tensors.
    out_vector = out.detach().cpu().numpy()
    y_vector = y.numpy()
    for predicted, real in zip(out_vector, y_vector):
        # Undo the normalization to compare in the original price scale.
        price_predict = dataset.Normalizer.inverse_transform(predicted, PRICE_COLUMN_INDEX)
        price_real = dataset.Normalizer.inverse_transform(real, PRICE_COLUMN_INDEX)
        porcentage = price_predict/price_real
        # Accumulate the absolute relative deviation |1 - predicted/real|.
        error += abs(1-porcentage)
        n_samples += 1
        print('Price predict: {0}, Price real: {1}, Diferencia: {2} ({3})'.format(price_predict, price_real, price_predict-price_real, (porcentage)))

    # Mean relative error of the batch, as a percentage.
    if n_samples:
        print("margen de error {}%".format(round(100 * error / n_samples, 2)))
    # Only the first batch is inspected.
    break

Price predict: 153570, Price real: 142500, Diferencia: 11070 (1.0776842105263158)
Price predict: 232540, Price real: 274300, Diferencia: -41760 (0.847757929274517)
Price predict: 225190, Price real: 164000, Diferencia: 61190 (1.373109756097561)
Price predict: 137822, Price real: 155000, Diferencia: -17178 (0.8891741935483871)
Price predict: 118814, Price real: 145000, Diferencia: -26186 (0.8194068965517242)
Price predict: 264623, Price real: 237000, Diferencia: 27623 (1.1165527426160338)
Price predict: 162883, Price real: 179900, Diferencia: -17017 (0.905408560311284)
Price predict: 202170, Price real: 224900, Diferencia: -22730 (0.898932859048466)
Price predict: 113382, Price real: 119000, Diferencia: -5618 (0.9527899159663865)
Price predict: 173005, Price real: 203000, Diferencia: -29995 (0.8522413793103448)
Price predict: 143083, Price real: 154000, Diferencia: -10917 (0.9291103896103896)
Price predict: 253258, Price real: 250000, Diferencia: 3258 (1.013032)
Price predict: 249353, P