In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
from collections import defaultdict
from itertools import product
from sklearn.metrics import mean_absolute_error as mae
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch import optim
from sklearn.preprocessing import StandardScaler

In [2]:
look_back = 72
batch_size = 512
linear_node = 32

In [3]:
dat = pd.read_csv('train.csv', index_col='row_id')

In [4]:
def preprocess(dat):
    time_mapper = {}
    ii = 0
    for h in range(24):
        for mm in ['00','20','40']:
            hh = '{0:02d}'.format(h)
            time_mapper[hh+':'+mm] = ii
            ii += 1

    dat['unique'] = dat['x'].astype(str) + dat['y'].astype(str) + dat['direction']
    uniques = dat['unique'].unique()
    dat['day'] = pd.to_datetime(dat['time']).dt.weekday
    dat['time_stamp'] = dat['time'].apply(lambda x:time_mapper[x.split()[1][:5]])

    tmp = dat.groupby(['unique','day','time_stamp']).agg({'congestion':np.median})
    median_mapper = tmp.to_dict()['congestion']
    dat['median'] = dat.apply(lambda x: \
                              median_mapper[x['unique'],x['day'],x['time_stamp']], axis=1)
    dat['congestion-median'] = dat['congestion'] - dat['median']
    
    all_time = pd.DataFrame(pd.date_range('1991-04-01 00:00:00', '1991-09-30 11:40:00', freq='20Min'), columns=['time'])
    all_time['time'] = all_time['time'].astype(str)
    
    return uniques, median_mapper, time_mapper, all_time

In [5]:
uniques, median_mapper, time_mapper, all_time = preprocess(dat)

In [6]:
def getseries(unique):
    df = dat.loc[dat['unique']==unique, ['time', 'congestion-median']]
    df = pd.merge(all_time, df, left_on='time', right_on='time', how='outer')
    df = df.set_index('time')
    df['congestion-median'] = df['congestion-median'].fillna(0)
    ss = StandardScaler()
    df['congestion-median-normalized'] = ss.fit_transform(df['congestion-median'].values.reshape(-1,1)).reshape(-1)
    return df, ss

In [7]:
def create_dataset(dataset, look_back=5):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back)]
        dataX.append(a)
        dataY.append(dataset[i+look_back])
    return np.array(dataX), np.array(dataY)

In [8]:
def assemble(dat):
    train_loaders, test_loaders = [], []
    for period in test_periods_with_lookback:
        train = dat.loc[dat.index < period[0], 'congestion-median-normalized'].values
        test = dat.loc[(dat.index >= period[0]) & (dat.index <= period[1]), 'congestion-median-normalized'].values
        print(test[0])
        
        X, y = create_dataset(train, look_back=look_back)
        train_dataset = []
        for i in range(len(X)):
            train_dataset.append((torch.tensor(X[i].reshape(-1,1),dtype=torch.float32),
                                  torch.tensor(y[i].reshape(-1,),dtype=torch.float32)))
        train_loaders.append(DataLoader(train_dataset, batch_size=batch_size, drop_last=False))
        
        X, y = create_dataset(test, look_back=look_back)
        test_dataset = []
        for i in range(len(X)):
            test_dataset.append((torch.tensor(X[i].reshape(-1,1),dtype=torch.float32),
                                 torch.tensor(y[i].reshape(-1,),dtype=torch.float32)))
        test_loaders.append(DataLoader(test_dataset, batch_size=batch_size, drop_last=False))
        
    return train_loaders, test_loaders

In [9]:
criterion = nn.L1Loss()

In [10]:
class MyModel(nn.Module):
    def __init__(self, input_feature, hidden_size, output_feature, num_layers=1):
        super(MyModel, self).__init__()
        self.linear = nn.Linear(input_feature, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True, num_layers=num_layers, dropout=0.2)
        ''' gru input is (N,L,H_in=H_hidden), output is (N,L,H_hidden), hidden is (num_layers, h_hidden)'''
        self.linear_out = nn.Linear(hidden_size, output_feature)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
    
    def forward(self, input, hidden):
        ''' X is in the shape of (N,L,input_feature) '''
        output = F.relu(self.linear(input))
        output, hidden = self.gru(output, hidden)
        output = self.linear_out(F.relu(output))
        return output
    
    def initHidden(self, batch_size):
        return torch.zeros((self.num_layers, batch_size, self.hidden_size))

In [11]:
def evaluate(test_loader):
    model.eval()
    with torch.no_grad():
        loss = 0
        n = 0
        for batch, (x, y) in enumerate(test_loader):
            h0 = model.initHidden(len(x))
            output = model.forward(x, h0)
            loss += criterion(output[:,-1,:],y).item() * len(x)
            n += len(x)
        loss /= n
    return loss

def train(n_epoches, train_loader, test_loader):
    optimizer = optim.Adam(model.parameters())
    
    best_test_loss = 100.0
    for epoch in range(n_epoches):
        
        curr_loss = 0.0
        model.train()
        
        n = 0
        for batch, (x, y) in enumerate(train_loader):
            h0 = model.initHidden(len(x))
            output = model.forward(x, h0)
            print(output[-1,-1,:],y[-1])
            loss = criterion(output[:,-1,:], y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            curr_loss += loss*len(x)
            n += len(x)
        
        curr_loss /= len(train_loader.dataset)
        test_loss = evaluate(test_loader)
#         if (epoch % 20 == 0):  print(f'current {epoch} training loss={loss.item()} test loss = {test_loss}')
        print(f'current {epoch} training loss={loss.item()} test loss = {test_loss}')
        if test_loss < best_test_loss:
            best_n_epoches = epoch + 1
            best_test_loss = test_loss
            print(f'updating best loss {epoch} training loss={loss.item()} test loss = {test_loss}')
            
        if epoch > best_n_epoches + 10:
            print('early stop')
            break
    return best_n_epoches

def retrain(n_epoches, train_loader):
    optimizer = optim.Adam(model.parameters())
    
    model.train()
    for epoch in range(n_epoches):
        for batch, (x, y) in enumerate(train_loader):
            h0 = model.initHidden(len(x))
            output = model.forward(x, h0)
            loss = criterion(output[:,-1,:], y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [14]:
df

Unnamed: 0_level_0,congestion-median,congestion-median-normalized
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1991-04-01 00:00:00,35.0,2.940104
1991-04-01 00:20:00,35.0,2.940104
1991-04-01 00:40:00,35.0,2.940104
1991-04-01 01:00:00,35.0,2.940104
1991-04-01 01:20:00,35.0,2.940104
...,...,...
1991-09-30 10:20:00,-7.0,-0.686983
1991-09-30 10:40:00,-6.0,-0.600624
1991-09-30 11:00:00,-7.0,-0.686983
1991-09-30 11:20:00,7.0,0.522046


In [15]:
test_periods = [
    ['1991-09-16 12:00:00', '1991-09-16 24:00:00'],
    ['1991-09-23 12:00:00', '1991-09-23 24:00:00']]

all_ss = {}
torch.manual_seed(123)
for unique in uniques[0:1]:
    print(f"doing {unique}")
    
    df, ss = getseries(unique)
    print(ss.mean_, ss.scale_, df['congestion-median-normalized'].std())
    all_ss[unique] = ss
    
    test_periods_with_lookback = []
    for period in test_periods:
        id1 = df.index.to_list().index(period[0])
        test_periods_with_lookback.append([df.index[id1-look_back], period[1]])
    
    model = MyModel(1, linear_node, 1, num_layers=3)
    train_loaders, test_loaders = assemble(df)
    best_n_epoches = train(200, train_loaders[0], test_loaders[0])
    
    model = MyModel(1, linear_node, 1, num_layers=3)
    print('refitting with {best_n_epoches}')
    retrain(best_n_epoches, train_loaders[1])
    
    torch.save(model.state_dict(), 'model_'+unique+'.pickle')

doing 00EB
[0.95494673] [11.57954203] 1.0000380539224218
0.6947643741927023
0.0902499658166924
tensor([0.0362], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([-0.0249], grad_fn=<SliceBackward0>) tensor([0.3925])
tensor([-0.0502], grad_fn=<SliceBackward0>) tensor([3.0265])
tensor([-0.0956], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([-0.0861], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([-0.1112], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([-0.1095], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([-0.1554], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([-0.1208], grad_fn=<SliceBackward0>) tensor([0.4357])
tensor([-0.0979], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([-0.0274], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([-0.1248], grad_fn=<SliceBackward0>) tensor([-0.7302])
tensor([-0.1249], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([-0.0834], grad_fn=<SliceBackward0>) tensor([0.1766])
tensor([-0.0243], grad_fn=<SliceBackward0>) tens

tensor([-0.2054], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([0.2377], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([0.6393], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([-0.5778], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([0.3482], grad_fn=<SliceBackward0>) tensor([0.4357])
tensor([-0.4690], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([0.6384], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0.1907], grad_fn=<SliceBackward0>) tensor([-0.7302])
tensor([-0.1660], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([-0.0822], grad_fn=<SliceBackward0>) tensor([0.1766])
tensor([0.2601], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([-0.2640], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([0.3092], grad_fn=<SliceBackward0>) tensor([-1.2051])
tensor([0.4411], grad_fn=<SliceBackward0>) tensor([0.7379])
tensor([-0.2868], grad_fn=<SliceBackward0>) tensor([-0.2120])
tensor([-0.3137], grad_fn=<SliceBackward0>) tensor([-1.3347])
tensor([-0.0719], grad_f

tensor([0.0754], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0.0930], grad_fn=<SliceBackward0>) tensor([-0.7302])
tensor([-0.0737], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([-0.0163], grad_fn=<SliceBackward0>) tensor([0.1766])
tensor([0.1474], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([-0.2105], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([0.2160], grad_fn=<SliceBackward0>) tensor([-1.2051])
tensor([0.2984], grad_fn=<SliceBackward0>) tensor([0.7379])
tensor([-0.2639], grad_fn=<SliceBackward0>) tensor([-0.2120])
tensor([-0.5324], grad_fn=<SliceBackward0>) tensor([-1.3347])
tensor([-0.1044], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0.3207], grad_fn=<SliceBackward0>) tensor([-0.3415])
tensor([0.1927], grad_fn=<SliceBackward0>) tensor([0.1334])
tensor([-0.1052], grad_fn=<SliceBackward0>) tensor([-0.9461])
current 10 training loss=0.6413906812667847 test loss = 0.8256176114082336
updating best loss 10 training loss=0.6413906812667847 test loss = 0

tensor([0.0182], grad_fn=<SliceBackward0>) tensor([0.1766])
tensor([0.0960], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([-0.2649], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([0.0954], grad_fn=<SliceBackward0>) tensor([-1.2051])
tensor([0.2923], grad_fn=<SliceBackward0>) tensor([0.7379])
tensor([-0.2620], grad_fn=<SliceBackward0>) tensor([-0.2120])
tensor([-0.3118], grad_fn=<SliceBackward0>) tensor([-1.3347])
tensor([-0.0321], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0.1049], grad_fn=<SliceBackward0>) tensor([-0.3415])
tensor([0.1310], grad_fn=<SliceBackward0>) tensor([0.1334])
tensor([-0.0895], grad_fn=<SliceBackward0>) tensor([-0.9461])
current 15 training loss=0.6367776393890381 test loss = 0.8063948750495911
updating best loss 15 training loss=0.6367776393890381 test loss = 0.8063948750495911
tensor([-0.3515], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([-1.3609], grad_fn=<SliceBackward0>) tensor([0.3925])
tensor([-0.3129], grad_fn=<SliceBackward0>)

tensor([-0.0747], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0.1178], grad_fn=<SliceBackward0>) tensor([-0.3415])
tensor([0.1139], grad_fn=<SliceBackward0>) tensor([0.1334])
tensor([-0.1027], grad_fn=<SliceBackward0>) tensor([-0.9461])
current 20 training loss=0.6299672722816467 test loss = 0.8099434971809387
tensor([-0.3796], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([-1.5306], grad_fn=<SliceBackward0>) tensor([0.3925])
tensor([-0.2981], grad_fn=<SliceBackward0>) tensor([3.0265])
tensor([-0.1910], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([-0.2545], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([0.0903], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([0.2116], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([-0.2228], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([0.1572], grad_fn=<SliceBackward0>) tensor([0.4357])
tensor([-0.5220], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([-0.0359], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0

tensor([-0.1594], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([-0.2753], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([0.0714], grad_fn=<SliceBackward0>) tensor([0.2630])
tensor([0.2164], grad_fn=<SliceBackward0>) tensor([0.7811])
tensor([-0.2250], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([0.1911], grad_fn=<SliceBackward0>) tensor([0.4357])
tensor([-0.8039], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([0.0043], grad_fn=<SliceBackward0>) tensor([-0.0825])
tensor([0.1607], grad_fn=<SliceBackward0>) tensor([-0.7302])
tensor([-0.0326], grad_fn=<SliceBackward0>) tensor([1.0402])
tensor([0.0119], grad_fn=<SliceBackward0>) tensor([0.1766])
tensor([0.0825], grad_fn=<SliceBackward0>) tensor([-1.5506])
tensor([-0.0306], grad_fn=<SliceBackward0>) tensor([-0.6870])
tensor([0.2091], grad_fn=<SliceBackward0>) tensor([-1.2051])
tensor([0.1586], grad_fn=<SliceBackward0>) tensor([0.7379])
tensor([-0.3219], grad_fn=<SliceBackward0>) tensor([-0.2120])
tensor([-0.1322], grad_fn=

  return F.l1_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (72) must match the size of tensor b (512) at non-singleton dimension 1