In [156]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from numpy.random import seed

seed(42)

<h2>Data Processing and Feature Extraction</h2>

In [162]:
# Venues removed from both seasons before the sequences are built.
EXCLUDED_VENUES = ['American Airlines Center', 'Staples Center']
# Number of most-recent readings kept per venue.
SEQUENCE_LENGTH = 20


def _venue_readings(frame, start, end):
    """Collect per-venue temperature/precipitation lists for one date window.

    Rows of ``frame`` whose ``date`` lies in ``[start, end]`` (inclusive) are
    grouped by ``venue``; each group's ``temperature`` and ``precipitation``
    values are gathered into Python lists in row order. The venues listed in
    ``EXCLUDED_VENUES`` are then dropped and ``venue`` becomes a regular column.
    """
    in_window = frame.loc[(frame['date'] >= start) & (frame['date'] <= end)]
    # Select the columns with a *list*: indexing a GroupBy with a bare tuple
    # (``[...]['temperature', 'precipitation']``) was deprecated and is
    # rejected by pandas >= 2.0.
    per_venue = (
        in_window
        .groupby('venue')[['temperature', 'precipitation']]
        .agg(lambda x: list(x))
    )
    return per_venue.drop(EXCLUDED_VENUES).reset_index()


# Read data and get rid of duplicate data
data_df = pd.read_csv('data.csv')
data_df = data_df.drop_duplicates(['date', 'temperature', 'venue', 'precipitation'])

# Fill NaN values with -1
data_df = data_df.fillna(-1)

# Convert date column to datetime type and sort dataframe by date
# (reassigned rather than sorted in place so re-running the cell is safe)
data_df['date'] = pd.to_datetime(data_df['date'])
data_df = data_df.sort_values(by=['date'], ascending=True)

# Narrow date range, group by venue and drop the excluded venues
year1_df = _venue_readings(data_df, '2017-10-16', '2018-01-31')
year2_df = _venue_readings(data_df, '2018-10-16', '2019-01-31')

# Create training and validation data: the last SEQUENCE_LENGTH temperature
# readings of every venue (year 1 -> training, year 2 -> validation).
# NOTE(review): a venue with fewer readings than SEQUENCE_LENGTH yields a
# shorter list, which makes the later np.asarray() ragged - confirm the data.
X = [temps[-SEQUENCE_LENGTH:] for temps in year1_df['temperature'].values]
test = [temps[-SEQUENCE_LENGTH:] for temps in year2_df['temperature'].values]

<h2>Build model</h2>

In [163]:
class Sequence(nn.Module):
    """Two stacked ``LSTMCell`` layers followed by a linear read-out.

    The network consumes a batch of scalar sequences one time step at a time
    and emits one scalar prediction per step.

    Args:
        hidden_size: width of both LSTM cells (defaults to 101, the value the
            notebook originally hard-coded).
    """

    def __init__(self, hidden_size=101):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, input, future=0):
        """Run the network over ``input`` and optionally extrapolate.

        Args:
            input: 2-D tensor indexed as ``(batch, time)``; its dtype must
                match the dtype of the module parameters.
            future: number of extra steps to generate after the end of
                ``input`` by feeding each prediction back in as the next input.

        Returns:
            Tensor of shape ``(batch, input.size(1) + future)`` holding the
            per-step predictions.
        """
        batch_size = input.size(0)

        def initial_state():
            # The state follows the input's dtype/device instead of a
            # hard-coded CPU double, so the model also runs in float32 or on
            # an accelerator.
            return torch.zeros(batch_size, self.hidden_size,
                               dtype=input.dtype, device=input.device)

        h_t, c_t = initial_state(), initial_state()
        h_t2, c_t2 = initial_state(), initial_state()

        outputs = []
        # split(1, dim=1) yields one (batch, 1) column per time step.
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        # Predict the future: the last prediction becomes the next input.
        for _ in range(future):
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        # Every element is (batch, 1); concatenating gives (batch, steps).
        return torch.cat(outputs, dim=1)


<h2>Train model</h2>

In [165]:
 # set random seed to 0
np.random.seed(0)
torch.manual_seed(0)

# Turn the per-venue windows into arrays and build next-step pairs:
# the input drops the last reading, the target drops the first one.
train_data = np.asarray(X)
val_data = np.asarray(test)
input, target = (
    torch.from_numpy(train_data[:, :-1]),
    torch.from_numpy(train_data[:, 1:]),
)
test_input, test_target = (
    torch.from_numpy(val_data[:, :-1]),
    torch.from_numpy(val_data[:, 1:]),
)

# Model in double precision, mean-squared-error objective, LBFGS optimizer.
seq = Sequence().double()
criterion = nn.MSELoss()
optimizer = optim.LBFGS(seq.parameters(), lr=0.01)


def closure():
    """Re-evaluate the training loss and its gradients for LBFGS.

    ``optimizer.step`` may call this several times per step, so the loss is
    printed on every evaluation.
    """
    optimizer.zero_grad()
    train_loss = criterion(seq(input), target)
    print('loss:', train_loss.item())
    train_loss.backward()
    return train_loss


# Number of steps predicted beyond the end of the validation input.
future = 1

# Training loop: one LBFGS step, then a gradient-free validation pass.
for i in range(100):
    print('STEP: ', i)
    optimizer.step(closure)
    with torch.no_grad():
        pred = seq(test_input, future=future)
        # The extrapolated tail has no target, so it is left out of the loss.
        loss = criterion(pred[:, :-future], test_target)
        print('test loss:', loss.item())
        y = pred.detach().numpy()
        

STEP:  0
loss: 2533.0790213149407
loss: 2533.046361255846
loss: 2395.1085218570292
loss: 2391.139873129242
loss: 2387.1656203407465
loss: 2383.1841237936565
loss: 2379.1941048162917
loss: 2375.194486294863
loss: 2371.184340238463
loss: 2367.162871836824
loss: 2363.1294125176764
loss: 2359.083412234066
loss: 2355.0244284401574
loss: 2350.9521119136757
loss: 2346.866190386461
loss: 2342.7664510229283
loss: 2338.6527226349513
loss: 2334.524858330661
loss: 2330.382719124196
loss: 2326.2261588987826
test loss: 2252.4574126165853
STEP:  1
loss: 2322.055011020794
loss: 2317.869076855476
loss: 2313.6681164499105
loss: 2309.4518417454706
loss: 2305.2199128824814
loss: 2300.971938485939
loss: 2296.7074812848014
loss: 2292.426071009995
loss: 2288.127227182289
loss: 2283.8104949943854
loss: 2279.4754977059747
loss: 2275.122008255272
loss: 2270.7500402978026
loss: 2266.359953581357
loss: 2261.9525597622974
loss: 2257.529203286102
loss: 2253.091781796707
loss: 2248.6426696738336
loss: 2244.184525828

loss: 266.64030850450314
loss: 265.83986017698055
loss: 265.0211660642584
loss: 264.13643213485136
loss: 263.1526555633389
loss: 262.1873707026567
loss: 261.2544398731877
loss: 260.3334761138791
loss: 259.4211197651094
loss: 258.49540291015234
test loss: 155.9599265745301
STEP:  16
loss: 257.57376949472564
loss: 256.6337337238647
loss: 255.68011339376713
loss: 254.7061463795986
loss: 253.7073496649465
loss: 252.68691895055915
loss: 251.63693021726775
loss: 250.56636004120514
loss: 249.46533707844983
loss: 248.34906237806808
loss: 247.2096408946129
loss: 246.06772608435693
loss: 244.9226680076319
loss: 243.81386277723385
loss: 242.7556450379654
loss: 241.81276611474848
loss: 240.9909994969991
loss: 240.30353729399772
loss: 239.70482331833506
loss: 239.16385065104487
test loss: 144.23515978746073
STEP:  17
loss: 238.62924499567688
loss: 238.06620017929896
loss: 237.3447388088472
loss: 236.48938786760363
loss: 235.71066093132103
loss: 234.93536580248778
loss: 234.1103396542221
loss: 233.2

test loss: 14576.753681434873
STEP:  31
loss: 16788.275893324037
loss: 16468.299956142368
loss: 16147.058617226108
loss: 15824.132917158762
loss: 15494.94073247012
loss: 15147.075326403896
loss: 14729.969416857895
loss: 14255.01986921546
loss: 14102.118443203051
loss: 13984.083661493745
loss: 38798.52619448645
loss: 38256.43828706202
loss: 37628.681655467604
loss: 36281.461986484275
loss: 33658.93219474876
loss: 32245.680767200847
loss: 8687.64279321028
loss: 8560.07995440051
loss: 8341.221517588772
loss: 7955.052808927002
test loss: 7628.226361511287
STEP:  32
loss: 7738.564463795189
loss: 7399.064639879591
loss: 6945.808944468646
loss: 6692.07420755696
loss: 6339.391965191997
loss: 5879.067410736825
loss: 5546.690911127905
loss: 5265.065318052912
loss: 4953.607438606138
loss: 4668.805850275561
loss: 4452.840567846323
loss: 4252.137687816448
loss: 3978.19976468705
loss: 3852.1870211229434
loss: 3788.3284756307003
loss: 3682.8102702099613
loss: 3499.080685958019
loss: 3201.059133377419

loss: 238.3026696213173
loss: 232.32185600927878
loss: 227.8453697801844
loss: 225.05371010294374
loss: 223.0578048044716
loss: 221.2221647975236
loss: 219.59839218649446
loss: 218.3111226186804
test loss: 147.0937335390986
STEP:  47
loss: 217.21950220928025
loss: 216.179256710257
loss: 215.22488227584802
loss: 214.32459713704455
loss: 213.49105351170437
loss: 212.68241757248333
loss: 211.91388089963047
loss: 211.16523443238495
loss: 210.44679937812987
loss: 209.73872731761887
loss: 209.05066347025877
loss: 208.36985503113897
loss: 207.70405888174145
loss: 207.0414126241713
loss: 206.3889534682061
loss: 205.73748906347194
loss: 205.0931552909738
loss: 204.44753861521795
loss: 203.80627915973378
loss: 203.16205692047757
test loss: 134.1547597162959
STEP:  48
loss: 202.52010071693812
loss: 201.87359960403526
loss: 201.22751579328707
loss: 200.5753206552901
loss: 199.92172144610586
loss: 199.25993972270146
loss: 198.59438223674104
loss: 197.9176105148119
loss: 197.2335087925867
loss: 196.

test loss: 92.89491529259408
STEP:  62
loss: 109.03567633133119
loss: 109.01081788982208
loss: 108.99007917902568
loss: 108.96988608032648
loss: 108.94990596418283
loss: 108.9300804073361
loss: 108.91021255399693
loss: 108.88558477450826
loss: 108.8551783833257
loss: 108.83067711191433
loss: 108.80610835590291
loss: 108.78200939876444
loss: 108.75662927228578
loss: 108.73147643933424
loss: 108.70482690174012
loss: 108.6729198651242
loss: 108.64834085973747
loss: 108.62225440413617
loss: 108.59331492625354
loss: 108.56741175167718
test loss: 92.31638182975625
STEP:  63
loss: 108.54466767958526
loss: 108.51882399714522
loss: 108.49419069811098
loss: 108.47002569382201
loss: 108.4449770636369
loss: 108.41962739167784
loss: 108.39372133752448
loss: 108.36769905783333
loss: 108.341283452979
loss: 108.31481693450158
loss: 108.28802045515074
loss: 108.26119700759641
loss: 108.23403971063628
loss: 108.2068822165943
loss: 108.17922132534895
loss: 108.15155148646103
loss: 108.1226219962978
loss:

loss: 101.62417714439951
loss: 101.61335912416128
loss: 101.60298893127626
loss: 101.592418415686
loss: 101.58129529454088
loss: 101.57078209084597
loss: 101.56080437802966
loss: 101.55077013321504
loss: 101.54050135163911
loss: 101.53020269461034
loss: 101.52014153853521
loss: 101.51008460653581
loss: 101.49986552063933
loss: 101.489675351497
test loss: 84.04409667349633
STEP:  78
loss: 101.47930014183127
loss: 101.4684001170811
loss: 101.45724936451276
loss: 101.44715137873666
loss: 101.43692802785947
loss: 101.42649232844856
loss: 101.41662939851103
loss: 101.40647791936453
loss: 101.39647816675733
loss: 101.38663660528059
loss: 101.37683780829478
loss: 101.36649295960817
loss: 101.35669287522175
loss: 101.34702115497939
loss: 101.33744719266147
loss: 101.32768318423948
loss: 101.31812343231586
loss: 101.30870128201163
loss: 101.29932485804099
loss: 101.28988479274587
test loss: 84.0300879755701
STEP:  79
loss: 101.28067592733623
loss: 101.27152426514306
loss: 101.26243259744442
los

test loss: 89.38823272048471
STEP:  93
loss: 97.99190691637183
loss: 97.98735455514468
loss: 97.98277864734652
loss: 97.97666775779248
loss: 97.97244272981406
loss: 97.96821590335364
loss: 97.96303051029403
loss: 97.958337924541
loss: 97.95350097574443
loss: 97.94864884056332
loss: 97.94370637646203
loss: 97.93866805119684
loss: 97.93266834883663
loss: 97.92462374636008
loss: 97.91826840780847
loss: 97.91143552438642
loss: 97.903507076187
loss: 97.89545508537121
loss: 97.88743282968693
loss: 97.87917174035991
test loss: 89.76447851271318
STEP:  94
loss: 97.87073684474065
loss: 97.8620731742114
loss: 97.85328170918996
loss: 97.8442905281628
loss: 97.83521714608516
loss: 97.82596627551943
loss: 97.81667813210488
loss: 97.80723423131657
loss: 97.79779425863664
loss: 97.78822352616773
loss: 97.77869343562979
loss: 97.76906027344533
loss: 97.75949518057692
loss: 97.74985167045205
loss: 97.74029286699886
loss: 97.7306749816122
loss: 97.72115233697767
loss: 97.71158723699996
loss: 97.70212632

References:
https://github.com/pytorch/examples/blob/master/time_sequence_prediction/train.py