In [23]:
import torch
from torch import nn
from torch.autograd import Variable
import pandas as pd
import dateutil
import datetime
from sklearn.model_selection import train_test_split
import numpy as np
import torch.nn.functional as funcs

In [24]:
# 68,335,729 data points in total
import random
p = 0.0001  # 0.01% of the lines -> 6800 data points
# keep the header, then take only 1% of lines
# if random from [0,1] interval is greater than 0.01 the row will be skipped
"""
df = pd.read_csv(
         'data/all_events.csv',
         header=0, 
         skiprows=lambda i: i>0 and random.random() > p
)
"""
# df = pd.read_csv('../data/Kiasma_sorted.csv', nrows=1000000, low_memory=False)
# df = pd.read_csv('../data/Rautatientoriitä_sorted.csv', low_memory=False)
df = pd.read_csv('../data/AaltoyliopistoMKorkeakouluaukio_sorted.csv', low_memory=False)

## Drop duplicated based on timestamp

In [25]:
df = df.drop_duplicates('ts').reset_index()

## Add some useful columns

In [26]:
# sort based on timestamp
df = df.sort_values('ts')
# the hour and minutes of the timestamp
df['hour_and_minutes'] = df['ts'].apply(lambda x: x[11:16])
# month of timestamp
df['month'] = df['ts'].apply(lambda x: x[5:7])
# the percentage of bikes
df['bikes_percent'] = df['bikes'] / df['total_slots']
df['minutes_from_midnight'] = df['ts'].apply(lambda x: int(x[11:13]) * 60 + int(x[14:16]))
df['hour'] = df['hour_and_minutes'].apply(lambda x: x[:2])
df['date'] = df['ts'].apply(pd.to_datetime)
df['weekday'] = df['date'].apply(lambda x: x.weekday())
df['weekend'] = df['weekday'].apply(lambda x: x > 4)
df.head()

Unnamed: 0,index,ts,lat,lon,operative,sid,name,bikes,total_slots,hour_and_minutes,month,bikes_percent,minutes_from_midnight,hour,date,weekday,weekend
0,0,2018-05-11T07:48:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",5,40,07:48,5,0.125,468,7,2018-05-11 07:48:01,4,False
1,1,2018-05-11T07:49:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:49,5,0.1,469,7,2018-05-11 07:49:01,4,False
2,2,2018-05-11T07:50:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:50,5,0.1,470,7,2018-05-11 07:50:01,4,False
3,3,2018-05-11T07:51:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:51,5,0.1,471,7,2018-05-11 07:51:01,4,False
4,4,2018-05-11T07:52:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:52,5,0.1,472,7,2018-05-11 07:52:01,4,False


## Prepare for predictions

### Add column which states whether a bike was added at a given moment

In [27]:
df['bike_added'] = df.apply(lambda x: df.iloc[(x.name - 1 if x.name > 0 else 0)]['bikes'] < x['bikes'] , axis=1)
df['bike_removed'] = df.apply(lambda x: df.iloc[(x.name - 1 if x.name > 0 else 0)]['bikes'] > x['bikes'] , axis=1)
df.head(100)

Unnamed: 0,index,ts,lat,lon,operative,sid,name,bikes,total_slots,hour_and_minutes,month,bikes_percent,minutes_from_midnight,hour,date,weekday,weekend,bike_added,bike_removed
0,0,2018-05-11T07:48:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",5,40,07:48,05,0.125,468,07,2018-05-11 07:48:01,4,False,False,False
1,1,2018-05-11T07:49:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:49,05,0.100,469,07,2018-05-11 07:49:01,4,False,False,True
2,2,2018-05-11T07:50:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:50,05,0.100,470,07,2018-05-11 07:50:01,4,False,False,False
3,3,2018-05-11T07:51:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:51,05,0.100,471,07,2018-05-11 07:51:01,4,False,False,False
4,4,2018-05-11T07:52:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:52,05,0.100,472,07,2018-05-11 07:52:01,4,False,False,False
5,5,2018-05-11T07:53:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:53,05,0.100,473,07,2018-05-11 07:53:01,4,False,False,False
6,6,2018-05-11T07:54:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:54,05,0.100,474,07,2018-05-11 07:54:01,4,False,False,False
7,7,2018-05-11T07:55:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:55,05,0.100,475,07,2018-05-11 07:55:01,4,False,False,False
8,8,2018-05-11T07:56:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:56,05,0.100,476,07,2018-05-11 07:56:01,4,False,False,False
9,9,2018-05-11T07:57:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:57,05,0.100,477,07,2018-05-11 07:57:01,4,False,False,False


### Add max waiting time for next bike

In [28]:
# extract difference in minutes to a separate column
# on the moment that a bike was added, the waiting time is the time to next bike
i = 0
first_non_additive_index = 0
while (i < len(df)):
    while (i < len(df) and df.iloc[i]['bike_added'] == False):
        i += 1
    if (i >= len(df)):
        break
    additive_row = df.iloc[i]
    time = additive_row['minutes_from_midnight']
    difference = abs(time - df.iloc[first_non_additive_index]['minutes_from_midnight'])
    df.loc[first_non_additive_index:(i-1 if i > 0 else 0), 'max_wait_for_new_bike'] = difference 
    date_time = additive_row['date']
    last_date = df.iloc[first_non_additive_index]['date']
    date_difference = abs((date_time - last_date).total_seconds() / 60)
    df.loc[first_non_additive_index:(i-1 if i > 0 else 0), 'max_wait_for_new_bike_as_date'] = date_difference 
    first_non_additive_index = i
    i += 1
# if we can't know when the next bike will be added, fill with -1 for now
df = df.fillna(-1)
df.head(100)

Unnamed: 0,index,ts,lat,lon,operative,sid,name,bikes,total_slots,hour_and_minutes,...,bikes_percent,minutes_from_midnight,hour,date,weekday,weekend,bike_added,bike_removed,max_wait_for_new_bike,max_wait_for_new_bike_as_date
0,0,2018-05-11T07:48:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",5,40,07:48,...,0.125,468,07,2018-05-11 07:48:01,4,False,False,False,16.0,16.0
1,1,2018-05-11T07:49:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:49,...,0.100,469,07,2018-05-11 07:49:01,4,False,False,True,16.0,16.0
2,2,2018-05-11T07:50:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:50,...,0.100,470,07,2018-05-11 07:50:01,4,False,False,False,16.0,16.0
3,3,2018-05-11T07:51:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:51,...,0.100,471,07,2018-05-11 07:51:01,4,False,False,False,16.0,16.0
4,4,2018-05-11T07:52:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:52,...,0.100,472,07,2018-05-11 07:52:01,4,False,False,False,16.0,16.0
5,5,2018-05-11T07:53:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:53,...,0.100,473,07,2018-05-11 07:53:01,4,False,False,False,16.0,16.0
6,6,2018-05-11T07:54:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:54,...,0.100,474,07,2018-05-11 07:54:01,4,False,False,False,16.0,16.0
7,7,2018-05-11T07:55:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:55,...,0.100,475,07,2018-05-11 07:55:01,4,False,False,False,16.0,16.0
8,8,2018-05-11T07:56:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:56,...,0.100,476,07,2018-05-11 07:56:01,4,False,False,False,16.0,16.0
9,9,2018-05-11T07:57:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:57,...,0.100,477,07,2018-05-11 07:57:01,4,False,False,False,16.0,16.0


### Add idle time for each station

In [29]:
i = 0
idle_for = 0
idle_for_as_date = 0
while (i < len(df)):
    row = df.iloc[i]
    previous_row = df.iloc[(i-1 if i > 0 else 0)]
    if (not row['bikes'] == previous_row['bikes']):
        idle_for = 0
        idle_for_as_date = 0
        df.loc[i, 'idle_for'] = idle_for
        df.loc[i, 'idle_for_as_date'] = idle_for_as_date        
        i+= 1
        continue
    time_now = row['minutes_from_midnight']
    time_last = previous_row['minutes_from_midnight']
    date_time_now = row['date']
    date_time_last = previous_row['date']
    idle_for += abs(time_now - time_last)
    secs = (date_time_now - date_time_last).total_seconds()
    idle_for_as_date += abs(secs / 60)
    df.loc[i, 'idle_for'] = idle_for
    df.loc[i, 'idle_for_as_date'] = idle_for_as_date
    i += 1
# if we can't know when the next bike will be taken, fill with -1 for now
df = df.fillna(-1)
df.head(50)

Unnamed: 0,index,ts,lat,lon,operative,sid,name,bikes,total_slots,hour_and_minutes,...,hour,date,weekday,weekend,bike_added,bike_removed,max_wait_for_new_bike,max_wait_for_new_bike_as_date,idle_for,idle_for_as_date
0,0,2018-05-11T07:48:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",5,40,07:48,...,7,2018-05-11 07:48:01,4,False,False,False,16.0,16.0,0.0,0.0
1,1,2018-05-11T07:49:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:49,...,7,2018-05-11 07:49:01,4,False,False,True,16.0,16.0,0.0,0.0
2,2,2018-05-11T07:50:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:50,...,7,2018-05-11 07:50:01,4,False,False,False,16.0,16.0,1.0,1.0
3,3,2018-05-11T07:51:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:51,...,7,2018-05-11 07:51:01,4,False,False,False,16.0,16.0,2.0,2.0
4,4,2018-05-11T07:52:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:52,...,7,2018-05-11 07:52:01,4,False,False,False,16.0,16.0,3.0,3.0
5,5,2018-05-11T07:53:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:53,...,7,2018-05-11 07:53:01,4,False,False,False,16.0,16.0,4.0,4.0
6,6,2018-05-11T07:54:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:54,...,7,2018-05-11 07:54:01,4,False,False,False,16.0,16.0,5.0,5.0
7,7,2018-05-11T07:55:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:55,...,7,2018-05-11 07:55:01,4,False,False,False,16.0,16.0,6.0,6.0
8,8,2018-05-11T07:56:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:56,...,7,2018-05-11 07:56:01,4,False,False,False,16.0,16.0,7.0,7.0
9,9,2018-05-11T07:57:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:57,...,7,2018-05-11 07:57:01,4,False,False,False,16.0,16.0,8.0,8.0


In [31]:
df.describe()

Unnamed: 0,index,lat,lon,operative,sid,bikes,total_slots,bikes_percent,minutes_from_midnight,weekday,max_wait_for_new_bike,max_wait_for_new_bike_as_date,idle_for,idle_for_as_date
count,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0,128023.0
mean,64879.642674,60.18431,24.82667,1.0,541.0,7.830398,40.599752,0.192926,732.6328,3.040883,239.543691,95.268873,146.433375,34.819797
std,38422.483995,4.872211e-11,6.65639e-11,0.0,0.0,9.51694,0.48995,0.234671,404.983005,1.993114,421.196981,133.995827,441.413243,70.843195
min,0.0,60.18431,24.82667,1.0,541.0,0.0,40.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0
25%,32005.5,60.18431,24.82667,1.0,541.0,1.0,40.0,0.02439,388.0,1.0,13.0,13.0,2.0,2.0
50%,64011.0,60.18431,24.82667,1.0,541.0,4.0,41.0,0.097561,734.0,3.0,28.0,28.0,6.0,6.0
75%,96016.5,60.18431,24.82667,1.0,541.0,12.0,41.0,0.292683,1081.0,5.0,102.0,101.0,25.0,24.0
max,142935.0,60.18431,24.82667,1.0,541.0,60.0,41.0,1.463415,1439.0,6.0,1400.0,593.0,1937.0,503.0


### Add time for next bike take

Predict how much time for the next bike to be taken: max waiting time is not useful here.

In [33]:
i = 0
first_non_removing_index = 0
while (i < len(df)):
    while (i < len(df) and df.iloc[i]['bike_removed'] == False):
        i += 1
    if (i >= len(df)):
        break
    removing_row = df.iloc[i]
    time = removing_row['minutes_from_midnight']
    times_until_now = df.loc[first_non_removing_index:i-1, 'minutes_from_midnight']
    difference = abs(time - times_until_now)
    df.loc[first_non_removing_index:(i-1 if i > 0 else 0), 'wait_for_bike_taken'] = difference 
    date_time = removing_row['date']
    date_times_until_now = df.loc[first_non_removing_index:i-1, 'date']
    date_difference = abs((date_time - date_times_until_now).apply(lambda x: x.total_seconds() / 60))
    df.loc[first_non_removing_index:(i-1 if i > 0 else 0), 'wait_for_bike_taken_as_date'] = date_difference 
    first_non_removing_index = i
    i += 1
# if we can't know when the next bike will be taken, fill with -1 for now
df = df.fillna(-1)
df.head(50)

Unnamed: 0,index,ts,lat,lon,operative,sid,name,bikes,total_slots,hour_and_minutes,...,weekday,weekend,bike_added,bike_removed,max_wait_for_new_bike,max_wait_for_new_bike_as_date,idle_for,idle_for_as_date,wait_for_bike_taken,wait_for_bike_taken_as_date
0,0,2018-05-11T07:48:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",5,40,07:48,...,4,False,False,False,16.0,16.0,0.0,0.0,1.0,1.0
1,1,2018-05-11T07:49:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:49,...,4,False,False,True,16.0,16.0,0.0,0.0,10.0,10.0
2,2,2018-05-11T07:50:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:50,...,4,False,False,False,16.0,16.0,1.0,1.0,9.0,9.0
3,3,2018-05-11T07:51:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:51,...,4,False,False,False,16.0,16.0,2.0,2.0,8.0,8.0
4,4,2018-05-11T07:52:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:52,...,4,False,False,False,16.0,16.0,3.0,3.0,7.0,7.0
5,5,2018-05-11T07:53:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:53,...,4,False,False,False,16.0,16.0,4.0,4.0,6.0,6.0
6,6,2018-05-11T07:54:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:54,...,4,False,False,False,16.0,16.0,5.0,5.0,5.0,5.0
7,7,2018-05-11T07:55:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:55,...,4,False,False,False,16.0,16.0,6.0,6.0,4.0,4.0
8,8,2018-05-11T07:56:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:56,...,4,False,False,False,16.0,16.0,7.0,7.0,3.0,3.0
9,9,2018-05-11T07:57:01,60.184312,24.826671,1,541,"Aalto-yliopisto (M), Korkeakouluaukio",4,40,07:57,...,4,False,False,False,16.0,16.0,8.0,8.0,2.0,2.0


## Remove useless or codependent columns

In [35]:
df = df.drop(columns=['index', 'ts', 'sid', 'hour_and_minutes', 'name', 'bike_added', 'bike_removed', 'lat', 'lon', 'bikes', 'total_slots', 'date'])
print('using columns', df.columns.values)

using columns ['operative' 'month' 'bikes_percent' 'minutes_from_midnight' 'hour'
 'weekday' 'weekend' 'max_wait_for_new_bike'
 'max_wait_for_new_bike_as_date' 'idle_for' 'idle_for_as_date'
 'wait_for_bike_taken' 'wait_for_bike_taken_as_date']


## Define training and test sets

In [36]:
# drop the columns we would not know in a real situation, as well as the column to be predicted
X = df.drop(columns=['max_wait_for_new_bike', 'wait_for_bike_taken']).as_matrix()
y = df['max_wait_for_new_bike'].as_matrix()
# set 
train_size = int(len(X)*0.8)

# define the training and test sets
X_train = torch.from_numpy(np.asarray(X[:train_size].astype('float')))
# take transpose to get right dimensions for torch
y_train = torch.from_numpy(np.asarray([y[:train_size]]).T)
X_test = torch.from_numpy(np.asarray(X[train_size:].astype('float')))
y_test = torch.from_numpy(np.asarray([y[train_size:]]).T)
# combine input and labels
train_data = torch.cat((X_train, y_train), 1)
test_data = torch.cat((X_test, y_test), 1)

# sanity check
assert len(X) == len(X_train) + len(X_test)

## Define a simple neural network

In [59]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(11, 32)
        self.l2 = nn.Linear(32, 64)
        self.l3 = nn.Linear(64, 128)
        self.l4 = nn.Linear(128, 256)
        self.l5 = nn.Linear(256, 128)
        self.l6 = nn.Linear(128, 64)
        self.l9 = nn.Linear(64, 8)
        self.l10 = nn.Linear(8, 1)

    def forward(self, x):
        x = self.l1(x)
        x = funcs.relu(self.l2(x))
        x = funcs.tanh(self.l3(x))
        x = funcs.sigmoid(self.l4(x))
        x = funcs.tanh(self.l5(x))
        x = funcs.tanh(self.l6(x))
        x = funcs.relu(self.l9(x))
        return self.l10(x)

## Define loss function

In [61]:
loss_func = torch.nn.SmoothL1Loss()

## Define training loop

In [62]:
net = Net().double()
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
batch_size = 250
epochs = 500
for epoch in range(epochs):
    # train the data in batches
    batch_indices = torch.randperm(len(train_data))[:batch_size]
    data = train_data[batch_indices]
    # assume that last cell in array is label
    inp, label = data[:, :data.shape[1]-1], data[:, data.shape[1]-1]
    # torch has to be fed variables
    input_var = Variable(inp)
    label_var = Variable(label.reshape(label.shape[0], 1))
    # reset parameters after each training round
    optimizer.zero_grad()
    # actual forward pass
    outputs = net(input_var)
    # calculate loss
    loss = loss_func(outputs, label_var)
    if epoch % 50 == 0:
        print('e:', epoch)
        print('loss', loss.item())
        print('--')
    loss.backward()
    optimizer.step()
print('training done with a training set of length', len(train_data))

e: 0
loss 275.24610062847313
--
e: 50
loss 277.20496562227726
--
e: 100
loss 199.92841372853783
--
e: 150
loss 245.60253919054045
--
e: 200
loss 220.0805204794897
--
e: 250
loss 192.7805904672157
--
e: 300
loss 207.14161373736536
--
e: 350
loss 239.165343190196
--
e: 400
loss 213.13420211326488
--
e: 450
loss 218.67697862381812
--
training done with a training set of length 102418


## Evaluate with test set

In [63]:
total_loss = 0
for i, test in enumerate(test_data):
    inp, label = test[:data.shape[1]-1], test[data.shape[1]-1]
    test_input_var = Variable(inp)
    test_label_var = Variable(label)
    test_outputs = net(test_input_var)
    # just print out the loss for now
    test_loss = loss_func(test_outputs, test_label_var)
    # this is basically just to see whether the net always predicted the same thing
    if i < 20:
        print('true label', test_label_var.item(), ', prediction', test_outputs.item())
    total_loss = test_loss.item()
print('average loss', total_loss/len(test_data))

true label 11.0 , prediction 25.028935952097275
true label 11.0 , prediction 25.028935469070735
true label 11.0 , prediction 25.028935147665777
true label 11.0 , prediction 25.02893889520709
true label 11.0 , prediction 25.028938717856143
true label 11.0 , prediction 25.028938522085184
true label 11.0 , prediction 25.028938305981093
true label 34.0 , prediction 25.028940170684862
true label 34.0 , prediction 25.028940125807797
true label 34.0 , prediction 25.02894007626968
true label 34.0 , prediction 25.028940021586426
true label 34.0 , prediction 25.028939981984635
true label 34.0 , prediction 25.02893994727447
true label 34.0 , prediction 25.02893991062353
true label 34.0 , prediction 25.028940458943495
true label 34.0 , prediction 25.02894044400566
true label 34.0 , prediction 25.028940427516357
true label 34.0 , prediction 25.02894040931442
true label 34.0 , prediction 25.02894039105941
true label 34.0 , prediction 25.028940379222092
average loss 0.0009970293309065107


## Use different loss

In [64]:
loss_func = torch.nn.MSELoss()

In [65]:
net = Net().double()
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
batch_size = 250
epochs = 500
for epoch in range(epochs):
    # train the data in batches
    batch_indices = torch.randperm(len(train_data))[:batch_size]
    data = train_data[batch_indices]
    # assume that last cell in array is label
    inp, label = data[:, :data.shape[1]-1], data[:, data.shape[1]-1]
    # torch has to be fed variables
    input_var = Variable(inp)
    label_var = Variable(label.reshape(label.shape[0], 1))
    # reset parameters after each training round
    optimizer.zero_grad()
    # actual forward pass
    outputs = net(input_var)
    # calculate loss
    loss = loss_func(outputs, label_var)
    if epoch % 50 == 0:
        print('e:', epoch)
        print('loss', loss.item())
        print('--')
    loss.backward()
    optimizer.step()
print('training done with a training set of length', len(train_data))

e: 0
loss 230064.06366543914
--
e: 50
loss 173181.85866046816
--
e: 100
loss 160459.54957422375
--
e: 150
loss 198285.55105468695
--
e: 200
loss 163753.8045071396
--
e: 250
loss 165148.46526244932
--
e: 300
loss 157012.3635522407
--
e: 350
loss 198059.71368958388
--
e: 400
loss 158357.02911483365
--
e: 450
loss 149627.04819463016
--
training done with a training set of length 102418


In [66]:
total_loss = 0
for i, test in enumerate(test_data):
    inp, label = test[:data.shape[1]-1], test[data.shape[1]-1]
    test_input_var = Variable(inp)
    test_label_var = Variable(label)
    test_outputs = net(test_input_var)
    # just print out the loss for now
    test_loss = loss_func(test_outputs, test_label_var)
    if i < 20:
        print('true label', test_label_var.item(), ', prediction', test_outputs.item())
    total_loss = test_loss.item()
print('average loss', total_loss/len(test_data))

true label 11.0 , prediction 250.05519637229173
true label 11.0 , prediction 250.05519637229173
true label 11.0 , prediction 250.05519637229173
true label 11.0 , prediction 250.05519637229173
true label 11.0 , prediction 250.05519637229173
true label 11.0 , prediction 250.05519637229173
true label 11.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
true label 34.0 , prediction 250.05519637229173
average loss 2.4615782708662355
