In [3]:
import numpy as np
import copy
import sys

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an array with the shape of x.

    The naive form calls exp(-x), which overflows (RuntimeWarning) for
    large negative x.  Here only exp(-|x|) is evaluated, which always lies
    in (0, 1], and the algebraically equivalent branch is selected per sign.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves n-d arrays untouched.
    return result[()]

def ReLU(x):
    """Rectified linear unit: element-wise maximum of x and zero."""
    floor = np.zeros_like(x)
    return np.maximum(x, floor)

# The triple-quoted text below is an inert string literal, not a function:
# ReLU_derivative is never actually defined.
"""
def ReLU_derivative(x):
    if x >= 0: return 1
    return 0
"""

#read in data - a row is a sensor, a column is a point in time
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
# The with-block guarantees the handle is closed even if readlines() raises.
with open('/Users/mitch/Dropbox/FinalProject/traffic2.txt') as traffic_file:
    traffic_lines = traffic_file.readlines()

In [4]:
num_sensors = len(traffic_lines)
if num_sensors == 0:
    raise ValueError("traffic data is empty: expected one line per sensor")
num_sensors_considered = 1 #For now, I will look at 1 sensor at a time
num_timepoints = len(traffic_lines[0].split())

# Parse every line into a row of floats.  The first line fixes the number of
# time points; longer lines are truncated to that width, shorter ones are
# rejected with a message naming the offending line.
traffic_rows = []
for line_number, line in enumerate(traffic_lines):
    tokens = line.split()
    if len(tokens) < num_timepoints:
        raise ValueError(
            "line %d has %d values, expected at least %d"
            % (line_number + 1, len(tokens), num_timepoints))
    traffic_rows.append([float(token) for token in tokens[:num_timepoints]])
traffic = np.array(traffic_rows, dtype=float).reshape(num_sensors, num_timepoints)

#setup neural network
rolling_window_hours = 4
rolling_window = rolling_window_hours * 6 #each time point is separated by 10 minutes
hidden_units = 10

# All random draws below come from one seeded generator so that the initial
# weights and the train/test split are identical on every fresh run.
RANDOM_SEED = 0
rng = np.random.RandomState(RANDOM_SEED)

#this will map from the input layer and the previous hidden layer to the hidden layer
#(the trailing +1 column multiplies the constant-1 bias input)
v = rng.normal(scale=1.0/np.sqrt(num_sensors_considered + hidden_units),
               size=[hidden_units,num_sensors_considered + hidden_units + 1])
#this will map from the hidden layer to the output layer
#(again with one extra bias column)
w = rng.normal(scale=1.0/np.sqrt(hidden_units),size=[num_sensors_considered,hidden_units + 1])

# Accumulators for the summed gradient of one training window.
v_update = np.zeros_like(v)
w_update = np.zeros_like(w)

alpha = 0.001 #learning rate
# One Bernoulli(0.8) draw per sensor: 1 = sensor is used for training,
# 0 = sensor is held out for testing.
training_set = rng.binomial(1, 0.8, num_sensors)

In [10]:
#train
# Online training: for every training sensor, slide a window of
# rolling_window observations over the series, run the recurrent network
# across the window predicting the next observation at every step, and apply
# one gradient step per window.
step = 0                 # number of windows processed so far
total_error = 0          # sum of |error| of the final-step prediction since the last report
train_max = 1440         # maximum number of windows per sensor
average = 0              # running sum (mean after the loop) of the final-step targets
sensors_trained = 0
error_count = 0          # number of terms accumulated in total_error
max_sensors_trained = 50 # stop once this many training sensors have been used
# A window starting at i reads columns i .. i+rolling_window (inputs plus the
# last target), so at most traffic.shape[1] - rolling_window start positions
# are valid.  Clamping avoids an IndexError on series shorter than train_max.
windows_per_sensor = max(0, min(train_max, traffic.shape[1] - rolling_window))
print("Training Error")
for sensor_iteration in range(num_sensors):
    if training_set[sensor_iteration] == 0: continue
    train_sensor = sensor_iteration
    sensors_trained += 1
    if sensors_trained > max_sensors_trained: break
    for i in range(windows_per_sensor):
        step += 1
        # Every 10 * train_max windows: report the mean absolute error of the
        # final-step predictions, reset the statistics, and decay the
        # learning rate.
        if step % (train_max * 10) == 0:
            print(step, total_error / error_count)
            total_error = 0
            error_count = 0
            alpha *= 0.75
        history = traffic[train_sensor,i:i+rolling_window]
        # The hidden state starts at zero for every window.
        previous_hidden_layers = [np.zeros(hidden_units)]
        grad_w_of_L = list()
        grad_v_of_L = list()

        #forward propagation
        for j in range(rolling_window):
            # Network input = [current observation, previous hidden state, 1].
            # (Named net_input/target so the builtins input() and next() are
            # not overwritten in the notebook namespace.)
            net_input = np.append(history[j],previous_hidden_layers[-1])
            net_input = np.append(net_input,[1.0])
            pre_activation = np.dot(v,net_input)
            hidden = ReLU(pre_activation)
            hidden_one = np.append(hidden,[1.0])
            # ReLU returns a fresh array, so no defensive copy is needed.
            previous_hidden_layers.append(hidden)
            prediction = np.dot(w, hidden_one) #linear output
            target = traffic[train_sensor,i+j+1]
            error = prediction - target
            # Only the prediction made after seeing the whole window counts
            # towards the reported statistics.
            if j == rolling_window - 1:
                average += target
                total_error += np.abs(error[0])
                error_count += 1
            # Gradients of the squared error L = error**2 at this time step.
            grad_w_of_L.append(2 * error * hidden_one)
            #drop the last row, which corresponds to the constant-1 bias unit
            grad_h_of_L = (2 * error * np.transpose(w))[:-1]
            # Row k of dh/dv is the input vector where hidden unit k is
            # active and zero otherwise; the gate is taken from the same
            # pre-activation that produced `hidden`.
            grad_v_of_h = np.where((pre_activation > 0)[:, np.newaxis], net_input, 0.0)
            grad_v_of_L.append(grad_h_of_L * grad_v_of_h)

        # Sum the per-step gradients (last step first) and take one descent
        # step.  NOTE: each gradient only covers its own time step; the
        # previous hidden state is treated as a constant input, so nothing is
        # propagated back through earlier steps.
        for grad_v, grad_w in zip(reversed(grad_v_of_L), reversed(grad_w_of_L)):
            v_update -= grad_v
            w_update -= grad_w

        v += alpha * v_update
        w += alpha * w_update

        v_update.fill(0.0)
        w_update.fill(0.0)

# Turn the running sum into a mean; skipped when no window was processed to
# avoid dividing by zero.
if step > 0:
    average *= 1.0 / step

#print "v: ", v
#print "w: ", w

#print "v: ", v
#print "w: ", w

Training Error
14400 0.0182575680142
28800 0.0133475364784
43200 0.0106922764265
57600 0.00837540997064
72000 0.00908710081782


In [None]:
#test
# Evaluate the trained network on held-out sensors (training_set == 0):
# run the recurrent network over each window and compare the prediction made
# after the last input with the next observation.
total_error = 0          # sum of |prediction - target|
MAD = 0                  # sum of |target - last observed value|, i.e. the error
                         # of simply predicting "no change"
step = 0                 # number of windows evaluated
sensors_tested = 0
error_count = 0
max_sensors_tested = 1   # evaluation stops after this many held-out sensors
# Same bound as in training: a window starting at i reads columns
# i .. i+rolling_window, so clamp the window count to the data length.
windows_per_sensor = max(0, min(train_max, traffic.shape[1] - rolling_window))
print("Test Error")
for sensor_iteration in range(num_sensors):
    if training_set[sensor_iteration] == 1: continue
    # Stop with an ordinary break instead of sys.exit(), which raises
    # SystemExit inside the kernel and never reported anything when there
    # were fewer than two held-out sensors.
    if sensors_tested >= max_sensors_tested: break
    train_sensor = sensor_iteration
    sensors_tested += 1
    for i in range(windows_per_sensor):
        step += 1
        history = traffic[train_sensor,i:i+rolling_window]
        # The hidden state starts at zero for every window.
        previous_hidden_layers = [np.zeros(hidden_units)]
        for j in range(rolling_window):
            # Network input = [current observation, previous hidden state, 1].
            # (Named net_input/target so the builtins input() and next() are
            # not overwritten in the notebook namespace.)
            net_input = np.append(history[j],previous_hidden_layers[-1])
            net_input = np.append(net_input,[1.0])
            hidden = ReLU(np.dot(v,net_input))
            previous_hidden_layers.append(hidden)
            if j == rolling_window - 1:
                hidden_one = np.append(hidden,[1.0])
                prediction = np.dot(w, hidden_one) #linear output
                target = traffic[train_sensor,i+j+1]
                print(prediction[0], target)
                error = prediction - target
                total_error += np.abs(error[0])
                MAD += np.abs(target - traffic[train_sensor,i+j])
                error_count += 1

# Summary over all evaluated windows; `step` is the exact number of windows.
if error_count > 0:
    print(step, total_error / (error_count))
    print("MAD: ", MAD / (error_count))
else:
    print("No held-out windows were evaluated")