# Training a commute prediction network, and visualizing learning!  
<ul> latest version available from: https://github.com/miroenev/teach_DL , prerequisites:
* Matplotlib, Numpy, Keras, and <a href="https://github.com/K3D-tools/K3D-jupyter">K3D</a> for realtime training 3D surface visualization
* TensorFlow/MxNet as the Keras backend for NN graph (queries model weights)

A video walkthrough of this notebook is <a href='https://youtu.be/HgbGJn9yz30'> available on YouTube</a>.

In [4]:
!KERNEL_BACKEND=mxnet

In [5]:
%matplotlib notebook

In [6]:
import numpy as np
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D
import k3d

#set default figure size
plt.rcParams['figure.figsize'] = [9.5, 5]

# Define the problem

Lets try to predict commute duration from two observable independent variables: the time of day and the weather conditions.

<img src='figures/commute.png' width='400'/>
<img src='figures/target_distribution.PNG' width='1000'/>
In this toy example we'll first take on the role of the 'traffic gods' and decree that commute duration is defined through a linear mixture of the two independent variables. Later we'll sample from the distribution defined by these variables and generate a training dataset. This sampling procedure will be analogous to keeping a journal of all of our commutes for some [ long ] period of time, where each log entry consists of a set of  
* <b>X</b>: [ time-of-departure, weather-condition ], and the associated  
* <b>Y</b>: [ commute-duration ].

<img src='figures/x_y_mapping.PNG' width='900'/>

Given such a journal [dataset], we'll split it into training (75%) and testing (25%) subsets which we'll use to train and evaulate our model respectively. Specifically, we'll build a neural network model whose weights are initially randomly initialized, but are trained/updated as we stream the training data through (via the backpropagation learning algorithm). Each update will get us closer to having a model that has learned the relationship between X and Y or ([ time-of-departure, weather-condition ] to [ commute-duration] ).

<img src='figures/process.PNG' width='800'/>

During the training process we'll try to visualize the network's behavior by asking it to predict all the entries in our logbook using its current parameters/weights. As the training process unfolds, you should be able to see how the network adapts itself to the target surface/function that we determined for the commute duration.

<img src='figures/training_progress.PNG' width='700'/>

# Determine underlying relationship
We'll start by esablishing (as traffic gods) the relationships between:  
* 1) the time a commute starts (time-of-departure variable) and commute-duration
* 2) the weather when a commute is started (weather-condtion variable) and commute-duration

Note that as data scientists we never get to see this function, but we try to learn it from data.

In [7]:
# define data coordinates
xRange = [0, 3*np.pi]; 
yRange = [0, 3*np.pi]; numSteps = 100

x, y = np.meshgrid( np.linspace(xRange[0], xRange[1], numSteps),
                    np.linspace(yRange[0], yRange[1], numSteps), indexing='ij' )

def normalize_domain (x):
    x = x + np.abs(np.min(x))
    x = x / (np.max(x) + .001)
    return x

# define 1D relationships to target
xComponent = np.sin( x ) * 2
yComponent = np.exp( y / 5 )

# define 2D joint distribution
z = xComponent + yComponent
z = normalize_domain(z)

In [8]:
# plot independent variables
plt.figure( figsize = ( 7, 7) )
plt.subplots_adjust( left = 0.1, right = 0.9, top = 0.9, bottom = 0.1, wspace = 0.2 )
plt.subplot(2,1,1); plt.plot(normalize_domain(xComponent[:,0])); plt.xlabel('time-of-day'), plt.ylabel('commute duration')
plt.xticks([]), plt.yticks([])
plt.subplot(2,1,2); plt.plot(normalize_domain(yComponent[0,:])); plt.xlabel('weather [ severity ]'), plt.ylabel('commute duration')
plt.xticks([]), plt.yticks([])

# plot target [dependent] variable
plt.figure( figsize = (9, 9) )
plt.subplots_adjust( left = 0.1, right = 0.9, top = 0.9, bottom = 0.1 )
ax = plt.subplot(1,1,1, projection='3d');
ax.plot_surface ( x[0::1], y[0::1], z[0::1], color = 'blue', alpha = 1, antialiased = False )
ax.set_xlabel('time of day')
ax.set_ylabel('severity of weather')
ax.set_zlabel('commute length')
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Generate  dataset
Lets generate a dataset by randomly sampling from the target distribution [ with some noise ].

In [9]:
NSamples = 5000
noiseScaling = 1/8.

shuffledDataIndsX = np.random.randint(x.shape[0], size=(NSamples,1))
shuffledDataIndsY = np.random.randint(y.shape[0], size=(NSamples,1))

trainData = np.zeros( ( NSamples, 2 ) )
targetValues = np.zeros( (NSamples, 1 ))
noiseAmount = noiseScaling * ( np.random.rand(NSamples) - .5 )

for iSample in range (NSamples):
    trainData[iSample, 0] = x[ shuffledDataIndsX[iSample], 0 ]
    trainData[iSample, 1] = y[ 0, shuffledDataIndsY[iSample] ]    
    targetValues[iSample] = z[ shuffledDataIndsX[iSample], shuffledDataIndsY[iSample] ] + noiseAmount[iSample]


# Plot dataset samples (red dots) overlayed onto target distribution (blue)

In [11]:
def plot_3D_data (k3dPlot):
    zScaling = 5

    offset = np.hstack( ( np.ones((trainData.shape[0], 1)) * -5, 
                          np.ones((trainData.shape[0], 1)) + 4, 
                          np.zeros((trainData.shape[0], 1)) ) ) * np.abs(xRange[1]-xRange[0])

    k3dPlot += k3d.points ( np.hstack( ( trainData, targetValues*zScaling) ) + offset, color=0xFF0000, point_size = .2, shader = 'flat' )
    k3dPlot += k3d.surface ( np.rot90(z,-1)*zScaling, color=0x0055FF, xmin=np.min(trainData[:,0]+offset[::,0]), xmax=np.max(trainData[:,0]+offset[::,0]), ymin=np.min(trainData[:,1]+offset[::,1]), ymax=np.max(trainData[:,1]+offset[::,1]))
    
    return zScaling, offset

plot = k3d.plot()
_, _ = plot_3D_data(plot)
plot.display()

  np.dtype(self.dtype).name))


Output()

# Define Backend

In [12]:
# RUN THIS FOR MXNET
import mxnet
!KERAS_BACKEND=mxnet

# Define model structure

In [13]:
import keras

Using MXNet backend.


In [14]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras import metrics
import keras

model = Sequential()
model.add( Dense( 10, input_dim = 2 ))
model.add( Dense( 10, activation = 'sigmoid' ))
model.add( Dense( 1 ))

model.compile(optimizer='adam', loss='mse')

# Visualize network structure

In [15]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_1 (Dense)                  (None, 10)            30          dense_input_1[0][0]              
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 10)            110         dense_1[0][0]                    
____________________________________________________________________________________________________
dense_3 (Dense)                  (None, 1)             11          dense_2[0][0]                    
Total params: 151
Trainable params: 151
Non-trainable params: 0
____________________________________________________________________________________________________


In [16]:
import matplotlib.pylab as plt

In [17]:
import sys
sys.path.append('utils')
import nnViz
import importlib

In [20]:
?keras.models.copy

In [21]:
importlib.reload(nnViz)
model.compile(optimizer='adam', loss='mse')
plt.figure()
nnViz.visualize_model(model)
#untrainedModelCopy = keras.models.clone_model(model)

<IPython.core.display.Javascript object>

Note that the weights all have a similar width/thickness as they are initialized from the same random distribution.

Model structure [loosely] inspired by NVIDIA's new HQ ;]  
http://c.ymcdn.com/sites/aiascv.org/resource/resmgr/meeting_images/2017/March/Nv2.jpeg

# Train Network (and dynamically visualize)

In [22]:
batchSize = 256
numberEpochsToTrain = 100
displayUpdateInterval = 10

In [25]:
currentNN = {}

''' offsets need to keep track of where to plot the latest model predictions '''
xOffset = np.zeros( (trainData.shape[0], 1))
yOffset = np.zeros( (trainData.shape[0], 1))
zOffset = np.zeros( (trainData.shape[0], 1))

xModifier = 1 * np.abs(xRange[1]-xRange[0])*1.2; yModifier = 0; zModifier = 0; 
plotCount = 0

# placeholders to store loss history
evalLoss = np.empty((numberEpochsToTrain))
evalLoss[:] = np.NaN

# 3D plot
plot = k3d.plot()
zScaling, offset = plot_3D_data(plot)
plot.display()

''' training & plotting loop '''
for iEpoch in range( numberEpochsToTrain ):
    
    # update model parameters
    model.fit ( trainData, targetValues, 
               nb_epoch = 1, 
               batch_size = batchSize,
               verbose = 0, 
               shuffle = True )
     
    # store loss/error history
    evalLoss[iEpoch] = model.evaluate( trainData, targetValues )
    
    # plot logic
    if iEpoch % displayUpdateInterval == 0:
        comboOffset = np.hstack( (xOffset + xModifier, yOffset + yModifier, zOffset + zModifier) )
        
        currentNN[plotCount] = model.predict( trainData )
        
        print('\n current error: ' + str( evalLoss[plotCount]) )
        plot += k3d.points ( np.hstack( ( trainData, currentNN[plotCount] * zScaling) ) + comboOffset + offset, color=0xA9A9FF, point_size = .2, shader = 'flat' )        
        #plot += k3d.text2d ( str( round(evalLoss[plotCount], 4 )), comboOffset + offset + (0, 0, 3), color=0xff00ff, size=.5, reference_point='rb')
        
        plotCount += 1
        if plotCount % 8 == 0:
            xModifier = 1 * np.abs(xRange[1]-xRange[0])*1.2
            yModifier -= 1 * np.abs(yRange[1]-yRange[0])*1.2
        else:
            xModifier += 1 * np.abs(xRange[1]-xRange[0])*1.2

  np.dtype(self.dtype).name))


Output()

  force_init=force_init)


 current error: 0.0289853025317
 current error: 0.0230785168141
 current error: 0.0207477136314
 current error: 0.0196621583909
 current error: 0.019124919802
 current error: 0.0187815335304
 current error: 0.0183166479528
 current error: 0.018048277244
 current error: 0.017827846545
 current error: 0.0176020044506

# Plot learning curve

In [26]:
plt.figure()
plt.plot(evalLoss[:], 'b')
plt.plot(evalLoss[:], 'or', markersize=3)
plt.title('training error')
plt.xlabel('epochs')
plt.ylabel('cumulative loss')

<IPython.core.display.Javascript object>

Text(0,0.5,'cumulative loss')

# Visualize network predictions <font color=FF0000>before</font> any training

In [27]:
plot = k3d.plot()
plot += k3d.points ( np.hstack( ( trainData, currentNN[0]*zScaling) ), color=0xFF00FF, point_size = .3, shader = 'flat' )        
plot += k3d.surface ( np.rot90(z,-1) * zScaling, color=0x888888, xmin=np.min(xRange), xmax=np.max(xRange), ymin=np.min(yRange), ymax=np.max(yRange))
plot.display()

  np.dtype(self.dtype).name))


Output()

# Visualize network predictions <font color=FF0000>midway</font> through training

In [28]:
plot = k3d.plot()

plot += k3d.points ( np.hstack( ( trainData, currentNN[int(plotCount/2)]*zScaling) ), color=0xFF00FF, point_size = .3, shader = 'flat' )        
plot += k3d.surface ( np.rot90(z, -1) * zScaling, color=0x555555, xmin=np.min(xRange), xmax=np.max(xRange), ymin=np.min(yRange), ymax=np.max(yRange))
plot.display()

  np.dtype(self.dtype).name))


Output()

# Visualize network predictions at <font color=FF0000>end</font> of training

In [29]:
plot = k3d.plot()
plot += k3d.points ( np.hstack( ( trainData, currentNN[plotCount-1]*zScaling) ), color=0xFF00FF, point_size = .3, shader = 'flat' )        
plot += k3d.surface ( np.rot90(z, -1) * zScaling, color=0x555555, xmin=np.min(xRange), xmax=np.max(xRange), ymin=np.min(yRange), ymax=np.max(yRange))
plot.display()

  np.dtype(self.dtype).name))


Output()

# Visualize weights before and after training

In [32]:
importlib.reload(nnViz)

plt.figure(figsize=(8,10))
'''
plt.subplot(2,1,1)
nnViz.visualize_model(untrainedModelCopy)
plt.title('untrained');

plt.subplot(2,1,2)
'''
plt.title('trained');
nnViz.visualize_model(model)

<IPython.core.display.Javascript object>

In [43]:
print('thanks!')

thanks!
