In this homework assignment, you are requested to implement a full backprop algorithm using only *numpy*.

- We assume sigmoid activation across all layers.
- We assume a single value in the output layer

In [52]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
np.random.seed(42)

The following class represents a simple feed forward network with multiple layers. The network class provides methods for running forward and backward for a single instance, throught the network. You should implement the methods (indicated with TODO), that performs forward and backward for an entire batch. Note, the idea is to use matrix multiplications, and not running standard loops over the instances in the batch.

In [53]:
class MyNN:
  def __init__(self, learning_rate, layer_sizes):
    '''
    learning_rate - the learning to use in backward
    layer_sizes - a list of numbers, each number repreents the nuber of neurons
                  to have in every layer. Therfore, the length of the list
                  represents the number layers this network has.
    '''
    self.learning_rate = learning_rate
    self.layer_sizes = layer_sizes
    self.model_params = {}
    self.memory = {}
    self.grads = {}

    # Initializing weights
    for layer_index in range(len(layer_sizes) - 1):
      W_input = layer_sizes[layer_index + 1]
      W_output = layer_sizes[layer_index]
      self.model_params['W_' + str(layer_index + 1)] = np.random.randn(W_input, W_output) * 0.1
      self.model_params['b_' + str(layer_index + 1)] = np.random.randn(W_input) * 0.1


  def forward_single_instance(self, x):
    a_i_1 = x
    self.memory['a_0'] = x
    for layer_index in range(len(self.layer_sizes) - 1):
      W_i = self.model_params['W_' + str(layer_index + 1)]
      b_i = self.model_params['b_' + str(layer_index + 1)]
      z_i = np.dot(W_i, a_i_1) + b_i
      a_i = 1/(1+np.exp(-z_i))
      self.memory['a_' + str(layer_index + 1)] = a_i
      a_i_1 = a_i
    return a_i_1


  def log_loss(self, y_hat, y):
    '''
    Logistic loss, assuming a single value in y_hat and y.
    '''
    m = y_hat[0]
    cost = -y[0]*np.log(y_hat[0]) - (1 - y[0])*np.log(1 - y_hat[0])
    return cost


  def backward_single_instance(self, y):
    a_output = self.memory['a_' + str(len(self.layer_sizes) - 1)]
    dz = a_output - y

    for layer_index in range(len(self.layer_sizes) - 1, 0, -1):
      print(layer_index)
      a_l_1 = self.memory['a_' + str(layer_index - 1)]
      dW = np.dot(dz.reshape(-1, 1), a_l_1.reshape(1, -1))
      self.grads['dW_' + str(layer_index)] = dW
      W_l = self.model_params['W_' + str(layer_index)]
      dz = (a_l_1 * (1 - a_l_1)).reshape(-1, 1) * np.dot(W_l.T, dz.reshape(-1, 1))
      # TODO: calculate and memorize db as well.

  # TODO: update weights with grads
  #def update(self):

  # TODO: implement forward for a batch X.shape = (network_input_size, number_of_instance)
  #def forward_batch(self, X)

  # TODO: implement backward for a batch y.shape = (1, number_of_instance)
  #def backward_batch(self, y)

  # TODO: implement log_loss_batch, for a batch of instances
  # def log_loss(self, y_hat, y):

In [54]:
nn = MyNN(0.01, [3, 2, 1])

In [55]:
nn.model_params

{'W_1': array([[ 0.04967142, -0.01382643,  0.06476885],
        [ 0.15230299, -0.02341534, -0.0234137 ]]),
 'b_1': array([0.15792128, 0.07674347]),
 'W_2': array([[-0.04694744,  0.054256  ]]),
 'b_2': array([-0.04634177])}

In [56]:
x = np.random.randn(3)
y = np.random.randn(1)

y_hat = nn.forward_single_instance(x)
print(y_hat)

[0.48946]


In [57]:
nn.backward_single_instance(y)

2
1


In [91]:
def train(X, y, epochs, batch_size):
  '''
  Train procedure, please note the TODOs inside
  '''
  for e in range(1, epochs + 1):
    epoch_loss = 0

    # Shuffle the numpy array but keep the same order of shuffeling between the 1-axis arrays
    permutation = np.random.permutation(X.shape[1])
    X = np.array([sub_x[permutation] for sub_x in X])
    y = y[0][permutation].reshape(1, X.shape[1])

    # Split the numpy arrays into batches
    chunks = np.arange(batch_size,X.shape[1],batch_size)
    X_chunks = np.split(X, chunks, 1)
    y_chunks = np.split(y, chunks, 1)
    batches = [(X_b, y_b) for X_b, y_b in zip(X_chunks, y_chunks)]
      
    for X_b, y_b in batches:
      y_hat = nn.forward_batch(X_b)
      epoch_loss += nn.log_loss_batch(y_hat, y_b)
      nn.backward_batch(y_b)
      nn.update()
    print(f'Epoch {e}, loss={epoch_loss/len(batches)}')


In [92]:
# TODO: Make sure the following network trains properly

nn = MyNN(0.001, [6, 4, 3, 1])

X = np.random.randn(6, 100)
y = np.random.randn(1, 100)
batch_size = 8
epochs = 2

#print(f"X is {X}")
#print(X.shape)
#print(f"y is {y}")
#print(y.shape)

train(X, y, epochs, batch_size)

AttributeError: 'MyNN' object has no attribute 'forward_batch'

#TODO: train on an external dataset

Train on the *hour.csv* file with a split of 75% training 10% validation and 15% for test.
Use the following features from the data:

* temp
* atemp
* hum
* windspeed
* weekday

The response variable is, *success*

The architecture of the network should be: [5, 40, 30, 10, 7, 5, 3, 1].

Use batch_size=8, and train it for 100 epochs on the train set (based on the split as requested above).

Then, plot train and validation loss per epoch.

##  your code goes here

###  Data Preprocessing

In [93]:
df = pd.read_csv("hour.csv")

display(df)

features = df[["temp", "atemp", "hum", "windspeed", "weekday"]].to_numpy()
features = np.transpose(features)
print(features)

classification = np.array(df['success']).astype(int)
classification = np.expand_dims(classification, axis=0)
print(classification)

print(features.shape)
print(classification.shape)

# TODO: Preprocess the bike sharing dataset ('hour.csv')
# - Load the dataset from the provided hour.csv file
# - Select the required features (temp, atemp, hum, windspeed, weekday)
# - Extract the target variable (success)
# - Normalize/standardize features if necessary
# - Split the data into training (75%), validation (10%), and test (15%) sets
# - Create DataLoader objects with batch_size=8

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,success
0,1,01/01/2011,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,3,13,16,False
1,2,01/01/2011,1,0,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,8,32,40,False
2,3,01/01/2011,1,0,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,5,27,32,False
3,4,01/01/2011,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,3,10,13,False
4,5,01/01/2011,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,0,1,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17374,17375,31/12/2012,1,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,11,108,119,False
17375,17376,31/12/2012,1,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,8,81,89,False
17376,17377,31/12/2012,1,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,7,83,90,False
17377,17378,31/12/2012,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,13,48,61,False


[[0.24   0.22   0.22   ... 0.26   0.26   0.26  ]
 [0.2879 0.2727 0.2727 ... 0.2576 0.2727 0.2727]
 [0.81   0.8    0.8    ... 0.6    0.56   0.65  ]
 [0.     0.     0.     ... 0.1642 0.1343 0.1343]
 [6.     6.     6.     ... 1.     1.     1.    ]]
[[0 0 0 ... 0 0 0]]
(5, 17379)
(1, 17379)


### Model Training


In [None]:
# TODO: Train the neural network
# - Implement the network with architecture [5, 40, 30, 10, 7, 5, 3, 1]
# - Train for exactly 100 epochs on the training set
# - Use batch_size=8 as specified
# - Calculate and store train and validation loss for each epoch
# - Track training progres

### Visualization

In [None]:
# TODO: Create visualizations of the learning process
# - Plot the training loss per epoch
# - Create additional relevant plots (validation loss, learning curves, etc.)
# - Make sure all plots have proper labels, titles, and legends
# - Add brief analysis of what the plots reveal about your model's performance

### Model Evaluation


In [None]:
# TODO: Evaluate model performance on the test set
# - Calculate and report the loss on the test set
# - Calculate and report the accuracy on the test set
# - Compare test performance with training/validation performance
# - Analyze model strengths and weaknesses
# - Discuss any overfitting/underfitting issues observed