In [34]:
import pandas as pd     # load pandas
import numpy as np      # load numpy
from sklearn.preprocessing import MinMaxScaler 
import matplotlib.pyplot as plt

# Module-level scaler instance; preprocess() calls scaler.fit_transform on each column it normalizes.
scaler = MinMaxScaler()
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

### Function Definitions

In [35]:
"""
The preprocess function simply drops all columns other than the required columns - temperature and humidity.
A MinMax scaler is used to transform the relevant column data to lie between 0 and 1.
"""
def preprocess(dataset):
    """
    Keep only the temperature and humidity columns and min-max scale each of them.

    The columns are picked by position (3 and 5), renamed to 'temperature' and
    'humidity', and each one is passed separately through the module-level
    `scaler` (`fit_transform`).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Raw frame with at least six columns; position 3 is used as temperature
        and position 5 as humidity.

    Returns
    -------
    pandas.DataFrame
        New two-column frame ['temperature', 'humidity']. `dataset` itself is
        not modified.

    Notes
    -----
    `scaler` is refit on every `fit_transform` call, so after this function
    returns it holds the fit of the humidity column only.
    """
    # Take an explicit copy so the assignments below write to a frame we own,
    # instead of relying on the SettingWithCopy warning being silenced globally.
    # Positional selection also stays unambiguous if column labels repeat.
    processed_dataset = dataset.iloc[:, [3, 5]].copy()
    processed_dataset.columns = ['temperature', 'humidity']

    # Normalization: each column is fit and transformed on its own.
    processed_dataset[["temperature"]] = scaler.fit_transform(processed_dataset[["temperature"]])
    processed_dataset[["humidity"]] = scaler.fit_transform(processed_dataset[["humidity"]])
    return processed_dataset

"""
We use the mean squared loss, i.e. sum((Y - X@W)**2) / n, where n is the number of rows of X
"""
def loss(W, Y, X):
    """
    Mean squared error of the linear model X @ W against the targets Y.

    Computed as the sum of squared residuals (Y - X @ W) divided by the number
    of rows of X.
    """
    residual = Y - X @ W
    n_samples = X.shape[0]
    return np.sum(np.square(residual)) / n_samples

"""
Derivative of the mean squared loss
"""
def derivative(W, Y, X):
    """
    Gradient of the mean squared loss with respect to W.

    Equals 2 * X.T @ (X @ W - Y) divided by the number of rows of X, and has
    the same shape as W.
    """
    error = X @ W - Y
    n_samples = X.shape[0]
    return 2 * (X.T @ error) / n_samples

"""
Regularized loss with L2 regularization over weights
"""
def regularized_loss(W, Y, X, alpha):
    """
    Mean squared error of X @ W against Y plus an L2 penalty on the weights.

    The penalty is alpha times the sum of squared entries of W; alpha = 0
    gives the plain mean squared loss.
    """
    residual = Y - X @ W
    data_term = np.sum(np.square(residual)) / X.shape[0]
    penalty = alpha * np.sum(np.square(W))
    return data_term + penalty

"""
Derivative of regularized loss
"""
def regularized_derivative(W, Y, X, alpha):
    """
    Gradient of the L2-regularized mean squared loss with respect to W.

    Equals 2 * X.T @ (X @ W - Y) / (rows of X) + 2 * alpha * W and has the
    same shape as W.
    """
    error = X @ W - Y
    # `*` and `@` share precedence and associate left to right, so writing
    # `2*X.T@error` would evaluate `(2*X.T) @ error` and build a doubled copy of
    # the whole design matrix on every call. Scale the small product instead.
    data_gradient = 2 * (X.T @ error) / X.shape[0]
    return data_gradient + 2 * alpha * W

In [36]:
# URL of the raw weather-history CSV; passed to pd.read_csv in the loading cell.
dataset_location = "https://raw.githubusercontent.com/gurnoor6/cs725/main/weatherHistory.csv"

### Loading the dataset

In [37]:
# Read the CSV at dataset_location into a DataFrame (needs network access).
# low_memory=False has pandas parse the file as a whole rather than in internal chunks.
dataset = pd.read_csv(dataset_location, low_memory=False)
dataset.head()  # preview the first rows

Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.


In [38]:
"""
Pre-process the dataset with our pre-process function!
"""
# processed_ds holds only the 'temperature' and 'humidity' columns, each scaled by preprocess().
processed_ds = preprocess(dataset)
processed_ds.head()  # preview the first rows of the scaled frame

Unnamed: 0,temperature,humidity
0,0.506975,0.89
1,0.505085,0.86
2,0.505445,0.89
3,0.487805,0.83
4,0.495365,0.83


### Splitting into test and train

In [39]:
# Input feature X is the scaled humidity, target Y is the scaled temperature (both as numpy arrays).
X, Y = (processed_ds[column].to_numpy() for column in ('humidity', 'temperature'))

train_fraction = 0.8                                       # share of the rows used for training
train_size = int(X.shape[0]*train_fraction)

# Seeded shuffle of the row positions, so the same split is produced on every run.
np.random.seed(0)
indices = np.arange(0, X.shape[0])
np.random.shuffle(indices)

# The first train_size shuffled positions form the training set, the rest the test set.
train_rows, test_rows = indices[:train_size], indices[train_size:]

train_X, train_Y = X[train_rows], Y[train_rows]
test_X, test_Y = X[test_rows], Y[test_rows]

### Features and basis functions

In [40]:
"""
Here we consider basis functions to be: f0(x) = 1, f1(x) = x, f2(x) = x**2, f3(x) = x**3. Try adding more functions to the
list and check how the loss changes. Try playing with other parameters as well!
"""
def polynomial_features(x, degree=3):
    """
    Build the design matrix for the polynomial basis f_k(x) = x**k, k = 0..degree.

    Parameters
    ----------
    x : 1-D array
        Raw feature values.
    degree : int, optional
        Highest power to include. The default 3 gives the columns
        1, x, x**2, x**3.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(x), degree + 1); column k holds x**k.

    Raises
    ------
    ValueError
        If `x` is not 1-D, e.g. when this cell is re-run on features that were
        already expanded.
    """
    x = np.asarray(x)
    if x.ndim != 1:
        raise ValueError(
            "polynomial_features expects a 1-D feature array, got shape {}; "
            "re-run the train/test split cell before this one.".format(x.shape)
        )
    basis = [np.ones_like(x)] + [x**power for power in range(1, degree + 1)]
    # One row per basis function, then transpose to one row per sample.
    return np.stack(basis).T


# Expand both splits before assigning anything, so a failure leaves all four variables untouched.
train_features = polynomial_features(train_X)
test_features = polynomial_features(test_X)

train_X, test_X = train_features, test_features

# Targets become column vectors so they line up with X @ W, where W has a single column.
train_Y = train_Y[:, None]
test_Y = test_Y[:, None]

### Gradient Descent

In [41]:
"""
Try playing around with different parameters to see how it affects the loss
"""
# Full-batch gradient descent on the mean squared loss.
W = np.random.rand(train_X.shape[1], 1)  # initialize weights: one per basis function
epsilon = 1e-5                           # stop once the loss changes by no more than this between updates
lr = 1e-3                                # learning rate

last_loss = 0                            # the first comparison is against 0
epochs = 0

# The loss is evaluated once per update and carried into the next stopping test,
# rather than being recomputed for the same W inside the loop body.
current_loss = loss(W, train_Y, train_X)
while abs(current_loss - last_loss) > epsilon:
    last_loss = current_loss
    dW = derivative(W, train_Y, train_X)      # gradient at the current weights
    W = W - lr*dW                             # step against the gradient
    current_loss = loss(W, train_Y, train_X)  # loss at the updated weights
    epochs += 1

print("Num epochs: ", epochs)

Num epochs:  2019


In [42]:
# Summary of the fit above: mean of the learned weights, then the mean squared loss
# on the training split and on the held-out test split.
print("Mean weights: ", np.mean(W))
print("Training loss: ",loss(W, train_Y, train_X))
print("Test loss: ", loss(W, test_Y, test_X))

Mean weights:  0.1575657972736026
Training loss:  0.041093496198285
Test loss:  0.04084759679192174


### Regularized Training

In [43]:
"""
Try playing around with different parameters to see how it affects the loss
"""
# Full-batch gradient descent on the L2-regularized mean squared loss.
W = np.random.rand(train_X.shape[1], 1)  # initialize weights: one per basis function
epsilon = 1e-5                           # stop once the loss changes by no more than this between updates
lr = 1e-3                                # learning rate
alpha = 1e-2                             # weight of the L2 penalty

last_loss = 0                            # the first comparison is against 0
epochs = 0

# The loss is evaluated once per update and carried into the next stopping test,
# rather than being recomputed for the same W inside the loop body.
current_loss = regularized_loss(W, train_Y, train_X, alpha)
while abs(current_loss - last_loss) > epsilon:
    last_loss = current_loss
    dW = regularized_derivative(W, train_Y, train_X, alpha)      # gradient at the current weights
    W = W - lr*dW                                                # step against the gradient
    current_loss = regularized_loss(W, train_Y, train_X, alpha)  # loss at the updated weights
    epochs += 1

print("Num epochs: ", epochs)

Num epochs:  3737


In [44]:
# Summary of the regularized fit. The losses printed here come from the plain `loss`
# (no penalty term), so they are directly comparable with the unregularized run.
print("Mean weights: ", np.mean(W))
print("Training loss: ",loss(W, train_Y, train_X))
print("Test loss: ", loss(W, test_Y, test_X))

Mean weights:  0.15618855002065585
Training loss:  0.0426971229745167
Test loss:  0.04239950538710364


### Mini-Batch Gradient Descent

In [45]:
"""
Try playing around with different parameters to see how it affects the loss
"""
# Mini-batch gradient descent: every update uses a fresh random batch of batch_sz rows.
np.random.seed(0)
W = np.random.rand(train_X.shape[1], 1)  # initialize weights: one per basis function
epsilon = 1e-6                           # stopping precision
lr = 1e-3                                # learning rate
batch_sz = 128                           # rows per batch

last_loss = 0                            # the first comparison is against 0
epochs = 0                               # counts weight updates (one batch each), not passes over the data

# A batch is the first batch_sz entries of a freshly shuffled index array.
indices = np.arange(0, train_X.shape[0])
np.random.shuffle(indices)
train_batch_X = train_X[indices[:batch_sz]]
train_batch_Y = train_Y[indices[:batch_sz]]

# Note: the stopping test compares the loss on the current batch with the loss
# measured on the previous batch, so the stopping point depends on the batches drawn.
# The batch loss is evaluated once and carried into the next test instead of being
# recomputed for the same W and batch inside the loop body.
current_loss = loss(W, train_batch_Y, train_batch_X)
while abs(current_loss - last_loss) > epsilon:
    last_loss = current_loss
    dW = derivative(W, train_batch_Y, train_batch_X)      # gradient on the current batch
    W = W - lr*dW                                         # step against the gradient

    np.random.shuffle(indices)                            # randomly sample the next batch
    train_batch_X = train_X[indices[:batch_sz]]
    train_batch_Y = train_Y[indices[:batch_sz]]
    current_loss = loss(W, train_batch_Y, train_batch_X)  # updated weights on the new batch

    epochs += 1

print("Num Epochs: ", epochs)

Num Epochs:  5819


In [46]:
# Summary of the mini-batch fit: mean of the learned weights, then the mean squared loss
# evaluated on the full training split and on the held-out test split.
print("Mean weights: ", np.mean(W))
print("Training loss: ", loss(W, train_Y, train_X))
print("Test loss: ", loss(W, test_Y, test_X))

Mean weights:  0.12788518774504354
Training loss:  0.025310742057821644
Test loss:  0.0250601385943919
