In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F 

%matplotlib inline
\
dtype = torch.float32

# Random state seed
seed = 1234
# For reproducibility
torch.manual_seed(seed)
# np.random.seed(seed)
torch.use_deterministic_algorithms(True)

In [2]:
def load_dataset():
    train_dataset = h5py.File('/kaggle/input/happy-dataset/train_happy.h5', "r")
    test_dataset = h5py.File('/kaggle/input/happy-dataset/test_happy.h5', "r")

    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])
    classes = np.array(test_dataset["list_classes"][:])

    train_set_y_orig = np.transpose(train_set_y_orig.reshape((1, train_set_y_orig.shape[0])))
    test_set_y_orig = np.transpose(test_set_y_orig.reshape((1, test_set_y_orig.shape[0])))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

train_set_X, train_set_y, test_set_X, test_set_y, classes = load_dataset()
# print("train set size", train_set_X.shape)
# print("train label size", train_set_y.shape)
# print("test set size", test_set_X.shape)
# print("test label size", test_set_y.shape)
# print("classes size", classes.shape)
# print(classes)
# print(train_set_y[123])
# plt.imshow(train_set_X[125])

In [3]:
def create_validation_set(train_set_X, train_set_y, test_size=0.2, random_state=seed):
    """
    Divides the training set into training and validation set
    
    Input:
    - train_set_X: Training set samples containing only features (no label)
    - train_set_y: Training set labels
    - test_size: (optional) % of training data to be separated as validation data
    
    Output:
    - X_train: Training samples
    - y_train: Training labels
    - X_valid: Validation samples
    - y_valid: Validation labels
    """
    X_train, X_valid, y_train, y_valid = train_test_split(train_set_X, train_set_y, test_size=test_size, random_state=seed)
    print("Train set size:", X_train.shape)
    print("Validation set size:", X_valid.shape)
    return X_train, y_train, X_valid, y_valid

train_X, y_train, valid_X, y_valid = create_validation_set(train_set_X, train_set_y, test_size=0.2)
print("Train set size:", train_X.shape)
print("Train label size:", y_train.shape)
print("Validation set size:", valid_X.shape)
print("Validation set size:", y_valid.shape)

# Preparing the data
X_train = train_X.reshape(480, 64*64*3)
X_valid = valid_X.reshape(120, 64*64*3)
test_set_X = test_set_X.reshape(150, 64*64*3)
print("Training data shape", X_train.shape)
print("Validation data shape", X_valid.shape)
print("Test data shape", test_set_X.shape)

def normalization(x, mu, std):
    """
    Normalization
    
    Input:
    - x: data
    - mu: average
    - std: standard deviation
    
    Output:
    - x_scaled: normalized output
    """
    x_scaled = (x-mu)/std
    return x_scaled

# Normalize the data
mean_X = X_train.mean()
print(mean_X)
std_X = X_train.std()
print(std_X)
X_train_scl = normalization(X_train, mean_X, std_X)
X_valid_scl = normalization(X_valid, mean_X, std_X)
X_test_scl = normalization(test_set_X, mean_X, std_X)
# print("Train size", X_train_scl.shape)
# print("Maximum of ttrain", X_train_scl.max())
# print("Mean of ttrain", X_train_scl.mean())
# print("Validation size", X_valid_scl.shape)
# print("Maximum of valid", X_valid_scl.max())
# print("Mean of valid", X_valid_scl.mean())
# print("Test size", X_test_scl.shape)
# print("Maximum of test", X_test_scl.max())
# print("Mean of test", X_test_scl.mean())

##################
# Normalized data
##################
m = X_train.shape[0] # no of training samples
# print("No of training samples", m)
n = X_train.shape[1] # no of features
# print("No of features", n)
X_train_app = np.c_[np.ones(m), X_train_scl] # append a column of 1
X_valid_app = np.c_[np.ones(120), X_valid_scl] # append a column of 1
X_test_app = np.c_[np.ones(150), X_test_scl] # append a column of 1
alpha = 1e-3 # learning rate
iters = 1000 # no of iterations
# initial_theta = initialize_theta(n)
# theta, J = gradient_descent(X=X_train_app, y=y_train, theta=initial_theta, alpha=alpha, iters=iters)
# plt.plot(range(0,iters), J)

Train set size: (480, 64, 64, 3)
Validation set size: (120, 64, 64, 3)
Train set size: (480, 64, 64, 3)
Train label size: (480, 1)
Validation set size: (120, 64, 64, 3)
Validation set size: (120, 1)
Training data shape (480, 12288)
Validation data shape (120, 12288)
Test data shape (150, 12288)
142.07640957302516
81.1476808724206


In [4]:
def flatten_2d(x):
    m = x.shape[0] 
    flat_x = x.view(-1, m) # shape n by m
    return flat_x
def two_layer_fc(x, params):
    
    a0 = flatten_2d(x)   # n by m   
    w1, b1, w2, b2 = params
    
    a1 = F.relu(w1.mm(a0) + b1)
#     print("size of a1", a1.size())
    a2 = torch.sigmoid(w2.mm(a1) + b2)
#     print("size of a2", a2.size())
    return a2
def three_layer_fc(x, params):
    
    a0 = flatten_2d(x)   # n by m   
    w1, b1, w2, b2, w3, b3 = params
    
    a1 = F.relu(w1.mm(a0) + b1)
    a2 = F.relu(w2.mm(a1) + b2)
    a3 = torch.sigmoid(w3.mm(a2) + b3)
    return a3
def four_layer_fc(x, params):
    
    a0 = flatten_2d(x)   # n by m   
    w1, b1, w2, b2, w3, b3, w4, b4 = params
    
    a1 = F.relu(w1.mm(a0) + b1)
    a2 = F.relu(w2.mm(a1) + b2)
    a3 = F.relu(w3.mm(a2) + b3)
    a4 = torch.sigmoid(w4.mm(a3) + b4)
    return a4

def two_layer_fc_test():
    hidden_layer_size1 = 42
    hidden_layer_size2 = 21
    hidden_layer_size3 = 10
    x = torch.zeros((64, 50), dtype=dtype)  # minibatch size 64, feature dimension 50
    w1 = torch.zeros((hidden_layer_size1, 50), dtype=dtype)
    b1 = torch.zeros((hidden_layer_size1, 1), dtype=dtype)
    w2 = torch.zeros((hidden_layer_size2, hidden_layer_size1), dtype=dtype)
    b2 = torch.zeros((hidden_layer_size2, 1), dtype=dtype)
    w3 = torch.zeros((hidden_layer_size3, hidden_layer_size2), dtype=dtype)
    b3 = torch.zeros((hidden_layer_size3, 1), dtype=dtype)
    w4 = torch.zeros((1, hidden_layer_size3), dtype=dtype)
    b4 = torch.zeros((1, 1), dtype=dtype)
    scores = four_layer_fc(x, [w1, b1, w2, b2, w3, b3, w4, b4])
    print(scores.size())  # you should see [64, 10]

two_layer_fc_test()

torch.Size([1, 64])


In [5]:
def random_weight(shape):
    """
    Create random Tensors for weights; setting requires_grad=True means that we
    want to compute gradients for these Tensors during the backward pass.
    We use Kaiming normalization: sqrt(2 / fan_in)
    """
#     if len(shape) == 2:  # FC weight
    connections = shape[0]
#     else:
#         fan_in = np.prod(shape[1:]) # conv weight [out_channel, in_channel, kH, kW]
    # randn is standard normal distribution generator. 
    w = torch.randn(shape, dtype=dtype) * np.sqrt(2. / connections)
    w.requires_grad = True
    return w

def zero_weight(shape):
    return torch.zeros(shape, dtype=dtype, requires_grad=True)

def check_accuracy_part2(x, y, model_fn, params):
    
#     split = 'val' if loader.dataset.train else 'test'
#     print('Checking accuracy on the %s set' % split)
#     num_correct, num_samples = 0, 0
    with torch.no_grad():
#         for x, y in loader:
#             x = x.to(dtype=dtype)  # move to device, e.g. GPU
#             y = y.to(dtype=torch.int64)
#             scores = model_fn(x, params)
#             _, preds = scores.max(1)
#             num_correct += (preds == y).sum()
#             num_samples += preds.size(0)
#         acc = float(num_correct) / num_samples     
        preds = torch.round(model_fn(x, params))
        acc = (preds == torch.transpose(y, 0, 1)).sum() / preds.size(1)
        print("Accuracy", 100 * acc)
    return preds, acc

def train_part2(X_train, y_train, X_valid, y_valid, model_fn, params, learning_rate, iters, print_every=200):
    """
    Train a model on CIFAR-10.
    
    Inputs:
    - model_fn: A Python function that performs the forward pass of the model.
      It should have the signature scores = model_fn(x, params) where x is a
      PyTorch Tensor of image data, params is a list of PyTorch Tensors giving
      model weights, and scores is a PyTorch Tensor of shape (N, C) giving
      scores for the elements in x.
    - params: List of PyTorch Tensors giving weights for the model
    - learning_rate: Python scalar giving the learning rate to use for SGD
    
    Returns: loss
    """
#     J = np.zeros(iters) #initialize loss
#     for t in range(iters):
    # Move the data to the proper device (GPU or CPU)
    x = X_train.to(dtype=dtype)
    y = y_train.to(dtype=dtype)

    # Forward pass: compute scores and loss
    scores = model_fn(x, params)
    loss = F.binary_cross_entropy(scores, torch.transpose(y, 0, 1))
#     J[t] = loss
    # Backward pass: PyTorch figures out which Tensors in the computational
    # graph has requires_grad=True and uses backpropagation to compute the
    # gradient of the loss with respect to these Tensors, and stores the
    # gradients in the .grad attribute of each Tensor.
    loss.backward()

    # Update parameters. We don't want to backpropagate through the
    # parameter updates, so we scope the updates under a torch.no_grad()
    # context manager to prevent a computational graph from being built.
    with torch.no_grad():
        for w in params:
            w -= learning_rate * w.grad

            # Manually zero the gradients after running the backward pass
            w.grad.zero_()

#     if t % print_every == 0:
    print('Loss = %.4f' % (loss.item()))
    preds, acc = check_accuracy_part2(X_valid, y_valid, model_fn, params)
    print("Final validation accuracy", acc)
    return preds, acc

hidden_layer_size1 = 6145
hidden_layer_size2 = 3173
hidden_layer_size3 = 1587
learning_rate = 1e-3
iters = 1000 # no of iterations

# w1 = random_weight((hidden_layer_size1, 3 * 64 * 64 + 1))
# b1 = zero_weight((hidden_layer_size1, 1))
# w2 = random_weight((1, hidden_layer_size1))
# b2 = zero_weight((1, 1))
# params = [w1, b1, w2, b2]
# w1 = random_weight((hidden_layer_size1, 3 * 64 * 64 + 1))
# b1 = zero_weight((hidden_layer_size1, 1))
# w2 = random_weight((hidden_layer_size2, hidden_layer_size1))
# b2 = zero_weight((hidden_layer_size2, 1))
# w3 = random_weight((1, hidden_layer_size2))
# b3 = zero_weight((1, 1))
# params = [w1, b1, w2, b2, w3, b3]
w1 = random_weight((hidden_layer_size1, 3 * 64 * 64 + 1))
b1 = zero_weight((hidden_layer_size1, 1))
w2 = random_weight((hidden_layer_size2, hidden_layer_size1))
b2 = zero_weight((hidden_layer_size2, 1))
w3 = random_weight((hidden_layer_size3, hidden_layer_size2))
b3 = zero_weight((hidden_layer_size3, 1))
w4 = random_weight((1, hidden_layer_size3))
b4 = zero_weight((1, 1))
params = [w1, b1, w2, b2, w3, b3, w4, b4]



# Constant to control how frequently we print train loss.
print_every = 200

# convert numpy array to tensors
X_tr_tensor = torch.from_numpy(X_train_app)
Y_tr_tensor = torch.from_numpy(y_train)
X_val_tensor = (torch.from_numpy(X_valid_app)).to(dtype=dtype)
Y_val_tensor = (torch.from_numpy(y_valid)).to(dtype=dtype)

preds, acc = train_part2(X_tr_tensor, Y_tr_tensor, X_val_tensor, Y_val_tensor, four_layer_fc, params, learning_rate, iters, print_every=print_every)

Loss = 47.2639
Accuracy tensor(49.1667)
Final validation accuracy tensor(0.4917)
