In [1]:
import os
import numpy as np
import seaborn as sb
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import make_regression

%matplotlib inline

root_random = 50
np.random.seed(root_random)

# Section 1

- Becoming familiar with tensor based operations and generating synthetic data.
- Are input features uninformative? Are the hyper-parameters set correctly? Is there an inherent bug?
- Test by passing data with known correct distribution and compare correctness

# Problem
- This task is a regression task
- Use Tensors on Synthetic data I produce
- Come up with a mapping function from Rm to Rn, where m and n are set and both are > 3.
- Provide the equation of the functions
- Add random noise to the output to make the problem more interesting
- Ensure underlying function is still recognisable
- Be careful of expected value of noise
- Pass data to the algorithm to test the output model

# Notes
- Do not use regression libraries. Must all be implemented using tensors.
- Avoid using loops where tensor operations could act on multiple values at once.
- Avoid using matrix inverse operators. Use gradients instead.
- Be mindful of gradient tracking.
- Be sure to generate enough data.
- Choose appropriate learning rate
- Choose appropriate learning termination criteria.

In [2]:
#Produce Synthetic Data
#Functions 
#Target (R5) = Input (R4)
#Y1 = X1
#Y2 = 2*(X2 - X1)
#Y3 = X2/(X1 - X4)
#Y4 = X3 + X4 - X1
#Y5 = X4 - X2 * X3

def generate_synthetic_data():
    #Generate data
    dataset = 10*np.random.random_sample((20, 4))
    return dataset

def add_noise(data):
    adding = np.random.random_sample((20, 4))-0.5
    out = np.add(data, adding)
            
    return out

dataset = generate_synthetic_data()
results = np.zeros((20, 5))
for i in range(20):
    for j in range(5):
        if j == 0:
            results[i][j] = dataset[i][0]
        elif j == 1:
            results[i][j] = 2 * (dataset[i][1] - dataset[i][0])
        elif j == 2:
            results[i][j] = dataset[i][1]/(dataset[i][0] - dataset[i][3])
        elif j == 3:
            results[i][j] = dataset[i][2] + dataset[i][3] - dataset[i][0]
        elif j == 4:
            results[i][j] = dataset[i][3] - dataset[i][1] * dataset[i][2]
        
noisey_dataset = add_noise(dataset)
noisey_results = np.zeros((20, 5))
for i in range(20):
    for j in range(5):
        if j == 0:
            noisey_results[i][j] = noisey_dataset[i][0]
        elif j == 1:
            noisey_results[i][j] = 2 * (noisey_dataset[i][1] - noisey_dataset[i][0])
        elif j == 2:
            noisey_results[i][j] = noisey_dataset[i][1]/(noisey_dataset[i][0] - noisey_dataset[i][3])
        elif j == 3:
            noisey_results[i][j] = noisey_dataset[i][2] + noisey_dataset[i][3] - noisey_dataset[i][0]
        elif j == 4:
            noisey_results[i][j] = noisey_dataset[i][3] - noisey_dataset[i][1] * noisey_dataset[i][2]

#Convert to tensors
inputs = torch.from_numpy(noisey_dataset)
targets = torch.from_numpy(noisey_results)
actual_model = torch.from_numpy(results)
print(inputs)

#Tensors now ready

tensor([[ 4.4599,  2.3697,  2.4377,  3.5781],
        [ 4.1376, 10.2874,  4.3195,  8.0593],
        [ 7.5069,  3.3487,  3.5250,  3.6309],
        [ 1.2528,  9.8290,  9.0175,  5.6381],
        [ 3.1226,  8.8819,  7.1632,  3.6738],
        [ 4.6215,  5.2037,  9.1882,  5.7008],
        [ 6.2539,  0.6047,  3.7576,  0.2731],
        [ 1.5107,  9.7205,  9.5871,  7.2339],
        [ 7.2077,  4.7562,  9.3157,  3.6540],
        [ 6.6292,  6.8273,  5.6631,  2.6249],
        [ 6.9119,  8.2971,  7.8372,  3.8622],
        [ 8.8061,  7.4976,  6.9240,  1.6322],
        [ 9.5690,  3.6125,  2.8566,  6.1385],
        [ 5.0087,  2.3454,  8.0638,  1.7443],
        [ 2.6736,  6.6937,  4.7038,  0.6401],
        [ 1.1981,  0.9410,  4.6929,  5.5411],
        [ 9.8760,  5.3750,  5.5012,  9.4952],
        [ 8.9370,  3.0052,  7.0013, 10.0355],
        [ 2.0127,  6.7119,  8.1873,  9.8248],
        [ 8.0601,  4.6286,  0.2827,  0.8041]], dtype=torch.float64)


In [3]:
#Regression Model

#Weights and Biases
weight = torch.rand(5, 4, requires_grad=True)
bias = torch.rand(5, requires_grad=True)

print(weight)

#Model
def model(val):
    return val @ (weight.t()).double() + bias.double()

preds = model(inputs)
#print(preds)
#print(targets)

tensor([[0.1313, 0.2397, 0.1117, 0.6244],
        [0.8666, 0.4330, 0.2931, 0.1306],
        [0.5411, 0.0471, 0.2354, 0.7742],
        [0.8874, 0.8023, 0.1517, 0.6454],
        [0.7711, 0.6292, 0.8463, 0.5902]], requires_grad=True)


In [4]:
#Loss calculations

#Mean square error loss function
def mse(pred, target):
    diff = pred - target
    return torch.sum(diff * diff) / diff.numel()

#Calc loss
loss = mse(preds, targets)
print(loss)

#Compute gradient
loss.backward()

# Train for 100 epochs
for i in range(100):  
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        weight -= weight.grad * 1e-2
        bias -= bias.grad * 1e-2
        weight.grad.zero_()
        bias.grad.zero_()

        if (i+1)%10==0:
            print(loss)

tensor(716.2540, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(62.1511, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(36.3239, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(25.4625, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(20.6834, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(18.4631, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(17.3672, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(16.7910, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(16.4683, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(16.2759, dtype=torch.float64, grad_fn=<DivBackward0>)
tensor(16.1537, dtype=torch.float64, grad_fn=<DivBackward0>)


# Section 2

- Small scale transfer learning
- Train a CNN on a simple problem and try to transfer the learning to ma larger and more difficult problem
- USE NN OPTIMISATION FROM pytorch

# Problem
- Define two network classes
    - torchvision.datasets.MNIST
    - torchvision.datasets.CIFAR10
- Train first network to a good performance
- Train second network to observe losses over time (epochs)
- Test to see if copying layers from MNIST to CIFAR10 (before training) improves training of the second network.

# Notes
- Which layers are good to be transfered? (all but last 2?)
- How to deal with differing dimensionality?
- How to copy objects across?
- Presenting findings coherently

In [5]:
print ("Hi, again")

Hi, again


# Section 3

- Option A Cost Sensitive Learning

# Problem
- C is a k-by-k cost matrix for a classification class with k classes
- the element C(i,j) is the cost of classifying an instance of class j as class i
- Want to minimise the expected cost of predictions

- Write Psuedocode that, when optomised, achieves minimum expected cost of prediction
- Assuming a learning algorithm which minimises cost prediction is given, write a different way of achieving minimum expected cost prediction


# Notes
- Consider cases with k > 1 and briely discuss scalability.
- Do not have to use tensors
- Can write in pseudo-Python