
# Assignment #2

Pedro Stramantinoli P. Cagliume Gomes 175955

Ruy Castilho Barrichelo 177012


## Importação dos Dados

In [75]:
import pandas as pd
import os

def load_fashionMNIST_train():
    csv_path = os.path.join("Data/fashion-mnist-dataset", "fashion-mnist_train.csv")
    return pd.read_csv(csv_path)

def load_fashionMNIST_tests():
    csv_path = os.path.join("Data/fashion-mnist-dataset", "fashion-mnist_test.csv")
    return pd.read_csv(csv_path)

## Feature Scalling e Normalização

In [76]:
import numpy as np

def normalize(train, test):
    train_mean = np.mean(train, axis=0)
    train = train - train_mean
    test = test - train_mean
    
    return train, test

def scale(X):
    max_array = np.max(X, axis=0)
    X = X / max_array[None, :]
    return X

## Geração de matrizes de Features e Target

### Setup inicial dos dados

In [80]:
import numpy as np

#carrega os dados
data_train = load_fashionMNIST_train()
data_test = load_fashionMNIST_tests()

# Separação em Features e Target

data_train_target = np.array(data_train["label"])
data_test_target = np.array(data_test["label"])

data_train = np.array(data_train)
data_test = np.array(data_test)

data_train = np.delete(data_train, 0, axis=1)
data_test = np.delete(data_test, 0, axis=1)

# Normalização
normalized_data_train, normalized_data_test = normalize(data_train,data_test)

# Scaling
scaled_data_train = scale(normalized_data_train)
scaled_data_test = scale(normalized_data_test)

def getTrainSet():
    return scaled_data_train, data_train_target

def getTestSet():
    return scaled_data_test,data_test_target

## Gerador de conjuntos de Cross Validation

In [87]:
# Cross Validation Generation 

from sklearn.model_selection import train_test_split, KFold

# Retorna um gerador de indices
def generate_sets(TRAINING_DATA,type='kfold'):

    # Cross validation using train_test_split
    if (type == 'split'):
       return train_test_split(TRAINING_DATA,test_size=0.2,random_state=0)

    # Cross validation using K-Fold
    # K = 5, Shuffle = true, Seed = 21
    elif (type == 'kfold'):
        kfold_seed = 21

        kfold = KFold(n_splits=5, shuffle=True, random_state=kfold_seed)
        return kfold.split(TRAINING_DATA)

## Units per Layer

In [8]:
input_layer=784 
units_per_hidden_layer=1024 # 2 to the tenth, since there are 10 classes
output_layer=10

## Learning Rate

In [67]:
learning_rate = 0.01

## Number of Interactions

In [53]:
interactions=20000

## Weights and Bias

In [99]:
# Initializes weights randomly

import numpy as np

np.random.seed(21)

def weights1():
    t1 = np.random.rand(input_layer, units_per_hidden_layer)
    

def weights2():
    t2 = np.random.rand(output_layer, units_per_hidden_layer)
    
     
# Adds biases, initialized with zeros

def bias1():
    b1 = np.zeros((1, units_per_hidden_layer))
    return b1

def bias2():
    b2 = np.zeros((1, output_layer))
    return b2

def setupNN():
    return weights1(), bias1(), weights2(), bias2()

## Activation Functions

### Sigmoid and Loss functions

In [49]:
import numpy as np

def sigmoid(z):
    return 1 / (1+np.exp(-z))

def sigmoid_derivative(a):
    return a(1 - a)

def loss(h, y):
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()


### ReLU

In [98]:
import numpy as np

def ReLU(z):
    z[z<0]=0
    return z

def ReLU_derivative(z):
    z = np.where(z >= 0, 1, 0)
    return z

### Leaky ReLU

In [97]:
import numpy as np

def leaky_ReLU(z):
    z = np.where(z > 0, z, z * 0.01)
    return z

def leaky_ReLU_derivative(z):
    z = np.where(z >= 0, 1, 0.01)
    return z

## Softmax

In [96]:
import numpy as np

# Stable softmax
def softmax(z):
    expZ = np.exp(z - np.max(z))
    return expZ / expZ.sum()


## Sigma

In [60]:
import numpy as np

def sigma(theta, pastSigma, derivative, z):
    thetaTransp = np.transpose(theta)
    mult = np.dot(thetaTransp, pastSigma)
    deriv = derivative(z)
    elementWise = np.multiply(mult, deriv)
    return elementWise

## Loss Function - Cross Entropy with Softmax

In [64]:
def cross_entropy_loss(delta, y, length):
    delta[range(lenght), y] -= 1
    return delta

## Success/Error Percentages

In [95]:
import numpy as np

def error(prediction, y):
    return np.mean( prediction != y )

def success(prediction, y):
    return np.mean( prediction == y )


# Models

#### Different activation functions for hidden layers, but all of them end with the use of Softmax

## Sigmoid

In [101]:
import numpy as np

def sigmoid_model(X, y):

    W1, B1, W2, B2 = setupNN()

    parameters = {}

    for i in range(0, interactions):

        # Forward propagation

        z1 = np.dot(W1, X) + b1

        a1 = sigmoid(z1)

        z2 = np.dot(W2, a1) + b2

        # Hypothesis
        a2 = softmax(z2)

        # Back propagation
        delta3 = cross_entropy_loss(a2, y, len(X))

        dW2 = np.dot(np.transpose(a1), delta3)
        dB2 = np.sum(delta3, axis=0, keepdims=True)

        delta2 = np.multiply(np.dot(W2,delta3), sigmoid_derivative(a1))

        dW1 = np.dot(np.transpose(X), delta2)
        dB1 = np.sum(delta2, axis=0, keepdims=True)

        # Updating parameters

        W1 += -learning_rate * dW1
        b1 += -learning_rate * db1
        W2 += -learning_rate * dW2
        b2 += -learning_rate * db2

        parameters = {'W1': W1, 'B1': B1, 'W2': W2, 'B2': B2}

def sigmoid_prediction(parameters, X):
    W1, B1, W2, B2 = model['W1'], model['B1'], model['W2'], model['B2']

    z1 = np.dot(W1, X) + b1
    a1 = np.sigmoid(z1)
    z2 = np.dot(W2, a1) + b2
    a2 = np.exp(z2)
    probabilities = a2 / np.sum(a2, axis=1, keepdims=True)
    
    return np.argmax(probabilities, axis=1)

In [103]:
X1, y1 = getTrainSet()
indices_generator = generate_sets(X1)
model_list = []
result_list = []

i=0
for train_index, val_index in indices_generator:
    x1_train = X1[train_index]
    y1_train = y1[train_index]
    x1_val = X1[val_index]
    y1_val = y1[val_index]
     
    parameters = sigmoid_model(x1_train, y1_train)
    model_list.append(parameters)
    
    y1_predict = sigmoid_prediction(parameters, x1_val)
    success = success(y1_predict, y1_val)
    result_list.append(success)
    
    i += 1

for i in range(0, len(result_list)):
    print('Success rate for set #', i, ' is: ', result_list[i])

TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'

## ReLU

In [52]:

a1 = X

# Forward propagation

z2 = np.dot(theta1, a1)

a2 = 

z3 = np.dot(theta2, a2)

# Hypothesis
a3 = 

sigmaL = cost()

# Back propagation

sigma2 = sigma()


delta = delta +


SyntaxError: invalid syntax (<ipython-input-52-8ec9cc791742>, line 8)

## Leaky ReLU

In [52]:

a1 = X

# Forward propagation

z2 = np.dot(theta1, a1)

a2 = 

z3 = np.dot(theta2, a2)

# Hypothesis
a3 = 

sigmaL = cost()

# Back propagation

sigma2 = sigma()


delta = delta +


SyntaxError: invalid syntax (<ipython-input-52-8ec9cc791742>, line 8)

## Tanh

In [52]:

a1 = X

# Forward propagation

z2 = np.dot(theta1, a1)

a2 = 

z3 = np.dot(theta2, a2)

# Hypothesis
a3 = 

sigmaL = cost()

# Back propagation

sigma2 = sigma()


delta = delta +


SyntaxError: invalid syntax (<ipython-input-52-8ec9cc791742>, line 8)