# Import Statements

In [1]:
import pandas as pd
import numpy as np
import random
import math
import matplotlib.pyplot as plt

# Train and Validation Split

In [2]:
#Pass the percentage of data you need for test like 20 % 
def train_split(df,test_per):
    indices=df.index.tolist()
    test_size=round(len(df)*(test_per/100))
    random.seed(0)
    test_indices=random.sample(population=indices,k=test_size)
    valid_df=df.loc[test_indices]
    train_df=df.drop(test_indices)
    return train_df,valid_df

# Activation Functions

In [3]:
# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))

In [4]:
def sigmoid(X):
    sigmoid_ans=np.zeros(X.shape,dtype=np.float128)
    for i in range(0,len(X)):
        x=X[i]
        exp_part=np.exp(-x)
        sigmoid_ans[i]=1/(1+exp_part)

    return sigmoid_ans

In [5]:
def softmax(X):
    no_rows,no_labels=X.shape
    soft_array=np.zeros(X.shape)
    for j in range(no_rows):
        x=X[j]
        sum=0
        for i in range(len(x)):
            sum+=np.exp(x[i])
        if(sum != 0):
            for i in range(len(x)):
                soft_array[j][i]=np.exp(x[i])/sum
        else:
            print("Error:Sum is zero")
        
    return soft_array

# Derivatives of Function

In [6]:
def derivative_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Error Functions

In [7]:
def MSE_error(actual_Y,predicted_Y):
    m=len(actual_Y)
    MSE=0
    for i in range(0,len(actual_Y)):
        actual=actual_Y[i]
        predicted=predicted_Y[i]
        MSE+=pow((actual-predicted),2)  
    MSE/=m

    return MSE

In [8]:
def entropy(actual_Y,predicted_Y):
    E=0
    for i in range(len(actual_Y)):
        if(predicted_Y[i] == 0):
            continue
        E-=(actual_Y[i]*np.log2(predicted_Y[i]))
    return E

# Neural Networks

In [9]:
def NN(X,Y,label):
    
    #Initialize layers
    no_samples,no_cols=X.shape
    no_labels=10
    
    no_I=no_cols     #Input layer
    no_O=no_labels  #Output layer
    #print("Nodes in Input layer-",no_I)
    #print("Nodes in Output layer-",no_O)
    no_H=int(math.sqrt(no_I*no_O))  #Hidden layer-1 Hidden layer
    #print("Nodes in Hiddden layer-",no_H)
    
    #Initializing weights and bias
    # W_H=np.random.uniform(size=(no_I,no_H))
    # W_O=np.random.uniform(size=(no_H,no_O))
    # B_H=np.random.uniform(size=(1,no_H))
    # B_O=np.random.uniform(size=(1,no_O))
    W_H=np.random.rand(no_I,no_H)
    W_O=np.random.rand(no_H,no_O)
    B_H=np.random.rand(1,no_H)
    B_O=np.random.rand(1,no_O)
    #print("Initial")
    #print("Weights-",W_O,W_H)
    #print("Biases-",B_O,B_H)
    
    #One-hot encode Y
    Y=one_hot_encode(Y,no_labels)

    alpha=0.003
    epoch=1000
    error_cost=[]
    
    while(epoch > 0):
        
        #Forward Propagation
        H_in=X.dot(W_H)+B_H
        H_out=sigmoid(H_in)
        O_in=H_out.dot(W_O)+B_O
        #O_out=sigmoid(O_in)
        O_out=softmax(O_in)
        #print("O_out-",O_out)
        
        #Backward Propagation
        #Error=MSE_error(Y,O_out)
        #Error=entropy(Y,O_out)
        Error=Y-O_out
        slope_O=derivative_sigmoid(O_out)
        slope_H=derivative_sigmoid(H_out)
        delta_O=Error*slope_O
        Error_H=delta_O.dot(W_O.T)
        delta_H=Error_H*slope_H
        
        #update weights and bias
        W_O=W_O+alpha*(H_out.T.dot(delta_O))
        W_H=W_H+alpha*(X.T.dot(delta_H))        
        B_O+=np.sum(delta_O)*alpha
        B_H+=np.sum(delta_H)*alpha
        #print(W_O,W_H,B_O,B_H)
        
        if epoch % 200 == 0:
            loss = np.sum(-Y*np.log(O_out))
            print('Loss function value: ', loss)
            error_cost.append(loss)
        
        epoch-=1
    
    #print(O_out)
    #print(error_cost)
    return O_out

# Helper Functions

In [10]:
#Creating X(data.T),Y arrays from df
def to_arrays(df,label):
    no_rows,no_cols=df.shape
    header=list(df.columns)
    data_array=np.ones((no_cols-1,no_rows))
    for i in range(1,no_cols):             #Removing first col-label
            x=df[header[i]].values
            #data_array[i-1]=x
            data_array[i-1]=mean_normalize(x)
    X=data_array.T

    #Y (output) array
    Y=np.array(df[label].values)  
    return X,Y

In [11]:
def mean_normalize(x):
    x_new=np.zeros(len(x))
    mean=np.mean(x)
    std=np.std(x)
    for i in range(0,len(x)):
        if( int(mean) == 0):
            #print("mean is zero")
            x_new[i]=x[i]
        elif( int(std) == 0 ):
            #print("std is zero")
            x_new[i]=(x[i]- int(mean))
        else:
            x_new[i]=(x[i]-mean)/std
    return x_new

In [12]:
def one_hot_encode(Y,no_labels):
    encoded_Y=np.zeros((len(Y),no_labels))
    for i in range(0,len(Y)):
        encoded_Y[i,Y[i]]=1
    return encoded_Y

# Main

In [13]:
def main():
    
    #Loading the file
    file="Apparel/apparel-trainval.csv"
    label='label'
    df=pd.read_csv(file)
   
    #Train and Validation Split
    #train_df,valid_df=train_split(df,20)
    train_df=df
    
    #Training
    train_X,train_Y=to_arrays(train_df,label)
    
    output=NN(train_X,train_Y,label)

    #Writing to file
    output_file="output_prediction.csv"
    f = open(output_file,"w")
    no_rows,no_labels=output.shape
    correct=0
    for i in range(no_rows):
        prediction=np.argmin(output[i])
        actual=train_Y[i]
        if(actual == prediction):
            correct+=1
        #print(prediction)
        f.write(str(prediction))

    accuracy=correct/no_rows
    print("Accuracy-",accuracy)

    return

In [None]:
if __name__ == "__main__":
    main()

Loss function value:  195558.97067047574


  """


Loss function value:  nan
Loss function value:  nan


  
  # This is added back by InteractiveShellApp.init_path()
  # This is added back by InteractiveShellApp.init_path()
