In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV paths used further down in this
# notebook are given relative to a "drive/My Drive/..." directory.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sys
import os
import numpy as np
import pandas as pd
import math
# Seed NumPy's global RNG; Net draws its initial weights and biases from it.
np.random.seed(42)
# Module-level offset that get_test_data_predictions adds to every prediction.
y_min=0
# Number of input columns of the first weight matrix created by Net.
NUM_FEATS = 72
class Net(object):
  """Fully connected ReLU network with a single linear output unit.

  Layout (L = num_layers, all weights stored as (inputs, outputs)):
    weights[0]      : (num_feats, num_units)  first hidden layer, no bias
    weights[1..L-1] : (num_units, num_units)  further hidden layers
    weights[L]      : (num_units, 1)          linear output layer
    biases[0..L-2]  : (num_units, 1)          one per hidden layer after the first
    biases[L-1]     : (1, 1)                  output bias

  Forward pass:
    H[1]   = relu(X @ W[0])
    H[i+1] = relu(H[i] @ W[i]) + b[i-1].T     for 0 < i < L
    out    = H[L] @ W[L] + b[L-1].T
  Note that hidden biases are added *after* the ReLU and the first hidden
  layer has none; `backward` differentiates exactly this forward pass.
  The class assumes num_layers >= 1.
  """

  def __init__(self, num_layers, num_units, num_feats=None):
    """Draw all parameters from U(-1, 1) using NumPy's global RNG.

    Args:
      num_layers: number of hidden layers.
      num_units: width of every hidden layer.
      num_feats: number of input features; defaults to the module-level
        NUM_FEATS when omitted.
    """
    if num_feats is None:
      num_feats = NUM_FEATS
    self.num_layers = num_layers
    self.num_units = num_units
    self.biases = []
    self.weights = []
    # The order of the random draws is kept stable so a fixed seed still
    # reproduces the same initial parameters.
    for i in range(num_layers):
      if i==0:
        self.weights.append(np.random.uniform(-1, 1, size=(num_feats, self.num_units)))
      else:
        self.weights.append(np.random.uniform(-1, 1, size=(self.num_units, self.num_units)))
        self.biases.append(np.random.uniform(-1, 1, size=(self.num_units, 1)))
    self.biases.append(np.random.uniform(-1, 1, size=(1, 1)))
    self.weights.append(np.random.uniform(-1, 1, size=(self.num_units, 1)))

  def __call__(self, X):
    """Run the forward pass.

    Args:
      X: input matrix of shape (batch, num_feats).

    Returns:
      (Y_pred, H): Y_pred is the (batch, 1) network output and H is the list
      [X, H[1], ..., H[L], Y_pred] of layer activations that `backward` needs.
    """
    last = self.num_layers
    activations = [X]
    for i in range(last + 1):
      pre = np.dot(activations[i], self.weights[i])
      if i == 0:
        layer_out = np.maximum(0, pre)
      elif i == last:
        layer_out = pre + self.biases[i-1].T
      else:
        layer_out = np.maximum(0, pre) + self.biases[i-1].T
      activations.append(layer_out)
    return activations[-1], activations

  def backward(self,y_original,y_pred, HH1, lamda):
    """Per-sample gradients of the squared error plus an L2 weight penalty.

    Args:
      y_original: targets, shape (batch, 1).
      y_pred: network output for the batch, shape (batch, 1).
      HH1: activation list returned by `__call__` for the same batch and the
        current weights.
      lamda: L2 coefficient; every weight gradient gets `2 * lamda * W` added.

    Returns:
      (weight_d_w, node_d_w, bias_d_w), each ordered from the output layer
      back towards the input:
        weight_d_w[k]: (batch, inputs, outputs) gradient for weights[-k-1]
        node_d_w[k]:   (batch, num_units) gradient w.r.t. activation H[L-k]
        bias_d_w[k]:   (batch, outputs) gradient for biases[-k-1]
    """
    last = self.num_layers
    # d/d(pred) of (pred - y)^2 for every sample.
    out_err = 2*(np.asarray(y_pred) - np.asarray(y_original))

    weight_d_w = [np.einsum('ij,ik->ijk', HH1[last], out_err) + 2*lamda*self.weights[last]]
    bias_d_w = [out_err]
    node_d_w = [out_err @ self.weights[last].T]

    for layer in range(last - 1, -1, -1):
      upstream = node_d_w[-1]                      # dLoss/dH[layer+1]
      layer_in = np.asarray(HH1[layer])
      # ReLU derivative: only units whose pre-activation was positive pass
      # gradient. The pre-activation is recomputed because H stores the
      # post-ReLU (and post-bias) values only.
      active = (layer_in @ self.weights[layer]) > 0
      pre_grad = upstream * active
      weight_d_w.append(np.einsum('ij,ik->ijk', layer_in, pre_grad) + 2*lamda*self.weights[layer])
      if layer > 0:
        # The bias is added after the ReLU, so its gradient is the unmasked
        # upstream gradient.
        bias_d_w.append(upstream)
        node_d_w.append(pre_grad @ self.weights[layer].T)
    return weight_d_w,node_d_w,bias_d_w

class Optimizer(object):
  """Plain mini-batch gradient descent for a list of weight matrices."""

  def __init__(self, learning_rate):
    """Store the step size used by `step`."""
    self.l_r=learning_rate

  def step(self, weights, biases, delta_weights, delta_biases):
    """Apply one gradient-descent update to `weights`.

    Args:
      weights: list of weight matrices, input layer first; updated in place.
      biases: list of bias vectors; returned unchanged (biases are not trained).
      delta_weights: per-sample weight gradients ordered output layer first,
        i.e. delta_weights[i] has shape (batch, *weights[-i-1].shape).
      delta_biases: per-sample bias gradients; currently unused.

    Returns:
      (weights, biases) after the update.
    """
    for i in range(len(delta_weights)):
      # Average over the actual batch dimension instead of assuming 32 samples,
      # so the step size is correct for any batch size.
      batch_mean = np.mean(delta_weights[i], axis=0)
      weights[-i-1] = weights[-i-1] - self.l_r*batch_mean
    return weights,biases



def loss_mse(y, y_hat):
  """Sum of squared errors divided by the number of samples, len(y).

  Args:
    y: targets.
    y_hat: predictions with the same number of elements as `y`.

  Returns:
    The mean squared error as a NumPy float.
  """
  y = np.asarray(y, dtype=float)
  y_hat = np.asarray(y_hat, dtype=float)
  # A (N,) target against a (N, 1) prediction would otherwise broadcast to an
  # (N, N) difference matrix and silently inflate the loss.
  if y.shape != y_hat.shape and y.size == y_hat.size:
    y_hat = y_hat.reshape(y.shape)
  return (np.sum(np.square(y_hat-y))/len(y))

def loss_regularization(weights, biases):
  """L2 penalty: the sum of squared entries over every weight matrix.

  Args:
    weights: iterable of weight arrays.
    biases: accepted for interface symmetry; biases are not penalised.

  Returns:
    A Python float, so it can be added to a scalar data loss.
  """
  return float(sum(np.sum(np.square(w)) for w in weights))

def loss_fn(y, y_hat, weights, biases, lamda):
  """Total loss: MSE data term plus `lamda` times the regularisation term."""
  data_term = loss_mse(y, y_hat)
  penalty = loss_regularization(weights, biases)
  return (data_term + lamda*penalty)

def rmse(y,y_hat):
  """Root mean squared error between targets `y` and predictions `y_hat`.

  The squared errors are summed and divided by len(y) before the square root.

  Returns:
    A Python float.
  """
  y = np.asarray(y, dtype=float)
  y_hat = np.asarray(y_hat, dtype=float)
  # Align e.g. a (N,) target with a (N, 1) prediction; without this the
  # subtraction broadcasts to (N, N) and the reported RMSE is wrong.
  if y.shape != y_hat.shape and y.size == y_hat.size:
    y_hat = y_hat.reshape(y.shape)
  return (math.sqrt(np.sum(np.square(y_hat-y))/len(y)))

def cross_entropy_loss(y, y_hat):
  """Mean negative log of the predicted value at each row's arg-max target column.

  Args:
    y: 2-D target array; the column with the largest value marks the class.
    y_hat: 2-D array of predicted values, indexed as y_hat[row, class].
  """
  row_ids = range(len(y_hat))
  target_cols = np.argmax(y, axis=1)
  picked = y_hat[row_ids, target_cols]
  neg_log = -np.log(picked)
  return (np.mean(neg_log))


def train(net, optimizer, lamda, batch_size, max_epochs,train_input, train_target,dev_input, dev_target):
  """Mini-batch training loop followed by an RMSE report on the dev split.

  Prints the mean batch MSE after every epoch and the dev RMSE at the end.

  Args:
    net: model exposing `__call__(X) -> (Y_pred, H)`, `backward(...)`, and the
      `weights` / `biases` attributes.
    optimizer: object exposing `step(weights, biases, d_weights, d_biases)`.
    lamda: regularisation coefficient forwarded to `net.backward`.
    batch_size: number of rows per mini-batch; a trailing partial batch is skipped.
    max_epochs: number of passes over the training data.
    train_input, train_target: training features and targets, indexed in parallel.
    dev_input, dev_target: held-out features and targets.

  Raises:
    ValueError: if the training data is too short to form a single batch.
  """
  for _ in range(max_epochs):
    # Batches start at row 1, as in the original loop.
    # NOTE(review): row 0 is therefore never trained on — confirm this is intended.
    start = 1
    epoch_loss = 0.
    num_batches = 0
    while(start<len(train_input)-batch_size+1):
      batch_input = train_input[start:start+batch_size]
      # Column vector so it lines up with the (batch, 1) network output.
      batch_target = np.reshape(train_target[start:start+batch_size], (-1, 1))
      Y_pred,HH=net(batch_input)
      w,n,b=net.backward(batch_target,Y_pred,HH,lamda)
      net.weights,net.biases=optimizer.step(net.weights,net.biases,w,b)
      epoch_loss+=loss_mse(batch_target, Y_pred)
      start+=batch_size
      num_batches+=1
    if num_batches==0:
      raise ValueError("train_input is too short to form a single batch of size {}".format(batch_size))
    print(epoch_loss/(num_batches))
  dev_pred,_=net(dev_input)
  # dev_target may arrive as a flat (N,) vector while dev_pred is (N, 1);
  # reshape so the error is element-wise rather than an (N, N) broadcast.
  dev_rmse = rmse(np.reshape(dev_target, (-1, 1)), dev_pred)
  print('RMSE on dev data: {:.5f}'.format(dev_rmse))

# Module-level result buffer; the export cell writes this list to CSV.
test_pred=[]
def get_test_data_predictions(net, inputs):
  """Predict for `inputs` and store the results in the module-level `test_pred`.

  `test_pred` is reset on every call and filled with a leading placeholder 0
  followed by one value per input row (prediction + module-level `y_min`).
  NOTE(review): the placeholder presumably makes prediction k land on CSV row
  index k — confirm against the expected submission format.

  Args:
    net: callable model; may return either the predictions or a
      (predictions, activations) tuple as `Net.__call__` does.
    inputs: feature matrix, one row per sample.

  Returns:
    The `test_pred` list (same object as the module-level name).
  """
  output = net(inputs)
  # Net returns (Y_pred, H); only the predictions are wanted here. Iterating
  # over the tuple itself would yield just two entries, not one per sample.
  predictions = output[0] if isinstance(output, tuple) else output
  shifted = np.asarray(predictions, dtype=float).reshape(-1) + y_min

  # Mutate in place so other references to the list see the new contents, and
  # so re-running the cell does not append a second copy.
  test_pred[:] = [0]
  test_pred.extend(shifted.tolist())
  return test_pred
def normalization(X):
  """Min-max scale the whole array into [0, 0.01].

  The global minimum is subtracted and the result divided by
  100 * (max - min), i.e. one min/max pair for the entire array rather than
  one per column.

  Args:
    X: numeric array-like.

  Returns:
    A float array of the same shape. An empty input is returned as is and a
    constant input maps to all zeros instead of dividing by zero.
  """
  X = np.asarray(X, dtype=float)
  if X.size == 0:
    return X
  min_element = np.min(X)
  span = np.max(X) - min_element
  if span == 0:
    return np.zeros_like(X)
  return (X-min_element)/(span*100)
def _select_columns(matrix, wanted, shift=0):
  """Return the columns of `matrix` whose (column index + shift) is in `wanted`.

  `shift=1` is used for the test file, which is indexed one column to the left
  of the train/dev files.
  """
  wanted = set(wanted)
  picked = [matrix[:, i - shift] for i in range(matrix.shape[1] + shift) if i in wanted]
  return np.array(picked).T

def _min_max_scale(X, lo, hi):
  """Scale `X` with a given min/max pair using the same formula as `normalization`."""
  span = hi - lo
  if span == 0:
    return np.zeros_like(X, dtype=float)
  return (X - lo) / (span * 100)

def read_data(data_dir="drive/My Drive/FML_Assignment/DataSet"):
  """Load train/dev/test CSVs, keep the selected feature columns and scale them.

  The first row of every file is dropped after column selection; the first row
  of the training file is what `feature_extraction` reads as column labels.
  Column 0 of the train and dev files is used as the target.

  Side effect: sets the module-level `y_min` to the smallest training target so
  that predictions can be shifted back to the original scale.

  Args:
    data_dir: directory containing train.csv, dev.csv and test.csv.

  Returns:
    (train_X, train_target, dev_input, dev_target, test_input). Features are
    scaled with the *training* min/max; targets have `y_min` subtracted.
    `train_X` and `train_target` have the same number of rows.
  """
  global y_min

  train_raw = np.genfromtxt(os.path.join(data_dir, "train.csv"), delimiter=',')
  dev_raw = np.genfromtxt(os.path.join(data_dir, "dev.csv"), delimiter=',')
  test_raw = np.genfromtxt(os.path.join(data_dir, "test.csv"), delimiter=',')

  keep = feature_extraction(train_raw)

  # Drop row 0 from features AND targets so they stay aligned row for row.
  train_X = _select_columns(train_raw, keep)[1:]
  train_target = train_raw[1:, 0]
  dev_input = _select_columns(dev_raw, keep)[1:]
  dev_target = dev_raw[1:, 0]
  test_input = _select_columns(test_raw, keep, shift=1)[1:]

  # Fit the scaling on the training features only and reuse it for dev/test so
  # all three splits live on the same scale.
  lo, hi = np.min(train_X), np.max(train_X)
  train_X = _min_max_scale(train_X, lo, hi)
  dev_input = _min_max_scale(dev_input, lo, hi)
  test_input = _min_max_scale(test_input, lo, hi)

  y_min = np.min(train_target)
  train_target = train_target - y_min
  dev_target = dev_target - y_min
  return train_X, train_target, dev_input, dev_target, test_input

def feature_extraction(train_X):
  """Choose feature labels by discarding columns correlated with an earlier one.

  The first (up to) 90 entries of row 0 are used as labels for the columns of
  `train_X[:, 1:]`. A column is discarded when its absolute Pearson
  correlation with any column to its left exceeds 0.5. All rows, including
  row 0, enter the correlation.

  Args:
    train_X: 2-D numeric array; it must have exactly one more column than the
      number of labels taken from row 0.

  Returns:
    The labels of the surviving columns, converted to int, in column order.
  """
  labels = train_X[0][0:90]
  frame = pd.DataFrame(data=train_X[:, 1:], columns=labels)
  corr_matrix = frame.corr()

  # Strictly-lower triangle: entry (i, j) with j < i compares column i with an
  # earlier column j. NaN correlations compare as False.
  strong = np.abs(corr_matrix.values) > 0.5
  redundant_rows = np.tril(strong, k=-1).any(axis=1)
  discarded = {corr_matrix.columns[pos] for pos in np.flatnonzero(redundant_rows)}

  as_floats = [float(label) for label in labels]
  return [int(label) for label in as_floats if label not in discarded]
def main():
  """Load the data, train a one-hidden-layer network and collect test predictions."""
  # Hyper-parameters
  num_layers, num_units = 1, 64
  learning_rate = 0.0001
  batch_size = 32
  max_epochs = 200
  lamda = 0.1 # Regularization Parameter

  splits = read_data()
  train_input, train_target, dev_input, dev_target, test_input = splits

  net = Net(num_layers, num_units)
  optimizer = Optimizer(learning_rate)
  train(net, optimizer, lamda, batch_size, max_epochs,
        train_input, train_target, dev_input, dev_target)
  get_test_data_predictions(net, test_input)

# Run the whole pipeline; the numbers printed below this cell are the per-epoch
# mean batch losses emitted by train().
main()





5800.4757001511925
5768.422680800926
5706.291654656262
5579.988212821281
5325.512483356323
4839.25676066601
4010.469741171
2853.8282114534704
1656.9651462763165
800.1253284645722
373.00588462990027
210.6545440763404
156.63577432753104
138.71029260606318
132.2775645664699
129.73039777234072
128.6343551384706
128.1292246317424
127.87799871712221
127.73853933109511
127.64838175861907
127.5795871349064
127.51963441722346
127.4629756972302
127.4072254507896
127.35142639010509
127.29525891800589
127.2386813641637
127.18176714570605
127.12463169240728
127.06740021532904
127.01019395103161
126.95312469993354
126.89629308430293
126.83978841402464
126.78368922663668
126.72806407558531
126.67297240044692
126.61846537742034
126.56458672659848
126.51137346185675
126.4588565803024
126.40706169242269
126.35600959587421
126.30571679656258
126.25619598088726
126.2074564429925
126.15950447074177
126.11234369393765
126.06597539808868
126.02039880681345
125.97561133573484
125.93160882050644
125.8883857213

In [None]:
# Load the splits again at notebook level so they can be inspected in later cells.
train_input, train_target, dev_input, dev_target, test_input = read_data()

In [None]:
# Show (rows, columns) of the training feature matrix returned by read_data().
train_input.shape

(40800, 72)

In [None]:
import pandas as pd
# Write the module-level test_pred list (filled by get_test_data_predictions)
# to a CSV file on the mounted Drive; the DataFrame index becomes the first column.
pd.DataFrame(test_pred).to_csv("drive/My Drive/FML_Assignment/Cmest_pred.csv")