# Imports and sweep configuration

In [1]:
%pip install wandb -q
import wandb
import numpy as np
import random
import cv2
# Seed every RNG the notebook relies on.  The network draws its initial weights
# from NumPy's global generator, which `random.seed` alone does not affect.
random.seed(1)
np.random.seed(1)

# Search space for the wandb hyper-parameter sweep: configurations are sampled
# at random and compared by validation accuracy ('val_acc', maximised).
# The 'optimizer', 'activations' and 'loss_function' strings are used as
# NeuralNetwork method names, and 'weight_ini' selects create_network_<name>.
sweep_config={
    'method' : 'random' ,
    'metric' : { 'name' : 'val_acc' , 'goal' : 'maximize' } ,
    'parameters' : {
        'epochs' : { 'values' : [5,6,10] },
        'n_hidden_layers' : {'values' : [3,4,2]},
        'n_hidden_layer_size' : { 'values' : [32,64,128]},
        'batch_size' : { 'values' : [16,32,64]},
        'weight_decay' : { 'values' : [0,0.0005] },
        'learning_rate' : { 'values' : [1e-3 ,5e-3,1e-4] },
        'optimizer' : { 'values' : ['sgd','ngd','mgd','rmsprop','adam','nadam','adagrad'] },
        'activations' : { 'values' : ['sigmoid','tanh','Relu'] },
        'loss_function' : {'values' : ['cross_entropy' , 'squared_error']},
        'weight_ini' : {'values' : ['random' , 'xavier']}
    }
}



[K     |████████████████████████████████| 2.0MB 5.1MB/s 
[K     |████████████████████████████████| 163kB 20.1MB/s 
[K     |████████████████████████████████| 102kB 7.8MB/s 
[K     |████████████████████████████████| 133kB 24.3MB/s 
[K     |████████████████████████████████| 71kB 6.9MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
# Mount Google Drive at /content/drive (Colab only).  NeuralNetwork.fit later
# saves the trained weights and biases under "drive/My Drive/DL_assignments/...".
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Neural network

In [3]:
class NeuralNetwork():

  def __init__(self,n_input,n_output,n_hidden_layers,n_hidden_neurons):
    self.n_input = n_input
    self.n_output = n_output
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_neurons = n_hidden_neurons  
    self.lambda1 = None 
    self.eta = None
    self.batch_size = None
    self.n_epoch = None
    self.X_train = None
    self.Y_train = None
    self.Y_train_o = None
    self.X_test = None
    self.Y_test = None
    self.Y_test_o = None
    self.vx = None
    self.vy = None
    self.vy_o = None
    self.arguments = None
    # print(self.n_hidden_neurons)

  def create_network_random(self):
    self.total_layers = 2 + self.n_hidden_layers 
    self.W = {}
    self.b = {}
    
    # initialization for W0 and b0 i.e. input layer
    self.W[0] = np.random.randn(self.n_input,self.n_hidden_neurons[0])
    self.b[0] = np.random.randn(self.n_hidden_neurons[0])
    # print(type(W[0]))

    # hidden layer
    for i in range(1,self.n_hidden_layers):
      self.W[i] = np.random.randn(self.n_hidden_neurons[i-1],self.n_hidden_neurons[i])
      self.b[i] = np.random.randn(self.n_hidden_neurons[i])
    
    # output layer
    self.W[self.total_layers-2] = np.random.randn(self.n_hidden_neurons[-1],self.n_output)
    self.b[self.total_layers-2] = np.random.randn(self.n_output)

    # print("W",len(self.W))
    # for i in self.W.values():
    #   print(np.shape(i)) 
    # print("self.b",len(self.b))
    # for i in self.b.values():
    #   print(len(i))

  def create_network_xavier(self):
    self.total_layers = 2 + self.n_hidden_layers 
    self.W = {}
    self.b = {}
    
    # initialization for W0 and b0 i.e. input layer
    self.W[0] = np.random.rand(self.n_input,self.n_hidden_neurons[0]) * np.sqrt((self.n_input+self.n_hidden_neurons[0]))
    self.b[0] = np.random.rand(self.n_hidden_neurons[0]) * 0
    # print(type(W[0]))

    # hidden layer
    for i in range(1,self.n_hidden_layers):
      self.W[i] = np.random.rand(self.n_hidden_neurons[i-1],self.n_hidden_neurons[i]) * np.sqrt((self.n_hidden_neurons[i-1] + self.n_hidden_neurons[i]))
      self.b[i] = np.random.rand(self.n_hidden_neurons[i]) * 0
    
    # output layer
    self.W[self.total_layers-2] = np.random.rand(self.n_hidden_neurons[-1],self.n_output) * np.sqrt((self.n_hidden_neurons[-1] + self.n_output))
    self.b[self.total_layers-2] = np.random.rand(self.n_output) *0

  def sigmoid(self,n):
    """Logistic function 1 / (1 + exp(-n)) of a single scalar `n`.

    The branch is chosen so that exp() is only ever called with a
    non-positive argument, which cannot overflow; the result is therefore
    exact over the whole real line (0.5 at n == 0, tending to 0 / 1 for large
    negative / positive n) without any clamping.
    """
    n = np.float64(n)
    if n >= 0:
      return 1 / (1 + np.exp(-n))
    # algebraically identical form for negative n: exp(n) / (1 + exp(n))
    e = np.exp(n)
    return e / (1 + e)

  # single value n
  def grad_sigmoid(self,n):
    temp = self.sigmoid(n)
    return temp * (1 - temp)

  def Relu(self,n):
    """Rectified linear unit of a scalar: `n` when it is positive, otherwise 0."""
    return 0 if n <= 0 else n
    
  def grad_Relu(self,n):
    """Derivative of `Relu` at the scalar `n`: 0 for n <= 0, otherwise 1."""
    return 0 if n <= 0 else 1

  def tanh(self,n):
    """Hyperbolic tangent of a single scalar `n`.

    Delegates to `np.tanh` in double precision.  It saturates cleanly to
    +/-1 for large |n|, whereas evaluating (e^n - e^-n) / (e^n + e^-n)
    directly overflows for float32 inputs well before |n| reaches 100.
    """
    return np.tanh(np.float64(n))
  
  def grad_tanh(self,n):
    return 1-np.power(self.tanh(n),2)

  # a is list
  def softmax(self,a):
    """Softmax of a 1-D sequence of scores `a`, returned as a NumPy array.

    The largest score is subtracted before exponentiating.  This leaves the
    result unchanged mathematically but guarantees every exponent is <= 0, so
    exp() cannot overflow, and the largest term is exactly 1, so the
    normalising sum can never be zero.  An empty input gives an empty array.
    """
    scores = np.asarray(a, dtype=np.float64)
    if scores.size == 0:
      return scores
    weights = np.exp(scores - np.max(scores))
    return weights / np.sum(weights)

  # y_o original output y_p predicted output
  def cross_entropy(self,y_o,y_p):
    for i,j in zip(y_o,y_p):
      if (i==1 and j >= 0):
        return -1*np.log(1e-15+j,dtype=np.float128) + self.regularize_loss()
    return 1e+3 

  #  gradient for the oputput layer when cross entropy is used
  def grad_cross_entropy(self,y_o,y_p):
    """Gradient passed to the output layer for the cross-entropy loss.

    y_o -- one-hot target, y_p -- predicted probabilities.
    Returns the negated difference (target - prediction).
    """
    error = y_o - y_p
    return -error

  def regularize_loss(self):
    temp = self.squr(self.W)
    # print(temp)
    total = 0
    for i in temp.keys():
      total = total + np.sum(temp[i])

    return self.lambda1 * total

  def squared_error(self,y_o,y_p):
    return  np.sum((np.array(y_o)-np.array(y_p))**2) + self.regularize_loss()

  def grad_squared_error(self,y_o,y_p):
    """Gradient of sum((y_p - y_o)^2) w.r.t. the pre-softmax scores.

    y_o -- one-hot target, y_p -- softmax probabilities of the output layer.

    With g = dL/dp = 2 * (p - y) and the softmax Jacobian
    dp_k/da_j = p_k * (delta_kj - p_j), the chain rule gives
        dL/da_j = p_j * (g_j - sum_k g_k * p_k).
    The weight-decay term is not included here.
    Returns a NumPy array with one entry per output unit.
    """
    target = np.asarray(y_o, dtype=np.float64)
    probs = np.asarray(y_p, dtype=np.float64)
    d_probs = 2.0 * (probs - target)
    return probs * (d_probs - np.dot(d_probs, probs))

  def forward_pass(self,X,W,b):
    # h does not contain X
    h={}
    a={}
    # input 
    a[0] = np.array(X @ W[0] + b[0],dtype=np.float32)
    # print(a[0])
  
  
    h[0] = list(map(lambda x :getattr(self,self.arguments[0])(x),a[0]))
    # print(h[0])

    # hidden
    for i in range(1,self.n_hidden_layers):
      a[i] = np.array(h[i-1] @ W[i] + b[i])
      h[i] = list(map(lambda x : getattr(self,self.arguments[0])(x),a[i]))

    # output
    a[self.n_hidden_layers] = np.array(h[self.n_hidden_layers-1] @ W[self.n_hidden_layers] + b[self.n_hidden_layers])

    # print("a",a,"h")

    return a , h

  def backward_pass(self,x,y,p,a,h,W,b):
    """Back-propagate one sample and return `(d_w, d_b)` keyed by layer index.

    x    -- input sample
    y    -- target vector handed to the loss gradient
    p    -- network output (result of the output activation)
    a, h -- pre-activations and hidden activations from `forward_pass`
    W    -- weights the forward pass was evaluated with
    b    -- biases; accepted but not used in this method

    The weight gradients are returned through `self.add(d_w, W, 1, lambda1)`,
    i.e. with `lambda1 * W` added (or unchanged when lambda1 == 0); the bias
    gradients are returned as they are.
    """
    
    d_al = None
    d_h = None
    inner_activation = self.arguments[0]
    # NOTE: despite its name this holds arguments[2], which selects the
    # `grad_<name>` method applied to (y, p) below
    output_activation = self.arguments[2]

    # gradient w.r.t. the output pre-activation a_l
    d_al = getattr(self,"grad_"+output_activation)(y,p)
    d_w ={}
    d_b ={}
    # walk from the output layer back to layer 1
    for i in range(self.n_hidden_layers,0,-1):
      # row j of d_w[i] is d_al scaled by activation j of the previous layer
      d_w[i] = np.array([np.dot(d_al,h[i-1][j]) for j in range(self.n_hidden_neurons[i-1])])
    
      d_b[i] = np.array(d_al)

      # gradient w.r.t. the previous layer's activations: W[i] times d_al
      d_h = np.array((np.matrix(W[i]) @ np.matrix(d_al).T).T)[0]

      # multiply element-wise by the derivative of the hidden activation
      d_al = [d_h[j] * getattr(self,"grad_"+inner_activation)(a[i-1][j]) for j in range(self.n_hidden_neurons[i-1])]

    # layer 0: the "previous activations" are the input features themselves
    d_w[0] = np.array([np.dot(d_al,x[j]) for j in range(self.n_input)])
    d_b[0] =  np.array(d_al)

    return self.add(d_w,W,1,self.lambda1) , d_b

  def gradient(self,x,y,W,b):
    # forward pass
    a , h = self.forward_pass(x,W,b)
    # print(a,h)
    # output activation
    p = getattr(self,self.arguments[1])(a[self.n_hidden_layers])

    # loss
    l = getattr(self,self.arguments[2])(y,p)

    # backward pass
    d_w ,d_b = self.backward_pass(x,y,p,a,h,W,b)
    # print(d_w , d_b)

    return d_w , d_b , l

  # dictionary add key wise
  def add(self,d1,d2,m1=1,m2=1):
    """Key-wise linear combination `m1 * d1[k] + m2 * d2[k]` over the keys of `d1`.

    When `m2` is 0 the first dictionary is returned as-is: the same object,
    and NOT scaled by `m1`.
    """
    if m2 == 0:
      return d1
    return {key: m1 * d1[key] + m2 * d2[key] for key in d1.keys()}

  def mul(self,d1 , m1 = 1):
    """Return a new dictionary with every value of `d1` multiplied by `m1`."""
    return {key: m1 * value for key, value in d1.items()}

  def squr(self,d):
    """Return a new dictionary with every value of `d` squared."""
    return {key: value**2 for key, value in d.items()}

  def adarate(self,d1,d2,n,e):
    """Adaptive step per key: `n * d1[k] / sqrt(e + d2[k])`.

    d1 -- gradients (or first moments), d2 -- accumulated squared gradients,
    n  -- learning rate, e -- small constant added inside the square root.
    """
    return {key: (n*d1[key]) /(e+d2[key])**(1/2) for key in d1.keys()}

  def onehot_encoding(self,a,n_class):
    """Turn a sequence of class indices `a` into a list of one-hot vectors of length `n_class`."""
    def _one_hot(label):
      row = np.zeros(n_class)
      row[label] = 1
      return row

    return [_one_hot(label) for label in a]

  #  list of classes ex. [1,2,1,..]
  def accuracy(self,y_o, y_p):
    """Fraction of positions where the true label `y_o[k]` equals the predicted `y_p[k]`.

    Both arguments are sequences of class indices; the count of matches is
    divided by `len(y_o)`.
    """
    matches = sum(1 for truth, guess in zip(y_o , y_p) if truth == guess)
    return matches/len(y_o)

  def predict_and_loss(self,X,Y,n):
    loss = 0
    predicted_class = []
    for i in range(n):
      # forward pass
      a , h = self.forward_pass(X[i],self.W,self.b)
      # if(i==0):
      #   print("a",a,"h",h)
      # output activation
      p = getattr(self,self.arguments[1])(a[self.n_hidden_layers])
      p = list(p)
      # print(p)
      predicted_class.append(p.index(max(p)))

      #  loss
      # print(i,len(Y))
      # print(p)
      ll = getattr(self,self.arguments[2])(Y[i],p)
      # print(ll)
      if(np.isnan(ll) or np.isinf(ll)):
        loss = loss + 1e+100
      else:
        loss = loss + ll

    return predicted_class , loss/n

  def accuracy_and_loss(self,X,Y,y):
    n_points , _ = np.shape(X)
    # print(len(Y))
    p , l = self.predict_and_loss(X,Y,n_points)
    # print(p)
    acc  = self.accuracy(y, p)

    return acc , l

  

  def sgd(self):
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
   
    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.W,self.b)
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)
      

        if(j%self.batch_size == 0):
          # print("dw",d_w,"db",d_b)
          self.W = self.add(self.W , d_w , 1, -1*self.eta)
          self.b = self.add(self.b , d_b , 1, -1*self.eta)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
        

      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)

  def mgd(self,gamma = 0.5):
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
    p_w , p_b =self.mul(self.W , 0) , self.mul(self.b,0)
    v_w , v_b =self.mul(self.W , 0) , self.mul(self.b,0)

    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.W,self.b)
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)   
    
        if(j%self.batch_size == 0):
          v_w , v_b = self.add(p_w , d_w , gamma , self.eta) , self.add(p_b , d_b , gamma , self.eta)
          self.W = self.add(self.W , v_w , 1, -1)
          self.b = self.add(self.b , v_b , 1 , -1)
          p_w , p_b = v_w , v_b
          # print("dw",d_w,"db",d_b)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
        
      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)

  def ngd(self,gamma = 0.9):
    p_v_w , p_v_b = self.mul(self.W , 0) , self.mul(self.b,0)
    v_w , v_b = self.mul(self.W , 0) , self.mul(self.b,0)
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)

    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.add(self.W,v_w,1,-gamma),self.add(self.b,v_b,1,-gamma))
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)
      
        if(j%self.batch_size == 0):
          v_w , v_b = self.add(p_v_w , d_w , gamma , self.eta) , self.add(p_v_b , d_b , gamma , self.eta)
          self.W = self.add(self.W , v_w , 1, -1)
          self.b = self.add(self.b , v_b , 1 , -1)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
          p_v_w , p_v_b = v_w , v_b

      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)

  def adagrad(self,eps = 1e-8):
    v_w , v_b = self.mul(self.W , 0) , self.mul(self.b,0)
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
    
    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.W,self.b)
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)
      
        if(j%self.batch_size == 0):
          v_w , v_b = self.add(v_w, self.squr(d_w)) , self.add(v_b , self.squr(d_b))
          self.W = self.add(self.W , self.adarate(d_w,v_w,self.eta,eps) , 1, -1)
          self.b = self.add(self.b , self.adarate(d_b,v_b,self.eta,eps) , 1 , -1)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)    

      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)

  def rmsprop(self,eps = 1e-8,beta1=0.9):
    v_w , v_b = self.mul(self.W , 0) , self.mul(self.b,0)
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)

    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.W,self.b)
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)
      
        if(j%self.batch_size == 0):
          v_w , v_b = self.add(v_w, self.squr(d_w),beta1,(1-beta1)) , self.add(v_b , self.squr(d_b),beta1,(1-beta1))
          self.W = self.add(self.W , self.adarate(d_w,v_w,self.eta,eps) , 1, -1)
          self.b = self.add(self.b , self.adarate(d_b,v_b,self.eta,eps) , 1 , -1)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)

      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)

  def adam(self,eps = 1e-8,beta1 = 0.9,beta2 =0.999):
    v_w , v_b = self.mul(self.W , 0) , self.mul(self.b,0)
    m_w , m_b = self.mul(self.W , 0) , self.mul(self.b,0)
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
    time_stamp = 0
    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.W,self.b)
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)
      
        if(j%self.batch_size == 0):
        
          m_w , m_b = self.add(m_w,d_w,beta1 ,(1-beta1)) , self.add(m_b,d_b,beta1 ,(1-beta1)) 
          v_w , v_b = self.add(v_w, self.squr(d_w),beta2,(1-beta2)) , self.add(v_b , self.squr(d_b),beta2,(1-beta2))
    
          m_w_hat , m_b_hat = self.mul(m_w , 1/(1-beta1**(time_stamp+1))) ,  self.mul(m_b , 1/(1-beta1**(time_stamp+1)))
          v_w_hat , v_b_hat = self.mul(v_w , 1/(1-beta2**(time_stamp+1))) ,  self.mul(v_b , 1/(1-beta2**(time_stamp+1)))  

          self.W = self.add(self.W , self.adarate(m_w_hat,v_w_hat,self.eta,eps) , 1, -1)
          self.b = self.add(self.b , self.adarate(m_b_hat,v_b_hat,self.eta,eps) , 1 , -1)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
          time_stamp = time_stamp+1

      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)

  def nadam(self,eps = 1e-8,beta1 = 0.9,beta2 =0.999):
    v_w , v_b = self.mul(self.W , 0) , self.mul(self.b,0)
    m_w , m_b = self.mul(self.W , 0) , self.mul(self.b,0)
    d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
    time_stamp=0
    for i in range(self.n_epoch):
      for j in range(self.n_points):
        t1 , t2 , _ = self.gradient(self.X_train[j],self.Y_train[j],self.add(self.W,m_w,1,-beta1),self.add(self.b,m_b,1,-beta1))
        d_w = self.add(d_w , t1)
        d_b = self.add(d_b , t2)
      
        if(j%self.batch_size == 0):
          m_w , m_b = self.add(m_w,d_w,beta1 ,(1-beta1)) , self.add(m_b,d_b,beta1 ,(1-beta1)) 
          v_w , v_b = self.add(v_w, self.squr(d_w),beta2,(1-beta2)) , self.add(v_b , self.squr(d_b),beta2,(1-beta2))
    
          m_w_hat , m_b_hat = self.mul(m_w , 1/(1-beta1**(time_stamp+1))) ,  self.mul(m_b , 1/(1-beta1**(time_stamp+1)))
          v_w_hat , v_b_hat = self.mul(v_w , 1/(1-beta2**(time_stamp+1))) ,  self.mul(v_b , 1/(1-beta2**(time_stamp+1)))  

          self.W = self.add(self.W , self.adarate(self.add(m_w_hat,d_w,beta1,(1-beta1)/(1-beta1**(time_stamp+1))),v_w_hat,self.eta,eps) , 1, -1)
          self.b = self.add(self.b , self.adarate(self.add(m_b_hat,d_b,beta1,(1-beta1)/(1-beta1**(time_stamp+1))),v_b_hat,self.eta,eps) , 1 , -1)
          d_w , d_b = self.mul(self.W , 0) , self.mul(self.b,0)
          time_stamp = time_stamp +1

      # train acc and loss
      t4 , t5 = self.accuracy_and_loss(self.X_train,self.Y_train,self.Y_train_o)
      t4,t5 = float(t4),float(t5)
      # validate
      va , vl = self.accuracy_and_loss(self.vx,self.vy,self.vy_o)
      va,vl = float(va),float(vl)
      wandb.log({'train_acc' : t4 , 'train_loss' : t5 , 'val_acc' : va, 'val_loss': vl })
      print("epoch",i,"train acc", t4 , "train loss" ,t5 , "validation acc" , va , "validation loss" ,vl)
        

  def fit(self,x_train,y_train,vx,vy,x_test,y_test,arg,optimizer,weight_ini,batch_size,epoch,lambda1,eta,run):
    self.X_train = x_train
    self.Y_train = self.onehot_encoding(y_train,10)
    self.Y_train_o = y_train
    self.X_test = x_test
    self.Y_test = self.onehot_encoding(y_test,10)
    self.Y_test_o = y_test
    self.vx = vx
    self.vy = self.onehot_encoding(vy,10)
    self.vy_o = vy
    self.arguments = arg
    self.batch_size = batch_size
    self.n_epoch = epoch
    self.lambda1 = lambda1
    self.eta = eta
    self.n_points , _ = np.shape(self.X_train)

    getattr(self,"create_network_"+weight_ini)()
    getattr(self,optimizer)()

    n_p_t , _ = np.shape(self.X_test) 
    predicted_classes , test_loss = self.predict_and_loss(self.X_test,self.Y_test,n_p_t)
    np.save("drive/My Drive/DL_assignments/assignment1/weight/"+str(run.name)+"W",self.W)
    np.save("drive/My Drive/DL_assignments/assignment1/bias/"+str(run.name)+"b",self.b)
    label = ["T-shirt/top" ,"Trouser" ,"Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot"]
    wandb.sklearn.plot_confusion_matrix(self.Y_test_o, predicted_classes, label)
    
    run.finish()

    





# Load data

In [4]:



from keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
def load_data(val_fraction=0.10, seed=12):
  """Load Fashion-MNIST as flat, mean-centred float32 vectors.

  val_fraction -- share of the training set held out for validation
  seed         -- random_state passed to `train_test_split`

  Every image is flattened and divided by 255.  The per-feature mean of the
  full training set (computed before the validation split) is subtracted
  from both the training and the test samples.

  Returns (x_train, y_train, x_val, y_val, x_test, y_test).
  """
  (X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()

  # flatten each image to one row and rescale; done on whole arrays at once
  # instead of looping over the images in Python
  x_train = X_train.reshape(len(X_train), -1).astype(np.float32) / 255
  x_test = X_test.reshape(len(X_test), -1).astype(np.float32) / 255

  # mean-centre with the training mean only, so the test set is shifted by
  # exactly the same offset as the training data
  m = np.mean(x_train, 0)
  x_train = (x_train - m).astype(np.float32)
  x_test = (x_test - m).astype(np.float32)

  x_train , vx_test , y_train , vy_test = train_test_split(x_train,Y_train,test_size=val_fraction,random_state=seed)
  return x_train ,y_train, vx_test , vy_test, x_test, Y_test



In [5]:
def train():
  """Run one sweep trial: read the wandb config, train a network and log it.

  Starts a wandb run, names it after its hyper-parameters, loads the data,
  builds a `NeuralNetwork` with 10 outputs and calls its `fit` method.
  """
  run = wandb.init()
  cfg = run.config
  run_name = (
      f"op_{cfg.optimizer!s}_ac_{cfg.activations!s}_l_{cfg.loss_function!s}"
      f"_hl_{cfg.n_hidden_layers!s}_hls_{cfg.n_hidden_layer_size!s}"
      f"_ep_{cfg.epochs!s}_n_{cfg.learning_rate!s}"
      f"_bs_{cfg.batch_size!s}_wi_{cfg.weight_ini!s}"
  )
  run.name = run_name
  print(run_name)

  # every hidden layer gets the same width
  hidden_sizes = [cfg.n_hidden_layer_size]*cfg.n_hidden_layers
  hidden_count = cfg.n_hidden_layers
  weight_decay = cfg.weight_decay
  # [hidden activation, output activation, loss]
  method_names = [cfg.activations,"softmax",cfg.loss_function]
  optimizer_name = cfg.optimizer
  epochs = cfg.epochs
  batch_size = cfg.batch_size
  learning_rate = cfg.learning_rate
  weight_init = cfg.weight_ini

  x_train,y_train,vx,vy,x_test,y_test= load_data()
  _, n_input = np.shape(x_train)

  network = NeuralNetwork(n_input,10,hidden_count,hidden_sizes)
  network.fit(x_train,y_train,vx,vy,x_test,y_test,method_names,optimizer_name,weight_init,batch_size,epochs,weight_decay,learning_rate,run)




In [6]:
# Register the sweep defined by `sweep_config` under project "final" of the
# entity "sonagara"; the returned value identifies the sweep.
sweepid= wandb.sweep(sweep_config,project="final",entity ="sonagara")
# Start an agent for that sweep with `train` as the function it runs.
wandb.agent(sweepid,train)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: egi13voh
Sweep URL: https://wandb.ai/sonagara/final/sweeps/egi13voh


[34m[1mwandb[0m: Agent Starting Run: fctq08ow with config:
[34m[1mwandb[0m: 	activations: Relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_function: squared_error
[34m[1mwandb[0m: 	n_hidden_layer_size: 128
[34m[1mwandb[0m: 	n_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_ini: random
[34m[1mwandb[0m: Currently logged in as: [33msonagara[0m (use `wandb login --relogin` to force relogin)


op_adagrad_ac_Relu_l_squared_error_hl_4_hls_128_ep_1_n_0.0001_bs_32_wi_random
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
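## For training without a wandb sweep. NOTE: fit() still reads run.name, calls run.finish() and logs through wandb, so the integer `run = 3` used below will not work as-is.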
#  def trin():
#   # run = wandb.init()
#   # c = run.config
#   # # name = "op_"+str(c.optimizer)+"_ac_"+str(c.activations)+"_l_"+str(c.loss_function)+"_hl_"+str(c.n_hidden_layers)+"_hls_"+str(c.n_hidden_layer_size)+"_ep_"+str(c.epochs)+"_n_"+str(c.learning_rate)+"_bs_"+str(c.batch_size)+"_wi_"+str(c.weight_ini)
#   # # run.name = name
#   # # print(name)
  
#   # hn = [c.n_hidden_layer_size]*c.n_hidden_layers  
#   # hl = c.n_hidden_layers 
#   # l1 = c.weight_decay
#   # arg = [c.activations,"softmax",c.loss_function]
#   # opt = c.optimizer
#   # ep = c.epochs
#   # bs = c.batch_size
#   # lr = c.learning_rate
#   # wi = c.weight_ini

#   hn = [12,12]
#   hl=2
#   l1=0.001
#   arg=["sigmoid","softmax","cross_entropy"]
#   opt = "nadam"
#   ep = 3
#   bs = 32
#   lr = 0.001
#   wi = "random"
#   run = 3

 
#   x_train,y_train,vx,vy,x_test,y_test= load_data()
#   n_points , n_input = np.shape(x_train)

#   NN = NeuralNetwork(n_input,10,hl,hn)
#   NN.fit(x_train,y_train,vx,vy,x_test,y_test,arg,opt,wi,bs,ep,l1,lr,run)



#   return

# trin()
