# Code Starts

In [None]:
import numpy as np
import random


class Logistic:
    """Binary logistic regression trained with batch gradient descent.

    The class bundles a synthetic data generator, the sigmoid link, the
    cross-entropy loss (plain, L1-penalised and L2-penalised) together with
    the matching gradients, and the training loops that use them.

    Conventions used throughout:
      * the design matrix ``X`` has shape ``(n, m)`` and its first column is
        the intercept (bias) column;
      * labels and predictions are column vectors of shape ``(n, 1)``;
      * the L1/L2 penalties are applied to every coefficient except the
        first one (the intercept).
    """

    # Predictions are clipped to [_EPS, 1 - _EPS] before taking logarithms so
    # that a saturated sigmoid (exactly 0.0 or 1.0) cannot produce log(0).
    _EPS = 1e-15

    # data generator
    def generator(self, n, m, success_prob):
        """Draw a random binary classification data set.

        Args:
            n: number of rows (samples).
            m: number of random features; the returned matrix has ``m + 1``
                columns because a constant bias column is placed first.
            success_prob: probability that the Bernoulli noise term is 1 for
                a given row.

        Returns:
            Tuple ``(X, y, w)`` where ``X`` has shape ``(n, m + 1)``, ``y`` is
            an ``(n, 1)`` array of 0.0/1.0 labels and ``w`` is the
            ``(m + 1, 1)`` weight vector used to produce the labels.
        """
        # input
        X = np.random.randn(n, m + 1)

        # Bias term: the first *column* is the constant 1 for every row.
        # (Assigning X[0] would overwrite the first sample instead.)
        X[:, 0] = 1

        # weights
        w = np.random.randn(m + 1, 1)

        # noise with the same shape as y, i.e. (n, 1)
        noise = self.bernoulli_noise(success_prob, n)

        # The sigmoid output lies in (0, 1), so adding a noise value of 1
        # always pushes the row above the 0.5 threshold applied by y_calc.
        y = self.sigmoid(X, w) + noise

        predictions = self.y_calc(y)

        # list of length n -> column vector of shape (n, 1)
        y = np.reshape(predictions, (len(predictions), 1))

        return X, y, w

    def y_calc(self, y):
        """Threshold scores at 0.5.

        Args:
            y: array-like of scores.

        Returns:
            A flat list of Python floats: 1.0 where the score is strictly
            greater than 0.5, otherwise 0.0. Elements are visited in memory
            order; an empty input yields an empty list.
        """
        scores = np.asarray(y).ravel(order="K")
        return (scores > 0.5).astype(float).tolist()

    def logistic_regression(self, X, Y, epochs, threshold, learning_rate, lambd=None, regularizer=None):
        """Fit logistic-regression coefficients by gradient descent.

        Args:
            X: design matrix of shape ``(n, m)``.
            Y: labels, ``n`` values of 0/1 (reshaped to ``(n, 1)``).
            epochs: maximum number of gradient steps.
            threshold: training stops once the absolute change in cost
                between consecutive iterations is at most this value.
            learning_rate: gradient-descent step size.
            lambd: regularisation strength; ``None`` trains without a penalty.
            regularizer: ``'l1'`` or ``'l2'``; only consulted when ``lambd``
                is not ``None``.

        Returns:
            Tuple ``(cost, beta)``: the cost evaluated just before the last
            applied update (``inf`` if no update was made) and the ``(m, 1)``
            coefficient vector.

        Raises:
            ValueError: if ``lambd`` is given but ``regularizer`` is neither
                ``'l1'`` nor ``'l2'``.
        """
        m = X.shape[1]  # No of features
        n = X.shape[0]  # No of rows

        beta = np.random.rand(m, 1)

        if lambd is None:  # No regulariser
            return self._gradient_descent(
                X, Y, epochs, threshold, learning_rate, beta,
                cost_fn=lambda y, pred_y, b: self.Cross_Entropy_loss(n, y, pred_y),
                grad_fn=lambda y, pred_y, b: self.gradient_Cross_Entropy_loss(X, y, pred_y, n),
            )

        if regularizer == 'l1':
            return self.L1_regularlizer(X=X, Y=Y, n=n, epochs=epochs, threshold=threshold,
                                        learning_rate=learning_rate, lambd=lambd, beta=beta)

        if regularizer == 'l2':
            return self.L2_regularlizer(X=X, Y=Y, n=n, epochs=epochs, threshold=threshold,
                                        learning_rate=learning_rate, lambd=lambd, beta=beta)

        # Falling through here used to return None silently.
        raise ValueError(
            "regularizer must be 'l1' or 'l2' when lambd is given, got %r" % (regularizer,)
        )

    def L1_regularlizer(self, X, Y, n, epochs, threshold, learning_rate, lambd, beta):
        """Gradient descent on the L1-penalised cross-entropy.

        Args are as in ``logistic_regression``; ``n`` is the number of rows
        and ``beta`` the initial ``(m, 1)`` coefficient vector.

        Returns:
            Tuple ``(cost, beta)`` as described in ``logistic_regression``.
        """
        return self._gradient_descent(
            X, Y, epochs, threshold, learning_rate, beta,
            cost_fn=lambda y, pred_y, b: self.cost_fun_L1(y, pred_y, n, lambd, b[1:]),
            grad_fn=lambda y, pred_y, b: self.gradient_descent_L1(X, y, pred_y, n, lambd, b[1:]),
        )

    def L2_regularlizer(self, X, Y, n, epochs, threshold, learning_rate, lambd, beta):
        """Gradient descent on the L2-penalised cross-entropy.

        Args are as in ``logistic_regression``; ``n`` is the number of rows
        and ``beta`` the initial ``(m, 1)`` coefficient vector.

        Returns:
            Tuple ``(cost, beta)`` as described in ``logistic_regression``.
        """
        return self._gradient_descent(
            X, Y, epochs, threshold, learning_rate, beta,
            cost_fn=lambda y, pred_y, b: self.cost_fun_L2(y, pred_y, n, lambd, b[1:]),
            grad_fn=lambda y, pred_y, b: self.gradient_descent_L2(X, y, pred_y, n, lambd, b[1:]),
        )

    def _gradient_descent(self, X, Y, epochs, threshold, learning_rate, beta, cost_fn, grad_fn):
        """Run the training loop shared by the plain, L1 and L2 variants.

        ``cost_fn`` and ``grad_fn`` are called as ``f(Y, pred_y, beta)``.
        """
        # Use the labels that were passed in (never a global) as a column.
        Y = np.asarray(Y, dtype=float).reshape(-1, 1)
        step = float(learning_rate)
        pre_cost = float('inf')

        for _ in range(epochs):
            pred_y = self.sigmoid(X, beta)
            cost = cost_fn(Y, pred_y, beta)
            if abs(pre_cost - cost) <= threshold:
                break
            pre_cost = cost
            beta = beta - step * grad_fn(Y, pred_y, beta)

        return pre_cost, beta

    def _binary_cross_entropy(self, y, pred_y, n):
        """Return the summed binary cross-entropy divided by ``n``.

        Both inputs are flattened so the product is element-wise whatever
        mix of ``(n,)`` and ``(n, 1)`` shapes is supplied.
        """
        labels = np.asarray(y, dtype=float).reshape(-1)
        probs = np.clip(np.asarray(pred_y, dtype=float).reshape(-1), self._EPS, 1 - self._EPS)
        return -np.sum(labels * np.log(probs) + (1 - labels) * np.log(1 - probs)) / n

    def _residual(self, y, pred_y):
        """Return ``pred_y - y`` as an ``(n, 1)`` column vector."""
        return np.reshape(pred_y, (-1, 1)) - np.reshape(y, (-1, 1))

    def sigmoid_fun(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
        return 1 / (1 + np.exp(-z))

    def cost_fun_L1(self, y, pred_y, n, lambd, beta):
        """Cross-entropy plus ``(lambd / n) * sum(|beta|)``.

        ``beta`` holds only the penalised coefficients (intercept excluded).
        """
        return self._binary_cross_entropy(y, pred_y, n) + (lambd / n) * np.sum(np.abs(beta))

    def cost_fun_L2(self, y, pred_y, n, lambd, beta):
        """Cross-entropy plus ``(lambd / (2 * n)) * sum(beta ** 2)``.

        ``beta`` holds only the penalised coefficients (intercept excluded).
        """
        return self._binary_cross_entropy(y, pred_y, n) + (lambd / (2 * n)) * np.sum(beta * beta)

    def gradient_descent_L1(self, x, y, pred_y, n, lambd, beta):
        """Gradient of the L1-penalised cost with respect to all coefficients.

        Args:
            x: design matrix ``(n, m)``.
            y: labels, ``n`` values.
            pred_y: predicted probabilities, ``n`` values.
            n: number of rows.
            lambd: regularisation strength.
            beta: the ``m - 1`` penalised coefficients (intercept excluded).

        Returns:
            ``(m, 1)`` gradient; row 0 (the intercept) carries no penalty.
        """
        gradient = np.matmul(x.T, self._residual(y, pred_y)) / n
        # np.sign is 0 at 0, whereas abs(beta) / beta is NaN there.
        gradient[1:] += (lambd / n) * np.sign(np.reshape(beta, (-1, 1)))
        return gradient

    def gradient_descent_L2(self, x, y, pred_y, n, lambd, beta):
        """Gradient of the L2-penalised cost with respect to all coefficients.

        Arguments are as in ``gradient_descent_L1``.

        Returns:
            ``(m, 1)`` gradient; row 0 (the intercept) carries no penalty.
        """
        gradient = np.matmul(x.T, self._residual(y, pred_y)) / n
        gradient[1:] += (lambd / n) * np.reshape(beta, (-1, 1))
        return gradient

    def sigmoid(self, x, beta):
        """Return the predicted probabilities ``sigmoid(x @ beta)``."""
        return self.sigmoid_fun(np.matmul(x, beta))

    def bernoulli_noise(self, success_prob, n):
        """Return an ``(n, 1)`` array of independent Bernoulli(success_prob) draws."""
        noise = np.random.binomial(1, success_prob, n)
        return np.reshape(noise, (n, 1))  # reshaping it to (n,1)

    def Cross_Entropy_loss(self, m, y, predicted):
        """Return the mean binary cross-entropy.

        Args:
            m: number of samples to average over.
            y: true 0/1 labels.
            predicted: predicted probabilities, one per label.

        The terms are multiplied element-wise; multiplying ``y.T`` by a
        column of predictions would broadcast to an ``(n, n)`` matrix and
        sum over every label/prediction pair.
        """
        return self._binary_cross_entropy(y, predicted, m)

    def gradient_Cross_Entropy_loss(self, X, Y, pred_y, n_sample):
        """Return the ``(m, 1)`` gradient ``X.T @ (pred_y - Y) / n_sample``."""
        return np.matmul(X.T, self._residual(Y, pred_y)) / n_sample





# Answer 1


In [None]:
# Draw a small synthetic data set: n=4 samples, m=4 random features and a
# Bernoulli noise probability of 0.1, then display the generated labels.
df = Logistic()
X, y, w = df.generator(n=4, m=4, success_prob=0.1)
y

array([[1.],
       [1.],
       [0.],
       [1.]])

# Answer 2

In [None]:
# Unregularised fit: lambd keeps its default of None, so no penalty is used.
cost, s = df.logistic_regression(
    X, y, epochs=10000, threshold=5e-7, learning_rate=0.001
)
cost

2.3224835737212173

# Answer 3 

## L1 Regularisation

In [None]:
# L1-penalised fit with regularisation strength 0.7.
cost, s = df.logistic_regression(
    X, y, epochs=10000, threshold=5e-7, learning_rate=0.001,
    lambd=0.7, regularizer='l1',
)
cost

0.592444914197579

## L2 Regularisation

In [None]:
# L2-penalised fit with regularisation strength 0.7.
cost, s = df.logistic_regression(
    X, y, epochs=10000, threshold=5e-7, learning_rate=0.001,
    lambd=0.7, regularizer='l2',
)
cost

0.39273071129701764

### [Reference for Lasso and Ridge regression](https://blog.alexlenail.me/what-is-the-difference-between-ridge-regression-the-lasso-and-elasticnet-ec19c71c9028#:~:text=tldr%3A%20%E2%80%9CRidge%E2%80%9D%20is%20a,If%20still%20confused%20keep%20reading%E2%80%A6)