# import statements

In [1]:
import torch.nn.functional as F
import torch
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.metrics import log_loss
import pandas as pd

from torch.nn import LogSoftmax, NLLLoss,CrossEntropyLoss
#from scipy.optimize import fmin


# utils

In [2]:
def softmax(x):
    """Row-wise softmax of a 2-D tensor of scores.

    The maximum of each row is subtracted before exponentiating so that
    large scores do not overflow; the result is mathematically unchanged.

    x       : tensor whose dimension 1 holds the scores to normalise
    returns : tensor of the same shape whose entries along dimension 1 sum to 1
    """
    row_max = x.max(dim=1, keepdim=True).values
    exp_shifted = (x - row_max).exp()
    normaliser = exp_shifted.sum(dim=1, keepdim=True)
    return exp_shifted / normaliser

In [39]:
def focus_(a,x):
  """
  focus function parametrized by a

  Every patch x[i, j] gets the score x[i, j] . a; the scores of one sample
  are turned into weights with a softmax over the patches, and the patches
  are averaged with those weights.

  a       : tensor of shape (d, 1), the focus parameters
  x       : tensor of shape (batch, m, d) -- m patches of dimension d per sample
  returns : tensor of shape (batch, d), the averaged input for the
            classification function (same dtype and device as x)
  """
  # (batch, m, d) @ (d, 1) -> (batch, m, 1); drop the trailing axis.
  scores = torch.matmul(x,a)[:,:,0]
  # Normalise over the m patches of each sample.
  weights = torch.softmax(scores,dim=1)
  # Weighted sum over the patch axis, done in one vectorised reduction so the
  # result keeps x's dtype/device instead of being copied into a float32
  # CPU buffer sample by sample.
  return torch.sum(weights.unsqueeze(-1)*x,dim=1)

def classification_(b,c,x):
  """
  classification function parametrized by b and c

  b       : weight tensor multiplied on the right of x
  c       : bias tensor added to x @ b (broadcast over the rows)
  x       : input tensor, e.g. the averaged input produced by focus_
  returns : the raw scores x @ b + c. No softmax is applied here; the
            notebook feeds these scores to CrossEntropyLoss.
  """
  return torch.add(torch.matmul(x,b),c)

In [4]:
def derv_g(b,c):
  """
  Gradients of the loss with respect to b and c, as stored by autograd.

  Simply reads the .grad attributes, so backward() must already have been
  called on the loss; a tensor without a gradient yields None.

  returns : tuple (b.grad, c.grad)
  """
  return b.grad, c.grad

In [5]:
def derv_f(a):
  """
  Gradient of the loss with respect to a, as stored by autograd.

  Simply reads a.grad, so backward() must already have been called on the
  loss; returns None when no gradient has been computed for a.
  """
  return a.grad

In [6]:

def gd(w,dw,eta=0.01):
  """
  updates given parameter in negative direction of gradient

  w       : current parameter tensor
  dw      : gradient of the loss with respect to w (same shape as w)
  eta     : learning rate (step size), 0.01 by default
  returns : a new tensor w - eta*dw. It is computed under torch.no_grad(),
            so it is detached from the autograd graph and has
            requires_grad == False; w itself is not modified.
  raises  : ValueError if dw is None (no gradient has been computed for w)
  """
  if dw is None:
    raise ValueError("gd: gradient is None; call backward() on the loss before updating")
  with torch.no_grad():
    # A 0-dim step size in the gradient's dtype/device broadcasts against any
    # shape, so scalar (0-dim) parameters keep their shape after the update.
    step = torch.as_tensor(eta,dtype=dw.dtype,device=dw.device)
    return w - torch.mul(step,dw)

# m = 2

In [7]:
# X = torch.tensor([[3,-1],[-1,3],[1,3],[3,1]]) # mosaic data m = 2 , d= 1
# Y = torch.tensor([0,0,1,1])


# a = torch.tensor([[0.],[0.]])
# b = torch.tensor([[0.,0.],[0.,0.]])
# c = torch.tensor([0.,0.])



# Mosaic data with m = 2 patches of dimension d = 2 per sample.
# Each sample pairs one class-specific patch with the shared patch [3.5, 4];
# every class appears twice, once with its patch first and once with it second.
X = torch.tensor([
    sample
    for patch in ([1.0, 1.0], [2.0, 1.0], [1.0, 2.0])
    for sample in ([patch, [3.5, 4.0]], [[3.5, 4.0], patch])
])
# Class label of each sample, in the same order as the rows of X.
Y = torch.tensor([label for label in range(3) for _ in range(2)])

In [8]:
def minimize_b_c(x,y,a,b,c,epochs=1000):
  """
  Gradient descent on the classification parameters b and c with the focus
  parameter a held fixed.

  x       : input patches, passed to focus_
  y       : class labels, passed to CrossEntropyLoss as targets
  a       : focus parameters (fixed here); its requires_grad flag is set to False
  b, c    : initial classification parameters; requires_grad is switched on
            for the tensors that are passed in
  epochs  : number of gradient steps (every step uses the full data set)
  returns : (b, c, loss) -- the updated parameters and the final loss as a float.
            When epochs > 0 the returned b and c are new tensors produced by gd.

  Prints the averaged input, the scores and the loss before and after training.
  """
  criterion = CrossEntropyLoss()

  # a is not trained in this phase.
  a.requires_grad = False

  # a never changes below, so the averaged input is computed once and reused
  # for every epoch and for the final report.
  with torch.no_grad():
    x_average = focus_(a,x)
    yhat = classification_(b,c,x_average)
    initial_loss = criterion(yhat,y)

  print("x average at 0 epoch", x_average )
  print("yhat at 0 epoch",yhat)
  print("loss at 0 epoch",initial_loss.item())

  i = -1  # reported as the last epoch index; stays -1 when no epoch is run
  for i in range(epochs):
    # gd returns tensors with requires_grad == False, so gradient tracking
    # has to be switched back on at the start of every step.
    b.requires_grad = True
    c.requires_grad = True

    loss = criterion(classification_(b,c,x_average),y)
    loss.backward()

    b = gd(b,b.grad)
    c = gd(c,c.grad)

  print("   ")
  with torch.no_grad():
    print("x average",x_average)
    yhat = classification_(b,c,x_average)
    print("Y hat",yhat)
    current_loss  = criterion(yhat,y)

  print("Loss",current_loss.item(),i)

  return b,c,current_loss.item()



In [9]:
# a = torch.tensor([0.],requires_grad=True)
# b = torch.tensor([0.],requires_grad=True)
# c = torch.tensor([0.],requires_grad=True)


# Train only the classifier (b, c) on the m = 2 data, starting from all-zero
# parameters; a stays at zero throughout.
a = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

b, c, loss = minimize_b_c(X, Y, a, b, c, epochs=100000)

x average at 0 epoch tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
yhat at 0 epoch tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
loss at 0 epoch 1.0986123085021973
   
x average tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
Y hat tensor([[ 1.9780, -1.0629, -0.9235],
        [ 1.9780, -1.0629, -0.9235],
        [ 1.5083,  4.6957, -6.2130],
        [ 1.5083,  4.6957, -6.2130],
        [ 1.3871, -5.8353,  4.4386],
        [ 1.3871, -5.8353,  4.4386]])
Loss 0.06149733439087868 99999


In [10]:
def minimize_a(x,y,a,b,c,epochs=1000):
  """
  Gradient descent on the focus parameter a with the classification
  parameters b and c held fixed.

  x       : input patches, passed to focus_
  y       : class labels, passed to CrossEntropyLoss as targets
  a       : initial focus parameters; requires_grad is switched on for the
            tensor that is passed in
  b, c    : classification parameters (fixed here); their requires_grad
            flags are set to False
  epochs  : number of gradient steps (every step uses the full data set)
  returns : (a, loss) -- the updated focus parameters and the final loss as a
            float. When epochs > 0 the returned a is a new tensor produced by gd.

  Prints the averaged input, the scores and the loss before and after training.
  """
  criterion =CrossEntropyLoss()

  # b and c are not trained in this phase.
  b.requires_grad = False
  c.requires_grad = False

  with torch.no_grad():
    x_average = focus_(a,x)
    yhat = classification_(b,c,x_average)
    initial_loss = criterion(yhat,y)
  print("x average at 0 epoch", x_average )
  print("yhat at 0 epoch",yhat)
  print("loss at 0 epoch",initial_loss.item())

  i = -1  # reported as the last epoch index; stays -1 when no epoch is run
  for i in range(epochs):
    # gd returns a tensor with requires_grad == False, so gradient tracking
    # has to be switched back on at the start of every step.
    a.requires_grad =True

    x_average = focus_(a,x)
    loss = criterion(classification_(b,c,x_average),y)
    loss.backward()

    a = gd(a,derv_f(a))

  print("*"*60)
  with torch.no_grad():
    x_average = focus_(a,x)
    print("x average",x_average)
    yhat = classification_(b,c,x_average)
    print("Y hat",yhat)
    current_loss = criterion(yhat,y)
  print("Loss",current_loss.item(),i)
  return a,current_loss.item()

In [11]:
# a = torch.tensor([0.],requires_grad=True)
# b = torch.tensor([0.],requires_grad=False)
# c = torch.tensor([0.],requires_grad=False)


# Train only the focus parameter a on the m = 2 data, starting from all-zero
# parameters; b and c stay at zero throughout.
a = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

a, loss = minimize_a(X, Y, a, b, c, epochs=100000)

x average at 0 epoch tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
yhat at 0 epoch tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
loss at 0 epoch 1.0986123085021973
************************************************************
x average tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
Y hat tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
Loss 1.0986123085021973 99999


# Alternate minimization

In [55]:
# a = torch.tensor([0.],requires_grad=True)
# b = torch.tensor([0.],requires_grad=True)
# c = torch.tensor([0.],requires_grad=True)


# Alternating minimisation on the m = 2 data, classifier (b, c) first.
# All parameters start at zero.
a = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

criterion = CrossEntropyLoss()
# One row per optimisation phase: which parameters were frozen, the parameter
# values after the phase and the loss reached.
data = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])

# Row 0 records the loss of the untrained model.
X_average = focus_(a, X)
Yhat = classification_(b, c, X_average)
initial_loss = criterion(Yhat, Y)

k = 0  # next free row of `data`
data.loc[k] = [k, True, True, *(p.detach().clone().numpy() for p in (a, b, c)), initial_loss.item()]
k += 1
j = 1  # round counter stored in the "sno" column

for i in range(0, 40, 2):
    print("Minimize b and c")
    b, c, loss = minimize_b_c(X, Y, a, b, c, 10000)
    data.loc[k] = [j, False, True, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("*" * 60)
    print("  ")
    print("minimize a")

    a, loss = minimize_a(X, Y, a, b, c, 10000)
    data.loc[k + 1] = [j, True, False, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("  ")
    k += 2
    j += 1

Minimize b and c
x average at 0 epoch tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
yhat at 0 epoch tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
loss at 0 epoch 1.0986123085021973
   
x average tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
Y hat tensor([[ 0.5278, -0.3263, -0.2014],
        [ 0.5278, -0.3263, -0.2014],
        [ 0.3643,  1.6976, -2.0619],
        [ 0.3643,  1.6976, -2.0619],
        [ 0.4321, -2.0587,  1.6266],
        [ 0.4321, -2.0587,  1.6266]])
Loss 0.3939603269100189 9999
************************************************************
  
minimize a
x average at 0 epoch tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2

In [56]:
data

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,"[[0.0], [0.0]]","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]","[0.0, 0.0, 0.0]",1.098612
1,1,False,True,"[[0.0], [0.0]]","[[-0.3269485, 4.0479364, -3.720986], [-0.19137...","[1.7418519, -0.77230084, -0.9695512]",0.39396
2,1,True,False,"[[-1.0059414], [-1.2652781]]","[[-0.3269485, 4.0479364, -3.720986], [-0.19137...","[1.7418519, -0.77230084, -0.9695512]",0.179662
3,2,False,True,"[[-1.0059414], [-1.2652781]]","[[-0.7432039, 5.1157026, -4.3725095], [-0.7416...","[3.3028562, -1.6674803, -1.6353757]",0.070145
4,2,True,False,"[[-1.0607889], [-1.3646624]]","[[-0.7432039, 5.1157026, -4.3725095], [-0.7416...","[3.3028562, -1.6674803, -1.6353757]",0.070014
5,3,False,True,"[[-1.0607889], [-1.3646624]]","[[-1.0068345, 5.8030467, -4.79622], [-1.007383...","[4.1617494, -2.0958846, -2.0658817]",0.044041
6,3,True,False,"[[-1.0813762], [-1.4087586]]","[[-1.0068345, 5.8030467, -4.79622], [-1.007383...","[4.1617494, -2.0958846, -2.0658817]",0.044018
7,4,False,True,"[[-1.0813762], [-1.4087586]]","[[-1.188779, 6.2750616, -5.086282], [-1.189704...","[4.7496777, -2.3892918, -2.3604293]",0.031921
8,4,True,False,"[[-1.0915551], [-1.4349086]]","[[-1.188779, 6.2750616, -5.086282], [-1.189704...","[4.7496777, -2.3892918, -2.3604293]",0.031913
9,5,False,True,"[[-1.0915551], [-1.4349086]]","[[-1.3272418, 6.633486, -5.3062353], [-1.32811...","[5.1954336, -2.611795, -2.5836983]",0.024974


In [14]:
# data.to_csv("data_1_m_2_1.csv",index=False)

In [66]:
# a = torch.tensor([0.],requires_grad=True)
# b = torch.tensor([0.],requires_grad=True)
# c = torch.tensor([0.],requires_grad=True)


# Alternating minimisation on the m = 2 data, focus parameter a first.
# All parameters start at zero.
a = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

criterion = CrossEntropyLoss()

# One row per optimisation phase: which parameters were frozen, the parameter
# values after the phase and the loss reached.
data_1 = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])

# Row 0 records the loss of the untrained model.
X_average = focus_(a, X)
Yhat = classification_(b, c, X_average)
initial_loss = criterion(Yhat, Y)

k = 0  # next free row of `data_1`
data_1.loc[k] = [k, True, True, *(p.detach().clone().numpy() for p in (a, b, c)), initial_loss.item()]
k += 1
j = 1  # round counter stored in the "sno" column

for i in range(0, 40, 2):
    print("minimize a")
    a, loss = minimize_a(X, Y, a, b, c, 10000)
    data_1.loc[k] = [j, True, False, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("*" * 60)
    print("  ")

    print("Minimize b and c")
    b, c, loss = minimize_b_c(X, Y, a, b, c, 10000)
    data_1.loc[k + 1] = [j, False, True, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("  ")
    k += 2
    j += 1

minimize a
x average at 0 epoch tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
yhat at 0 epoch tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
loss at 0 epoch 1.0986123085021973
************************************************************
x average tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],
        [2.2500, 3.0000]])
Y hat tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
Loss 1.0986123085021973 9999
************************************************************
  
Minimize b and c
x average at 0 epoch tensor([[2.2500, 2.5000],
        [2.2500, 2.5000],
        [2.7500, 2.5000],
        [2.7500, 2.5000],
        [2.2500, 3.0000],

In [58]:
# data_1.to_csv("data_1_m_2_2.csv",index=False) 0.005713

In [59]:
data_1

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,"[[0.0], [0.0]]","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]","[0.0, 0.0, 0.0]",1.098612
1,1,True,False,"[[0.0], [0.0]]","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]","[0.0, 0.0, 0.0]",1.098612
2,1,False,True,"[[0.0], [0.0]]","[[-0.3269485, 4.0479364, -3.720986], [-0.19137...","[1.7418519, -0.77230084, -0.9695512]",0.39396
3,2,True,False,"[[-1.0059414], [-1.2652781]]","[[-0.3269485, 4.0479364, -3.720986], [-0.19137...","[1.7418519, -0.77230084, -0.9695512]",0.179662
4,2,False,True,"[[-1.0059414], [-1.2652781]]","[[-0.7432039, 5.1157026, -4.3725095], [-0.7416...","[3.3028562, -1.6674803, -1.6353757]",0.070145
5,3,True,False,"[[-1.0607889], [-1.3646624]]","[[-0.7432039, 5.1157026, -4.3725095], [-0.7416...","[3.3028562, -1.6674803, -1.6353757]",0.070014
6,3,False,True,"[[-1.0607889], [-1.3646624]]","[[-1.0068345, 5.8030467, -4.79622], [-1.007383...","[4.1617494, -2.0958846, -2.0658817]",0.044041
7,4,True,False,"[[-1.0813762], [-1.4087586]]","[[-1.0068345, 5.8030467, -4.79622], [-1.007383...","[4.1617494, -2.0958846, -2.0658817]",0.044018
8,4,False,True,"[[-1.0813762], [-1.4087586]]","[[-1.188779, 6.2750616, -5.086282], [-1.189704...","[4.7496777, -2.3892918, -2.3604293]",0.031921
9,5,True,False,"[[-1.0915551], [-1.4349086]]","[[-1.188779, 6.2750616, -5.086282], [-1.189704...","[4.7496777, -2.3892918, -2.3604293]",0.031913


# m = 9

In [60]:
# Mosaic data with m = 9 patches of dimension d = 2 per sample.
# Every sample holds eight copies of the shared patch [3.5, 4] and one
# class-specific patch; the rows run over the 9 possible positions of that
# patch and, for each position, over the three classes.
X1 = torch.tensor([
    [patch if slot == position else [3.5, 4.0] for slot in range(9)]
    for position in range(9)
    for patch in ([1.0, 1.0], [2.0, 1.0], [1.0, 2.0])
])
# Class label of each sample: 0, 1, 2 repeated once per position.
Y1 = torch.tensor([0, 1, 2] * 9)

In [61]:
# softmax(a*X1)
# Shapes of the m = 9 and m = 2 data sets.
(X1.shape, X.shape)

(torch.Size([27, 9, 2]), torch.Size([6, 2, 2]))

In [62]:
# Run the focus function once on the m = 9 data with a non-zero a.
a = torch.tensor([[1.0], [2.0]], requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

average = focus_(a, X1)


#print(torch.matmul(X1,a))

# b = 17.38
# c = -32.78
# # #a,loss = minimize_a(X1,Y1,a,b,c)

In [78]:
# a = torch.tensor([0.],requires_grad=True)
# b = torch.tensor([0.],requires_grad=True)
# c = torch.tensor([0.],requires_grad=True)

# Alternating minimisation on the m = 9 data, classifier (b, c) first.
# All parameters start at zero.
a = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

criterion = CrossEntropyLoss()
# One row per optimisation phase: which parameters were frozen, the parameter
# values after the phase and the loss reached.
data = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])

# Row 0 records the loss of the untrained model.
X1_average = focus_(a, X1)
Yhat1 = classification_(b, c, X1_average)
initial_loss = criterion(Yhat1, Y1)

k = 0  # next free row of `data`
data.loc[k] = [k, True, True, *(p.detach().clone().numpy() for p in (a, b, c)), initial_loss.item()]
k += 1
j = 1  # round counter stored in the "sno" column

for i in range(0, 40, 2):
    print("Minimize b and c")
    b, c, loss = minimize_b_c(X1, Y1, a, b, c, epochs=20000)
    data.loc[k] = [j, False, True, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("*" * 60)
    print("  ")
    print("minimize a")

    a, loss = minimize_a(X1, Y1, a, b, c, epochs=20000)
    data.loc[k + 1] = [j, True, False, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("  ")
    k += 2
    j += 1

Minimize b and c
x average at 0 epoch tensor([[3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778]])
yhat at 0 epoch tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
 

In [79]:
data

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,"[[0.0], [0.0]]","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]","[0.0, 0.0, 0.0]",1.098612
1,1,False,True,"[[0.0], [0.0]]","[[-0.2330434, 3.5465553, -3.3135319], [0.02769...","[0.6747827, -0.27779776, -0.39699584]",0.883692
2,1,True,False,"[[-1.6182199], [-1.8297129]]","[[-0.2330434, 3.5465553, -3.3135319], [0.02769...","[0.6747827, -0.27779776, -0.39699584]",0.307433
3,2,False,True,"[[-1.6182199], [-1.8297129]]","[[-0.8892689, 5.670144, -4.780907], [-0.899577...","[3.8455381, -1.9679964, -1.8775539]",0.049854
4,2,True,False,"[[-1.6298165], [-1.8771737]]","[[-0.8892689, 5.670144, -4.780907], [-0.899577...","[3.8455381, -1.9679964, -1.8775539]",0.049842
5,3,False,True,"[[-1.6298165], [-1.8771737]]","[[-1.2377063, 6.5804768, -5.3427815], [-1.2545...","[4.981026, -2.5333025, -2.4477782]",0.026788
6,3,True,False,"[[-1.622664], [-1.8890946]]","[[-1.2377063, 6.5804768, -5.3427815], [-1.2545...","[4.981026, -2.5333025, -2.4477782]",0.026787
7,4,False,True,"[[-1.622664], [-1.8890946]]","[[-1.4543763, 7.141674, -5.687386], [-1.472334...","[5.679488, -2.88177, -2.79781]",0.018215
8,4,True,False,"[[-1.6131272], [-1.8914788]]","[[-1.4543763, 7.141674, -5.687386], [-1.472334...","[5.679488, -2.88177, -2.79781]",0.018214
9,5,False,True,"[[-1.6131272], [-1.8914788]]","[[-1.6113456, 7.54696, -5.9357605], [-1.629298...","[6.183505, -3.1333869, -3.0502567]",0.013773


In [51]:
# data.to_csv("data_1_m_9_1.csv",index=False)

In [65]:
# a = torch.tensor([0.],requires_grad=True)
# b = torch.tensor([0.],requires_grad=True)
# c = torch.tensor([0.],requires_grad=True)

# Alternating minimisation on the m = 9 data, focus parameter a first.
# All parameters start at zero.
a = torch.zeros((2, 1), requires_grad=True)
b = torch.zeros((2, 3), requires_grad=True)
c = torch.zeros(3, requires_grad=True)

criterion = CrossEntropyLoss()

# One row per optimisation phase: which parameters were frozen, the parameter
# values after the phase and the loss reached.
data = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])

# Row 0 records the loss of the untrained model.
X1_average = focus_(a, X1)
Yhat1 = classification_(b, c, X1_average)
initial_loss = criterion(Yhat1, Y1)

k = 0  # next free row of `data`
data.loc[k] = [k, True, True, *(p.detach().clone().numpy() for p in (a, b, c)), initial_loss.item()]
k += 1
j = 1  # round counter stored in the "sno" column

for i in range(0, 40, 2):
    print("minimize a")
    a, loss = minimize_a(X1, Y1, a, b, c, epochs=20000)
    data.loc[k] = [j, True, False, *(p.detach().clone().numpy() for p in (a, b, c)), loss]

    print("*" * 60)
    print("  ")

    print("Minimize b and c")
    b, c, loss = minimize_b_c(X1, Y1, a, b, c, epochs=20000)
    data.loc[k + 1] = [j, False, True, *(p.detach().clone().numpy() for p in (a, b, c)), loss]
    print("  ")
    k += 2
    j += 1

minimize a
x average at 0 epoch tensor([[3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778],
        [3.2222, 3.6667],
        [3.3333, 3.6667],
        [3.2222, 3.7778]])
yhat at 0 epoch tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
       

In [77]:
data

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,"[[0.0], [0.0]]","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]","[0.0, 0.0, 0.0]",1.098612
1,1,True,False,"[[0.0], [0.0]]","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]","[0.0, 0.0, 0.0]",1.098612
2,1,False,True,"[[0.0], [0.0]]","[[-0.2330434, 3.5465553, -3.3135319], [0.02769...","[0.6747827, -0.27779776, -0.39699584]",0.883692
3,2,True,False,"[[-1.6182199], [-1.8297129]]","[[-0.2330434, 3.5465553, -3.3135319], [0.02769...","[0.6747827, -0.27779776, -0.39699584]",0.307433
4,2,False,True,"[[-1.6182199], [-1.8297129]]","[[-0.8892689, 5.670144, -4.780907], [-0.899577...","[3.8455381, -1.9679964, -1.8775539]",0.049854
5,3,True,False,"[[-1.6298165], [-1.8771737]]","[[-0.8892689, 5.670144, -4.780907], [-0.899577...","[3.8455381, -1.9679964, -1.8775539]",0.049842
6,3,False,True,"[[-1.6298165], [-1.8771737]]","[[-1.2377063, 6.5804768, -5.3427815], [-1.2545...","[4.981026, -2.5333025, -2.4477782]",0.026788
7,4,True,False,"[[-1.622664], [-1.8890946]]","[[-1.2377063, 6.5804768, -5.3427815], [-1.2545...","[4.981026, -2.5333025, -2.4477782]",0.026787
8,4,False,True,"[[-1.622664], [-1.8890946]]","[[-1.4543763, 7.141674, -5.687386], [-1.472334...","[5.679488, -2.88177, -2.79781]",0.018215
9,5,True,False,"[[-1.6131272], [-1.8914788]]","[[-1.4543763, 7.141674, -5.687386], [-1.472334...","[5.679488, -2.88177, -2.79781]",0.018214


In [None]:
# Write the results table to disk, without the row index.
data.to_csv(path_or_buf="data_1_m_9_2.csv", index=False)

# m = 50

In [None]:
# Two-class data with m = 50 scalar patches per sample: every entry is 3
# except the diagonal, which marks the class.
idx = np.arange(50)

# Class 0: diagonal entries are -1.
X2 = np.full((50, 50), 3.0)
X2[idx, idx] = -1

# Class 1: diagonal entries are 1; then stack class 0 on top of class 1.
X3 = np.full((50, 50), 3.0)
X3[idx, idx] = 1
X3 = np.vstack((X2, X3))
print(X3,X3.shape)

# Labels: the first 50 rows are class 0, the last 50 rows are class 1.
Y3 = np.concatenate((np.zeros(50), np.ones(50)))
print(Y3,Y3.shape)

[[-1.  3.  3. ...  3.  3.  3.]
 [ 3. -1.  3. ...  3.  3.  3.]
 [ 3.  3. -1. ...  3.  3.  3.]
 ...
 [ 3.  3.  3. ...  1.  3.  3.]
 [ 3.  3.  3. ...  3.  1.  3.]
 [ 3.  3.  3. ...  3.  3.  1.]] (100, 50)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.] (100,)


In [None]:
def softmax_(x):
    """Row-wise softmax of a 2-D tensor that also prints the row maxima.

    Same computation as a max-shifted softmax along dimension 1; the result
    of torch.max (values and indices) is printed before it is used.
    """
    row_max = torch.max(x, dim=1, keepdim=True)
    print(row_max)
    exp_shifted = (x - row_max.values).exp()
    normaliser = exp_shifted.sum(dim=1, keepdim=True)
    return exp_shifted / normaliser

In [None]:
# Scalar focus parameter a applied to a single sample with patches 3 and -1:
# the output is the softmax(a*x)-weighted sum of the patches.
a = torch.tensor([10.0], requires_grad=True)
x = torch.tensor([[3.0, -1.0]])
out = (softmax_(a * x) * x).sum(dim=1)
print(out)

torch.return_types.max(
values=tensor([[30.]], grad_fn=<MaxBackward0>),
indices=tensor([[0]]))
tensor([3.], grad_fn=<SumBackward1>)


In [None]:
# Backpropagate from `out` so that autograd fills in `a.grad`.
out.backward()

In [None]:
# Autograd's derivative of `out` with respect to `a`.
a.grad

tensor([6.7974e-17])

In [None]:
# Closed-form derivative of sum(softmax(a*x) * x) with respect to a for a
# sample with two patches x0 = x[0,0] and x1 = x[0,1], to compare against the
# value autograd stored in a.grad.
# l is the squared softmax normaliser (exp(a*x0) + exp(a*x1))**2.
l = ( torch.exp(a*x[0,0],) + torch.exp(a*x[0,1]) ) **2


#print(l)


# Derivative of the first softmax weight, multiplied by its patch value x0.
f1 = ( ( ( x[0,0] - x[0,1] ) * torch.exp(a*x[0,0] + a*x[0,1]) ) / l  ) * x[0,0]

# Derivative of the second softmax weight, multiplied by its patch value x1.
f2 = ( ( ( x[0,1] - x[0,0] ) * torch.exp(a*x[0,0] + a*x[0,1]) ) / l  ) * x[0,1]
print(f1.item()+f2.item())

6.797367106130223e-17


In [None]:
# NumPy / plain-number version of the single-sample check: patches 3 and -1,
# focus weight a = 10, classifier b = 1, c = 0, label 1.
# NOTE(review): `softmax` and `classification_` as defined earlier in this
# notebook call torch.max / torch.matmul, so passing NumPy arrays and Python
# ints here presumably targets an older NumPy implementation of those helpers
# -- confirm this cell still runs on a fresh kernel.
x = np.array([[3,-1]])
a = 10
b = 1
c = 0
y = np.array([1])
xhat = np.sum(softmax(a*x) * x,axis=1)
print(xhat)
yhat = classification_(b,c,xhat)

[3.]


In [None]:
# NOTE(review): six arguments are passed here, but the `derv_f` defined earlier
# in this notebook takes only `a`; this call presumably targets an older
# analytic version of the function -- confirm before re-running.
derv_f(x,xhat,y,yhat,a,b)

[6.79736681e-17]


-3.223710561997351e-18