# import statements

In [1]:
import torch.nn.functional as F
import torch
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.metrics import log_loss
import pandas as pd

from torch.nn import BCELoss
#from scipy.optimize import fmin

# utils

In [2]:
def softmax(x):
    """Softmax along dim 1 of a tensor with at least two dimensions.

    The maximum of each row is subtracted before exponentiating, which leaves
    the result unchanged but keeps ``exp`` from overflowing on large scores.
    Every slice along dim 1 of the returned tensor sums to 1.
    """
    row_max = torch.max(x, dim=1, keepdim=True).values
    exps = torch.exp(x - row_max)
    normaliser = torch.sum(exps, dim=1, keepdim=True)
    return exps / normaliser

In [3]:
def focus_(a,x):
  """
  Focus function parametrized by a*x.

  The scores a*x are turned into weights with softmax (along dim 1) and the
  entries of x are averaged with those weights.

  returns : sum(softmax(a*x) * x, dim=1), the averaged input that is fed to
            the classification function
  """
  weights = softmax(a*x)
  return torch.sum(weights*x,dim=1)

def classification_(b,c,x):
  """
  Classification function parametrized by b*x + c.

  b, c : slope and intercept (tensors or Python numbers)
  x    : tensor of inputs (in this notebook, the output of focus_)

  returns : sigmoid(b*x + c), element-wise
  """
  logits = (b*x) + c
  # torch.sigmoid instead of the hand-written 1/(1+exp(-z)): the forward value
  # is the same, but the manual form overflows exp(-z) to inf for very negative
  # z and autograd then produces 0*inf = NaN gradients.
  return torch.sigmoid(logits)

In [4]:
def derv_g(b,c):
  """
  Derivative of the log-loss with respect to b and c, read from autograd.

  Nothing is computed here: the .grad attributes of b and c are returned as
  they are, so backward() must already have been called on the loss
  (each entry is None otherwise).

  returns : (b.grad, c.grad)
  """
  # Earlier closed-form version, kept for reference:
  # db = np.dot(xhat,yhat-y)/xhat.shape[0]
  # dc  = np.sum(yhat-y)/xhat.shape[0]
  return b.grad,c.grad

In [5]:
def derv_f(a):
  """
  Derivative of the log-loss with respect to a, read from autograd.

  Returns a.grad unchanged; backward() must already have been called on the
  loss (the result is None otherwise).
  """
  # Earlier closed-form version for a two-column x, kept for reference:
  # da = np.sum((yhat-y)*b*(  ( (x[:,0] - x[:,1])* x[:,0] ) +  ( (x[:,1]- x[:,0]) * x[:,1] )   ) * (np.exp((a*x[:,0]+a*x[:,1]))/ (np.exp(a*x[:,0])+ np.exp(a*x[:,1]))**2 )) / xhat.shape[0]
  return a.grad

In [6]:

def gd(w,dw,eta=0.1):
  """
  One gradient-descent step: returns w - eta*dw.

  w   : parameter tensor
  dw  : gradient of the loss with respect to w (same shape as w)
  eta : learning rate; defaults to 0.1, the value that used to be hard-coded

  The step is element-wise, so parameters of any shape are supported (the
  previous torch.dot form only accepted 1-element 1-D float32 tensors).
  The update runs under torch.no_grad(), so the returned tensor is a new
  tensor with requires_grad=False; w itself is not modified.

  raises : TypeError if dw is None (backward() has not populated the gradient)
  """
  if dw is None:
    raise TypeError("gd: gradient is None - call backward() on a loss that depends on w first")
  with torch.no_grad():
    # keep the step size in the gradient's dtype/device so no promotion happens
    step = torch.as_tensor(eta,dtype=dw.dtype,device=dw.device)
    w = w - step*dw
  return w

# m = 2

In [7]:
# mosaic data m = 2 , d= 1 : one row per sample, one label per row
X = torch.tensor([
    [-3, -1],
    [-1, -3],
    [ 1, -3],
    [-3,  1],
])
Y = torch.tensor([0, 0, 1, 1])

In [8]:
def minimize_b_c(x,y,a,b,c,epochs=1000):
  """
  Gradient descent on the classification parameters b and c with the focus
  parameter a held fixed.

  x, y   : input tensor and 0/1 label tensor
  a      : fixed focus parameter (tensor or Python number)
  b, c   : starting values (1-element tensors or Python numbers)
  epochs : maximum number of gradient steps

  Stops early as soon as the BCE loss is at most half of its starting value.
  Prints the state before and after the run.

  The function works on detached float32 copies, so the caller's tensors are
  never modified (no requires_grad flips, no gradient accumulation) and a may
  be a non-leaf tensor or a plain number.

  returns : (b, c, final_loss) - b and c are new 1-element tensors with
            requires_grad=False, final_loss is a Python float
  """
  criterion = BCELoss()
  y = y.float()

  a = torch.as_tensor(a,dtype=torch.float32).detach()
  b = torch.as_tensor(b,dtype=torch.float32).detach().reshape(-1)
  c = torch.as_tensor(c,dtype=torch.float32).detach().reshape(-1)

  with torch.no_grad():
    # a never changes in this routine, so the focus output is computed once
    x_average = focus_(a,x)
    yhat = classification_(b,c,x_average)
    initial_loss = criterion(yhat,y)

  print("x average at 0 epoch", x_average )
  print("yhat at 0 epoch",yhat)
  print("loss at 0 epoch",initial_loss.item())

  i = -1  # reported as the last epoch index; stays -1 when epochs == 0
  for i in range(epochs):
    # fresh leaves every step: gradients cannot accumulate across steps
    b = b.detach().requires_grad_(True)
    c = c.detach().requires_grad_(True)

    loss = criterion(classification_(b,c,x_average),y)
    loss.backward()

    # both gradients are read before either parameter is replaced
    b,c = gd(b,b.grad),gd(c,c.grad)

    with torch.no_grad():
      current_loss = criterion(classification_(b,c,x_average),y)
    if current_loss<= (initial_loss)/2:
      break
  print("   ")
  with torch.no_grad():
    print("x average",x_average)
    yhat = classification_(b,c,x_average)
    print("Y hat",yhat)
    current_loss  = criterion(yhat,y)

  print("Loss",current_loss.item(),i)

  return b,c,current_loss.item()



In [None]:
# Fit (b, c) on the m = 2 data with every parameter starting at zero.
a, b, c = (torch.tensor([0.],requires_grad=True) for _ in range(3))
b,c,loss = minimize_b_c(X,Y,a,b,c)

In [10]:
def minimize_a(x,y,a,b,c,epochs=1000):
  """
  Gradient descent on the focus parameter a with the classification
  parameters b and c held fixed.

  x, y   : input tensor and 0/1 label tensor
  a      : starting value (1-element tensor or Python number)
  b, c   : fixed classification parameters (tensors or Python numbers)
  epochs : maximum number of gradient steps

  Stops early as soon as the BCE loss is at most half of its starting value.
  Prints the state before and after the run.

  The function works on detached float32 copies, so the caller's tensors are
  never modified. The final loss is computed with the same BCELoss criterion
  used during the descent (and by minimize_b_c) rather than sklearn's
  log_loss, so the reported value is the float32 BCE loss.

  returns : (a, final_loss) - a is a new 1-element tensor with
            requires_grad=False, final_loss is a Python float
  """
  criterion = BCELoss()
  y = y.float()

  a = torch.as_tensor(a,dtype=torch.float32).detach().reshape(-1)
  b = torch.as_tensor(b,dtype=torch.float32).detach()
  c = torch.as_tensor(c,dtype=torch.float32).detach()

  with torch.no_grad():
    x_average = focus_(a,x)
    yhat = classification_(b,c,x_average)
    initial_loss = criterion(yhat,y)
  print("x average at 0 epoch", x_average )
  print("yhat at 0 epoch",yhat)
  print("loss at 0 epoch",initial_loss.item())

  i = -1  # reported as the last epoch index; stays -1 when epochs == 0
  for i in range(epochs):
    # fresh leaf every step: the gradient cannot accumulate across steps
    a = a.detach().requires_grad_(True)

    loss = criterion(classification_(b,c,focus_(a,x)),y)
    loss.backward()

    a = gd(a,derv_f(a))

    with torch.no_grad():
      current_loss = criterion(classification_(b,c,focus_(a,x)),y)
    if current_loss <= initial_loss/2:
      break
  print("*"*60)
  with torch.no_grad():
    x_average = focus_(a,x)
    print("x average",x_average)
    yhat = classification_(b,c,x_average)
    print("Y hat",yhat)
    current_loss = criterion(yhat,y)
  print("Loss",current_loss.item(),i)
  return a,current_loss.item()

In [None]:
# Fit a alone on the m = 2 data while b and c stay at zero.
a = torch.tensor([0.],requires_grad=True)
b, c = torch.zeros(1), torch.zeros(1)
a,loss = minimize_a(X,Y,a,b,c)

In [None]:
# Sweep a over 20 fixed values in [-1, 1] and minimise (b, c) for each one.
# The grid is a set of constants, not a trainable parameter, so it is built
# without requires_grad: elements taken from a grad-tracking tensor are
# non-leaf tensors whose requires_grad flag cannot be changed.
a = torch.tensor(np.linspace(-1,1,20),dtype=torch.float32)
b_list = []
c_list = []
loss_list = []
b = torch.tensor([0.],requires_grad=True)
c = torch.tensor([0.],requires_grad=True)
# NOTE(review): b and c are carried over from one grid point to the next
# (warm start) - confirm this is intended rather than a reset to 0 per a.
for a1 in a:
  b,c,loss = minimize_b_c(X,Y,a=a1,b=b,c=c)
  b_list.append(b.item())
  c_list.append(c.item())
  loss_list.append(loss)



In [None]:
a = np.linspace(-1,1,20)

# One figure per recorded quantity: (y-values, y-axis label, title, output file).
for _ys, _ylabel, _title, _fname in [
    (loss_list, "log-loss", "loss plot for fix value of a ", "loss_fixed_a.png"),
    (b_list, "b", "Minimized value of b for fixed a", "minimized_b_fixed_a.png"),
    (c_list, "c", "Minimized value of c for fixed a", "minimized_c_fixed_a.png"),
]:
  plt.figure(figsize=(6,5))
  plt.plot(a,_ys,"*-")
  plt.grid()
  plt.xlabel("a")
  plt.ylabel(_ylabel)
  plt.title(_title)
  plt.savefig(_fname)



In [None]:
# Loss surface over a grid of (b, c) values with the focus parameter fixed at a_.
bb,cc= np.meshgrid(np.arange(-21,21,0.2),np.arange(-21,21,0.2))
b_ = bb.reshape(-1,1)
c_ = cc.reshape(-1,1)
a_ = 0
with torch.no_grad():
  # The notebook's helpers are written with torch ops, so the grid is handled
  # as float64 tensors instead of mixing numpy arrays with tensors.
  x_average_ = focus_(torch.tensor([float(a_)]),X).double()          # one averaged input per sample
  logits_ = torch.from_numpy(b_)*x_average_ + torch.from_numpy(c_)   # (grid points, samples)
  targets_ = Y.double().expand_as(logits_)
  # Sigmoid and binary cross-entropy evaluated directly on b*x + c (the same
  # model as classification_ followed by log-loss), in one vectorised call
  # instead of one sklearn log_loss call per grid point. Averaging over the
  # samples gives one loss value per grid point.
  loss_ = F.binary_cross_entropy_with_logits(logits_,targets_,reduction="none").mean(dim=1).numpy()
plt.figure(figsize=(6,5))
cs = plt.contourf(b_.reshape(bb.shape),c_.reshape(cc.shape),loss_.reshape(bb.shape))
plt.xlabel("b")
plt.ylabel("c")
plt.colorbar(cs)

# black marker: the (b, c) = (0, 0) starting point
plt.scatter(0, 0,c="black",s=100)
# NOTE(review): the red marker's coordinates are hard-coded, presumably the
# (b, c) reached by an earlier minimisation run - confirm they are still current.
plt.scatter(15.625194533788827, -1.5129474107626304,c="r",s=100)

plt.title("contour plot for fixed a = "+str(a_) )

plt.savefig("contour_b_c_a_0.png")


In [None]:
# Start values are passed as 1-element float tensors (the form every other
# call in this notebook uses) rather than Python ints.
minimize_b_c(X,Y,torch.tensor([0.]),torch.tensor([0.]),torch.tensor([0.]))

In [None]:
a = np.linspace(-10,10,5000)
# log-loss as a function of a with the classifier fixed at b = c = -10
loss = [log_loss(Y,classification_(-10,-10,focus_(a1,X)),) for a1 in a]

plt.plot(a,loss)
plt.xlabel("a")
plt.ylabel("log-loss")
plt.title("loss plot for fix value of  b and c")
plt.savefig("loss_landscape_b_n10_c_n10.png")

# Alternating minimization

In [12]:
# Alternating minimisation on the m = 2 data; every round does the (b, c)
# step first and the a step second. All parameters start at zero.
a, b, c = (torch.tensor([0.],requires_grad=True) for _ in range(3))

data = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])
criterion = BCELoss()
Y_ = Y.float()
X_average = focus_(a,X)
Yhat = classification_(b,c,X_average)
initial_loss = criterion(Yhat,Y_)

# Row 0 records the starting point before any step is taken.
data.loc[0] = [0,True,True,a.item(),b.item(),c.item(),initial_loss.item()]
k, j = 1, 1   # k: next free row of the log, j: round number stored in "sno"

for i in range(0,40,2):   # 20 rounds, two log rows per round
  print("Minimize b and c")
  b,c,loss = minimize_b_c(X,Y,a,b,c)
  data.loc[k] = [j,False,True,a.item(),b.item(),c.item(),loss]
  print("*"*60)
  print("  ")
  print("minimize a")

  a,loss = minimize_a(X,Y,a,b,c)
  data.loc[k+1] = [j,True,False,a.item(),b.item(),c.item(),loss]
  print("  ")
  k += 2
  j += 1

Minimize b and c
x average at 0 epoch tensor([-2., -2., -1., -1.])
yhat at 0 epoch tensor([0.5000, 0.5000, 0.5000, 0.5000])
loss at 0 epoch 0.6931471824645996
   
x average tensor([-2., -2., -1., -1.])
Y hat tensor([0.2220, 0.2220, 0.6431, 0.6431])
Loss 0.3461853861808777 293
************************************************************
  
minimize a
x average at 0 epoch tensor([-2., -2., -1., -1.])
yhat at 0 epoch tensor([0.2220, 0.2220, 0.6431, 0.6431])
loss at 0 epoch 0.3461853861808777
************************************************************
x average tensor([-1.7560, -1.7560, -0.0790, -0.0790])
Y hat tensor([0.3091, 0.3091, 0.9078, 0.9078])
Loss 0.2332252934575081 999
  
Minimize b and c
x average at 0 epoch tensor([-1.7560, -1.7560, -0.0790, -0.0790])
yhat at 0 epoch tensor([0.3091, 0.3091, 0.9078, 0.9078])
loss at 0 epoch 0.2332252860069275
   
x average tensor([-1.7560, -1.7560, -0.0790, -0.0790])
Y hat tensor([0.1087, 0.1087, 0.8889, 0.8889])
Loss 0.11640934646129608 42
***

In [13]:
# Display the log filled in by the alternating-minimisation loop above.
data

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,0.0,0.0,0.0,0.693147
1,1,False,True,0.0,1.843284,2.432323,0.346185
2,1,True,False,0.248986,1.843284,2.432323,0.233225
3,2,False,True,0.248986,2.494743,2.276875,0.116409
4,2,True,False,0.372226,2.494743,2.276875,0.100653
5,3,False,True,0.372226,3.107741,2.152291,0.050309
6,3,True,False,0.464734,3.107741,2.152291,0.045757
7,4,False,True,0.464734,3.717441,2.052544,0.022854
8,4,True,False,0.540375,3.717441,2.052544,0.021371
9,5,False,True,0.540375,4.331272,1.975327,0.01068


In [14]:
# Write the log to CSV without the row index.
data.to_csv("data_2_m_2_1.csv",index=False)

In [15]:
# Alternating minimisation on the m = 2 data; every round does the a step
# first and the (b, c) step second. All parameters start at zero.
a, b, c = (torch.tensor([0.],requires_grad=True) for _ in range(3))

data_1 = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])
criterion = BCELoss()

X_average = focus_(a,X)
Yhat = classification_(b,c,X_average)
Y_ = Y.float()
initial_loss = criterion(Yhat,Y_)

# Row 0 records the starting point before any step is taken.
data_1.loc[0] = [0,True,True,a.item(),b.item(),c.item(),initial_loss.item()]
k, j = 1, 1   # k: next free row of the log, j: round number stored in "sno"

for i in range(0,40,2):   # 20 rounds, two log rows per round
  print("minimize a")
  a,loss = minimize_a(X,Y,a,b,c)
  data_1.loc[k] = [j,True,False,a.item(),b.item(),c.item(),loss]
  print("*"*60)
  print("  ")

  print("Minimize b and c")
  b,c,loss = minimize_b_c(X,Y,a,b,c)
  data_1.loc[k+1] = [j,False,True,a.item(),b.item(),c.item(),loss]
  print("  ")
  k += 2
  j += 1

minimize a
x average at 0 epoch tensor([-2., -2., -1., -1.])
yhat at 0 epoch tensor([0.5000, 0.5000, 0.5000, 0.5000])
loss at 0 epoch 0.6931471824645996
************************************************************
x average tensor([-2., -2., -1., -1.])
Y hat tensor([0.5000, 0.5000, 0.5000, 0.5000])
Loss 0.6931471824645996 999
************************************************************
  
Minimize b and c
x average at 0 epoch tensor([-2., -2., -1., -1.])
yhat at 0 epoch tensor([0.5000, 0.5000, 0.5000, 0.5000])
loss at 0 epoch 0.6931471824645996
   
x average tensor([-2., -2., -1., -1.])
Y hat tensor([0.2220, 0.2220, 0.6431, 0.6431])
Loss 0.3461853861808777 293
  
minimize a
x average at 0 epoch tensor([-2., -2., -1., -1.])
yhat at 0 epoch tensor([0.2220, 0.2220, 0.6431, 0.6431])
loss at 0 epoch 0.3461853861808777
************************************************************
x average tensor([-1.7560, -1.7560, -0.0790, -0.0790])
Y hat tensor([0.3091, 0.3091, 0.9078, 0.9078])
Loss 0.23322

In [16]:
# Write the a-first log to CSV without the row index.
data_1.to_csv("data_2_m_2_2.csv",index=False)

In [17]:
# Display the a-first log.
data_1

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,0.0,0.0,0.0,0.693147
1,1,True,False,0.0,0.0,0.0,0.693147
2,1,False,True,0.0,1.843284,2.432323,0.346185
3,2,True,False,0.248986,1.843284,2.432323,0.233225
4,2,False,True,0.248986,2.494743,2.276875,0.116409
5,3,True,False,0.372226,2.494743,2.276875,0.100653
6,3,False,True,0.372226,3.107741,2.152291,0.050309
7,4,True,False,0.464734,3.107741,2.152291,0.045757
8,4,False,True,0.464734,3.717441,2.052544,0.022854
9,5,True,False,0.540375,3.717441,2.052544,0.021371


# m = 9

In [18]:
# mosaic data m = 9 , d= 1
# Background value -3 everywhere; for each column j, row 2j carries -1 in
# column j (label 0) and row 2j+1 carries 1 in column j (label 1).
_m = 9
_cols = torch.arange(_m)
X1 = torch.full((2*_m,_m),-3,dtype=torch.int64)
X1[2*_cols,_cols] = -1
X1[2*_cols+1,_cols] = 1
Y1 = torch.tensor([0,1]*_m)

In [19]:
# Alternating minimisation on the m = 9 data; every round does the (b, c)
# step first and the a step second, with up to 20000 steps per call.
a, b, c = (torch.tensor([0.],requires_grad=True) for _ in range(3))

data = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])
criterion = BCELoss()
Y1_ = Y1.float()
X1_average = focus_(a,X1)
Yhat1 = classification_(b,c,X1_average)
initial_loss = criterion(Yhat1,Y1_)

# Row 0 records the starting point before any step is taken.
data.loc[0] = [0,True,True,a.item(),b.item(),c.item(),initial_loss.item()]
k, j = 1, 1   # k: next free row of the log, j: round number stored in "sno"

for i in range(0,40,2):   # 20 rounds, two log rows per round
  print("Minimize b and c")
  b,c,loss = minimize_b_c(X1,Y1,a,b,c,epochs=20000)
  data.loc[k] = [j,False,True,a.item(),b.item(),c.item(),loss]
  print("*"*60)
  print("  ")
  print("minimize a")

  a,loss = minimize_a(X1,Y1,a,b,c,epochs=20000)
  data.loc[k+1] = [j,True,False,a.item(),b.item(),c.item(),loss]
  print("  ")
  k += 2
  j += 1

Minimize b and c
x average at 0 epoch tensor([-2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556])
yhat at 0 epoch tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000])
loss at 0 epoch 0.6931471824645996
   
x average tensor([-2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556])
Y hat tensor([0.2821, 0.6965, 0.2821, 0.6965, 0.2821, 0.6965, 0.2821, 0.6965, 0.2821,
        0.6965, 0.2821, 0.6965, 0.2821, 0.6965, 0.2821, 0.6965, 0.2821, 0.6965])
Loss 0.34656819701194763 15077
************************************************************
  
minimize a
x average at 0 epoch tensor([-2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.777

In [20]:
# Display the m = 9 log filled in by the loop above.
data

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,0.0,0.0,0.0,0.693147
1,1,False,True,0.0,7.94103,21.124401,0.346568
2,1,True,False,0.110344,7.94103,21.124401,0.267054
3,2,False,True,0.110344,10.221791,25.885887,0.133527
4,2,True,False,0.175237,10.221791,25.885887,0.09967
5,3,False,True,0.175237,11.828184,28.848019,0.049835
6,3,True,False,0.224444,11.828184,28.848019,0.036294
7,4,False,True,0.224444,13.103193,30.899794,0.018147
8,4,True,False,0.265745,13.103193,30.899794,0.012918
9,5,False,True,0.265745,14.176758,32.377911,0.006459


In [21]:
# Write the m = 9 log to CSV without the row index.
data.to_csv("data_2_m_9_1.csv",index=False)

In [22]:
# Alternating minimisation on the m = 9 data; every round does the a step
# first and the (b, c) step second, with up to 20000 steps per call.
a, b, c = (torch.tensor([0.],requires_grad=True) for _ in range(3))

data = pd.DataFrame(columns=["sno","b_c_fixed","a_fixed","a_value","b_value","c_value","loss"])
criterion = BCELoss()
Y1_ = Y1.float()
X1_average = focus_(a,X1)
Yhat1 = classification_(b,c,X1_average)
initial_loss = criterion(Yhat1,Y1_)

# Row 0 records the starting point before any step is taken.
data.loc[0] = [0,True,True,a.item(),b.item(),c.item(),initial_loss.item()]
k, j = 1, 1   # k: next free row of the log, j: round number stored in "sno"

for i in range(0,40,2):   # 20 rounds, two log rows per round
  print("minimize a")
  a,loss = minimize_a(X1,Y1,a,b,c,epochs=20000)
  data.loc[k] = [j,True,False,a.item(),b.item(),c.item(),loss]

  print("*"*60)
  print("  ")

  print("Minimize b and c")
  b,c,loss = minimize_b_c(X1,Y1,a,b,c,epochs=20000)
  data.loc[k+1] = [j,False,True,a.item(),b.item(),c.item(),loss]
  print("  ")
  k += 2
  j += 1

minimize a
x average at 0 epoch tensor([-2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556])
yhat at 0 epoch tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000])
loss at 0 epoch 0.6931471824645996
************************************************************
x average tensor([-2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556, -2.7778, -2.5556,
        -2.7778, -2.5556])
Y hat tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000])
Loss 0.6931471824645996 19999
************************************************************
  
Minimize b and c
x average at 0 epoch tensor([-2.7

In [23]:
# Display the m = 9, a-first log filled in by the loop above.
data

Unnamed: 0,sno,b_c_fixed,a_fixed,a_value,b_value,c_value,loss
0,0,True,True,0.0,0.0,0.0,0.693147
1,1,True,False,0.0,0.0,0.0,0.693147
2,1,False,True,0.0,7.94103,21.124401,0.346568
3,2,True,False,0.110344,7.94103,21.124401,0.267054
4,2,False,True,0.110344,10.221791,25.885887,0.133527
5,3,True,False,0.175237,10.221791,25.885887,0.09967
6,3,False,True,0.175237,11.828184,28.848019,0.049835
7,4,True,False,0.224444,11.828184,28.848019,0.036294
8,4,False,True,0.224444,13.103193,30.899794,0.018147
9,5,True,False,0.265745,13.103193,30.899794,0.012918


In [24]:
# Write the m = 9, a-first log to CSV without the row index.
data.to_csv("data_2_m_9_2.csv",index=False)

# m = 50

In [None]:
# m = 50 data: two 50x50 blocks filled with 3, one with -1 on the diagonal
# (label 0) and one with 1 on the diagonal (label 1), stacked into 100 rows.
# NOTE(review): the fill value here is +3 while the m = 2 and m = 9 data use
# -3 - confirm the sign is intended.
idx = np.arange(50)

X2 = np.full((50,50),3.0)
X2[idx,idx] = -1

X3 = np.full((50,50),3.0)
X3[idx,idx] = 1

X3 = np.vstack((X2,X3))
print(X3,X3.shape)

Y3 = np.concatenate((np.zeros(50),np.ones(50)))
print(Y3,Y3.shape)

[[-1.  3.  3. ...  3.  3.  3.]
 [ 3. -1.  3. ...  3.  3.  3.]
 [ 3.  3. -1. ...  3.  3.  3.]
 ...
 [ 3.  3.  3. ...  1.  3.  3.]
 [ 3.  3.  3. ...  3.  1.  3.]
 [ 3.  3.  3. ...  3.  3.  1.]] (100, 50)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.] (100,)


In [None]:
def softmax_(x):
    """Debugging twin of softmax: same result, but prints the row-wise max.

    The object printed is the (values, indices) result of torch.max along
    dim 1, before it is used to shift the scores.
    """
    row_max = torch.max(x, dim=1, keepdim=True)
    print(row_max)
    exps = torch.exp(x - row_max.values)
    return exps / exps.sum(dim=1, keepdim=True)

In [None]:
# Gradient check on a single two-element row: build the focus output
# sum(softmax(a*x) * x) with autograd tracking on a.
# Note that this rebinds the notebook-level names a and x.
a = torch.tensor([10.],requires_grad=True)
x = torch.tensor([[3.,-1.]])
out = torch.sum(softmax_(a*x) * x,dim=1)
print(out)

torch.return_types.max(
values=tensor([[30.]], grad_fn=<MaxBackward0>),
indices=tensor([[0]]))
tensor([3.], grad_fn=<SumBackward1>)


In [None]:
# Back-propagate from the focus output so that a.grad is populated.
out.backward()

In [None]:
# Autograd's derivative of the focus output with respect to a.
a.grad

tensor([6.7974e-17])

In [None]:
# Hand-written expression built from the two entries of x[0]; presumably the
# analytic counterpart of a.grad from the previous cell, printed for comparison.
# l is the squared sum of the two exponentials exp(a*x0) + exp(a*x1).
l = ( torch.exp(a*x[0,0],) + torch.exp(a*x[0,1]) ) **2


#print(l)


# f1 and f2 are the contributions weighted by x0 and x1 respectively.
f1 = ( ( ( x[0,0] - x[0,1] ) * torch.exp(a*x[0,0] + a*x[0,1]) ) / l  ) * x[0,0]

f2 = ( ( ( x[0,1] - x[0,0] ) * torch.exp(a*x[0,0] + a*x[0,1]) ) / l  ) * x[0,1]
print(f1.item()+f2.item())

6.797367106130223e-17


In [None]:
# Forward pass on a single row with fixed a, b, c.
# softmax and classification_ are implemented with torch ops, so x is a float
# tensor and the weighted sum uses torch.sum (a numpy array is rejected by them).
x = torch.tensor([[3.,-1.]])
a = 10
b = 1
c = 0
y = np.array([1])
xhat = torch.sum(softmax(a*x) * x,dim=1)
print(xhat)
yhat = classification_(b,c,xhat)

[3.]


In [None]:
# NOTE(review): derv_f as defined earlier in this notebook takes a single
# argument (a) and returns a.grad; this six-argument call appears to target an
# older closed-form version and needs updating before the cell can run.
derv_f(x,xhat,y,yhat,a,b)

[6.79736681e-17]


-3.223710561997351e-18