In [1]:
import numpy as np

In [2]:
def sample_points(k, num_features=50):
    """Draw ``k`` random points with random binary labels.

    Args:
        k: Number of points to draw.
        num_features: Number of features per point. Defaults to 50, the
            width the function produced before this parameter existed.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a ``(k, num_features)`` array of
        floats drawn uniformly from ``[0, 1)`` and ``y`` is a ``(k, 1)``
        array of labels, each 0 or 1 with equal probability.
    """
    # Features first, labels second: the order in which the global NumPy
    # random stream is consumed is kept so seeded runs stay reproducible.
    x = np.random.rand(k, num_features)
    y = np.random.choice([0, 1], size=k, p=[.5, .5]).reshape([-1, 1])
    return x, y

In [3]:
# Draw a small batch and show its first feature row followed by its label.
x, y = sample_points(10)
print(x[0], y[0], sep="\n")

[0.50085413 0.24295302 0.14185116 0.35753217 0.40088321 0.68094062
 0.65014807 0.47250302 0.52174673 0.37652713 0.96161274 0.7184758
 0.72663163 0.68449836 0.01649995 0.87995769 0.12940246 0.81647012
 0.24712716 0.54201573 0.76321204 0.85836369 0.83265051 0.61398207
 0.33077389 0.11346194 0.63686537 0.20201624 0.52844823 0.55186246
 0.31077917 0.91658018 0.44616741 0.69064677 0.91723487 0.36138512
 0.1527355  0.4939028  0.85421846 0.9297398  0.0109952  0.32573835
 0.62444273 0.95456336 0.32196629 0.54428154 0.13691608 0.38649084
 0.52037837 0.3293441 ]
[0]


In [4]:
class MetaSGD(object):
    """Meta-SGD for a logistic-regression model on randomly sampled tasks.

    Both the initial weights ``theta`` and a per-parameter learning-rate
    vector ``alpha`` are meta-learned. For every task the adapted weights are
    ``theta' = theta - alpha * grad_train(theta)`` (element-wise product), and
    the meta-step moves ``theta`` and ``alpha`` to reduce the loss of
    ``theta'`` on a freshly sampled batch from the same task.

    Args:
        num_tasks: Number of tasks sampled per meta-iteration.
        num_samples: Number of points sampled for each support/query batch.
        epochs: Number of meta-iterations run by ``train``.
        beta: Step size of the meta-update applied to ``theta`` and ``alpha``.

    Raises:
        ValueError: If ``num_tasks`` or ``num_samples`` is smaller than 1.
    """

    def __init__(self, num_tasks=2, num_samples=10, epochs=10000, beta=0.0001):
        if num_tasks < 1 or num_samples < 1:
            raise ValueError("num_tasks and num_samples must both be >= 1")

        self.num_tasks = num_tasks

        self.num_samples = num_samples

        self.epochs = epochs

        self.beta = beta

        # theta value: initial model weights, one per input feature
        self.theta = np.random.normal(size=50).reshape(50, 1)

        # alpha value: learned per-parameter inner-loop learning rates.
        # (The scalar placeholder that used to be assigned before this line
        # was always overwritten here, so it has been dropped.)
        self.alpha = np.random.normal(size=50).reshape(50, 1)

    # sigmoid function
    def sigmoid(self, a):
        """Return the element-wise logistic function ``1 / (1 + exp(-a))``."""
        return 1.0 / (1 + np.exp(-a))

    # training part
    def train(self):
        """Run ``self.epochs`` meta-iterations, updating ``theta`` and ``alpha``.

        Each iteration performs one inner gradient step per task, evaluates
        the adapted weights on a new batch, and applies the averaged
        meta-gradients. Every 1000 iterations the training loss of the last
        sampled task is printed. Returns ``None``.
        """
        # Only used to keep the reported loss finite when the sigmoid
        # saturates to exactly 0 or 1; gradients use the raw predictions.
        eps = 1e-12

        for e in range(self.epochs):

            # adapted weights theta' for every task of this iteration
            self.theta_ = []
            # inner-loop gradients, needed later for the alpha meta-gradient
            train_gradients = []

            for i in range(self.num_tasks):

                # training data set
                XTrain, YTrain = sample_points(self.num_samples)

                YHat = self.sigmoid(np.matmul(XTrain, self.theta))

                # binary cross-entropy, averaged over the batch
                YSafe = np.clip(YHat, eps, 1 - eps)
                loss = ((np.matmul(-YTrain.T, np.log(YSafe))
                         - np.matmul((1 - YTrain.T), np.log(1 - YSafe)))
                        / self.num_samples)[0][0]

                # gradient values
                gradient = np.matmul(XTrain.T, (YHat - YTrain)) / self.num_samples
                train_gradients.append(gradient)

                # inner update: theta' = theta - alpha * gradient (element-wise)
                self.theta_.append(self.theta - np.multiply(self.alpha, gradient))

            # meta gradient accumulators
            theta_meta_gradient = np.zeros(self.theta.shape)
            alpha_meta_gradient = np.zeros(self.alpha.shape)

            for i in range(self.num_tasks):

                # x test and y test values; same batch size as the divisor below
                XTest, YTest = sample_points(self.num_samples)

                YPred = self.sigmoid(np.matmul(XTest, self.theta_[i]))

                # gradient of the test loss with respect to theta'
                test_gradient = np.matmul(XTest.T, (YPred - YTest)) / self.num_samples

                # First-order approximation: d theta' / d theta is treated
                # as the identity, so the test gradient is used directly.
                theta_meta_gradient += test_gradient

                # d theta' / d alpha = -gradient_train (element-wise), hence
                # d L_test / d alpha = -(gradient_train * gradient_test).
                alpha_meta_gradient -= np.multiply(train_gradients[i], test_gradient)

            # update theta with the meta gradients
            self.theta = self.theta - self.beta * theta_meta_gradient / self.num_tasks

            # update alpha with its own meta gradient
            self.alpha = self.alpha - self.beta * alpha_meta_gradient / self.num_tasks

            if e % 1000 == 0:
                print("Epoch {}: Loss {}\n".format(e, loss))
                print('Updated Model Parameter Theta\n')
                print('Sampling Next Batch of Tasks \n')
                print('---------------------------------\n')

In [5]:
# Build the meta-learner; its constructor draws random initial theta and alpha.
model = MetaSGD()

In [6]:
# Run the meta-training loop; a progress report is printed every 1000 epochs.
model.train()

Epoch 0: Loss 2.220636675285848

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 1000: Loss 1.9629621097748235

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 2000: Loss 1.1748940254869602

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 3000: Loss 1.8254859897969087

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 4000: Loss 1.183682834312381

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 5000: Loss 2.2491124246129806

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 6000: Loss 0.2336043698059756

Updated Model Parameter Theta

Sampling Next Batch of Tasks 

---------------------------------

Epoch 7000: Loss 1.9874010164246105

Updated Model Parameter Theta
