In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from random import randint
import os
import sys
#from torch import Dataset, Dataloader
torch.set_default_tensor_type('torch.DoubleTensor')
from torch.autograd import grad

In [2]:
import matplotlib.pyplot as plt
import numpy as np

In [3]:
dirName = './RESULTS'
if not os.path.exists(dirName):
    os.makedirs(dirName)

if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda')
else: 
    device = torch.device('cpu')
print(device)


cpu


In [4]:
class Sequence(nn.Module):
    def __init__(self, hidden, layer, features, dropout):
        super(Sequence, self).__init__()
        self.hidden   = hidden
        self.layer    = layer
        self.features = features

        self.lstm1  = nn.LSTM(self.features, self.hidden, self.layer, dropout=dropout)
        self.linear = nn.Linear(self.hidden, self.features)
 
    def forward(self, input, h_t, c_t):
        self.lstm1.flatten_parameters()
        out, (h_t, c_t) = self.lstm1(input, (h_t, c_t))
        output = out.view(input.size(0)*input.size(1),self.hidden)
        output = self.linear(output)
        output = output.view(input.size(0),input.size(1),self.features)
        return output 


In [5]:
if __name__ == '__main__':
    np.random.seed(0)
    # load data and make training set
    hidden   = 50
    layer    = 2
    features = 3
    dropout  = 0
    # build the model
    case = 'FULL'
        
    ini = 25
    train_loss= []
    train_loss_grad= []
    for k in [0] :
        name = './DATA/TRAIN_npy/'+case+'_'+str(k)+'.npy'
        data = np.load(name)
        data = np.expand_dims(data.T, axis=1)
        data = data[:100] # reduce the number of time steps (db)
        print("Data shape: ", data.shape)
        input  = torch.from_numpy(data[:-1,:,:]).double().to(device)
        target = torch.from_numpy(data[1:,:,:]).double().to(device)

        seq = Sequence(hidden,layer,features,dropout).double().to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(seq.parameters(), lr =0.01)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.7, patience=50, min_lr = 5e-5)
        dirName = './RESULTS/'+case+'_'+str(k)
        if not os.path.exists(dirName):
            os.makedirs(dirName)
        h_0 = torch.normal(mean=0.0, std=torch.ones(layer,input.size(1), hidden, dtype=torch.double)).to(device)
        c_0 = torch.normal(mean=0.0, std=torch.ones(layer,input.size(1), hidden, dtype=torch.double)).to(device)

        if k==0:
            torch.save(seq.state_dict(),'./init_model.pt')
        else:
            state_dict = torch.load('./init_model.pt', map_location=device)
            seq.load_state_dict(state_dict)

        err = 10
        loss1 = 1
        i = 0
        while loss1 > 1e-4 and i<200:

            optimizer.zero_grad()
            out = seq(input,h_0,c_0)
            loss = criterion(out, target.to(device))
            loss.backward()
            optimizer.step()
            scheduler.step(loss)
            train_loss.append(loss.item())
            #train_loss_grad.append()
            torch.save(seq.state_dict(),dirName+'/mytraining.pt')
            np.savetxt(dirName+'/loss.out',np.array([loss1]))
            if (i%(10)==0):
                loss2 = loss
                err = np.abs(loss2.item()-loss1)/10
                loss1 = loss2.item()
                print('FILE: ', k,'STEP TEST: ', i, 'test loss:', loss2.item(), 'lr: ', optimizer.param_groups[0]['lr'], 'err: ', err)
            i += 1
            sys.stdout.flush() 
        print('END: '+str(k))


Data shape:  (100, 1, 3)
FILE:  0 STEP TEST:  0 test loss: 274.896037366899 lr:  0.01 err:  27.3896037366899
FILE:  0 STEP TEST:  10 test loss: 163.58582999362304 lr:  0.01 err:  11.131020737327598
FILE:  0 STEP TEST:  20 test loss: 105.21877756578002 lr:  0.01 err:  5.836705242784302
FILE:  0 STEP TEST:  30 test loss: 66.15309715386644 lr:  0.01 err:  3.9065680411913575
FILE:  0 STEP TEST:  40 test loss: 45.102490104502934 lr:  0.01 err:  2.1050607049363506
FILE:  0 STEP TEST:  50 test loss: 35.44394389403481 lr:  0.01 err:  0.9658546210468124
FILE:  0 STEP TEST:  60 test loss: 30.826284587666823 lr:  0.01 err:  0.46176593063679866
FILE:  0 STEP TEST:  70 test loss: 28.485236887922614 lr:  0.01 err:  0.2341047699744209
FILE:  0 STEP TEST:  80 test loss: 26.897238310693414 lr:  0.01 err:  0.15879985772292002
FILE:  0 STEP TEST:  90 test loss: 25.712257605779527 lr:  0.01 err:  0.11849807049138868
FILE:  0 STEP TEST:  100 test loss: 24.664736794523375 lr:  0.01 err:  0.10475208112561525

In [63]:
optimizer.param_groups[0]

{'params': [Parameter containing:
  tensor([[-0.1051,  0.0461,  0.0972],
          [ 0.0266,  0.0331,  0.1372],
          [-0.0536,  0.1362,  0.0806],
          [ 0.0011, -0.0264, -0.1683],
          [ 0.0453,  0.0889,  0.1101],
          [ 0.0373,  0.1463,  0.2022],
          [ 0.0407, -0.1681, -0.1805],
          [-0.0230,  0.1704,  0.0933],
          [ 0.0358, -0.0634, -0.1613],
          [ 0.0313,  0.2110,  0.1102],
          [ 0.1723,  0.2067,  0.0348],
          [ 0.0910,  0.1237,  0.2059],
          [-0.0113, -0.0415,  0.1401],
          [-0.0009, -0.0108,  0.1577],
          [-0.0528, -0.0533,  0.0977],
          [ 0.0874,  0.2201,  0.0678],
          [ 0.1541,  0.1001,  0.0040],
          [-0.0257, -0.0331,  0.1875],
          [-0.0295,  0.1923,  0.0986],
          [-0.2453, -0.0874,  0.0129],
          [-0.1868, -0.0742,  0.1262],
          [-0.1441,  0.0448,  0.0712],
          [ 0.1947, -0.0247,  0.1920],
          [ 0.0357,  0.2140,  0.1224],
          [ 0.0616,  0.1788,  

In [67]:
loss_grads = grad(loss, optimizer.param_groups[0].items(0))


SyntaxError: Generator expression must be parenthesized (<ipython-input-67-d613366a3014>, line 1)

In [28]:
loss.backward()
print(loss.grad)

RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.

In [23]:
x = torch.ones(2, requires_grad=True, )
y = torch.pow(x,5)


In [22]:
out = torch.mean(y)
print(y)
print(out)
out.backward( retain_graph=True, create_graph=True)
print(x.grad)
print("out=",out)
out.backward()
print(x.grad)


tensor([1., 1.], grad_fn=<PowBackward0>)
tensor(1., grad_fn=<MeanBackward0>)
tensor([1.5000, 1.5000], grad_fn=<CopyBackwards>)
out= tensor(1., grad_fn=<MeanBackward0>)
tensor([3., 3.], grad_fn=<CopyBackwards>)


In [68]:
print(n_params)

31553


**Tentativo maldestro di calcolare la FISHER MATRIX**

In [None]:
if __name__ == '__main__':
    np.random.seed(0)
    # load data and make training set
    hidden   = 50
    layer    = 2
    features = 3
    dropout  = 0
    # build the model
    case = 'FULL'
    #-----------LIONEL STUFF--------------#
    n_params = sum(p.numel() for p in seq.parameters() if p.requires_grad)      # total nbr weights/biases
    data = np.load(name)
    data = np.expand_dims(data.T, axis=1)
    len_train=data.shape[0]//1
    sumgradexpcost = np.zeros(n_params)
    sumexpcost     = np.zeros(n_params)        
    cost = np.zeros(len_train-1)
    gradcost = np.zeros((n_params, len_train-1))
    
    #-----------EO LIONEL STUFF-----------#
    ini = 25
    train_loss= []
    train_loss_grad= []
    for k in [0] :
        name = './DATA/TRAIN_npy/'+case+'_'+str(k)+'.npy'
        data = np.load(name)
        data = np.expand_dims(data.T, axis=1)
        data = data[:100] # reduce the number of time steps (db)
        print("Data shape: ", data.shape)
        input  = torch.from_numpy(data[:-1,:,:]).double().to(device)
        target = torch.from_numpy(data[1:,:,:]).double().to(device)

        seq = Sequence(hidden,layer,features,dropout).double().to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(seq.parameters(), lr =0.01)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.7, patience=50, min_lr = 5e-5)
        dirName = './RESULTS/'+case+'_'+str(k)
        if not os.path.exists(dirName):
            os.makedirs(dirName)
        h_0 = torch.normal(mean=0.0, std=torch.ones(layer,input.size(1), hidden, dtype=torch.double)).to(device)
        c_0 = torch.normal(mean=0.0, std=torch.ones(layer,input.size(1), hidden, dtype=torch.double)).to(device)

        if k==0:
            torch.save(seq.state_dict(),'./init_model.pt')
        else:
            state_dict = torch.load('./init_model.pt', map_location=device)
            seq.load_state_dict(state_dict)

        err = 10
        loss1 = 1
        i = 0
        while loss1 > 1e-4 and i<200:

            optimizer.zero_grad()
            out = seq(input,h_0,c_0)
            loss = criterion(out, target.to(device))
            loss.backward()
            #-----------LIONEL STUFF--------------#
            loss_np = loss.item() 
            gradcost_theta = np.array([])
#            for param in seq.parameters():
            for param in list(seq.parameters()):
#                weight_or_bias = param.detach()           # extract from nn
                weight_or_bias = param
                w_or_b_grad = weight_or_bias.grad
                w_or_b_grad = w_or_b_grad.view(-1).numpy()    # converts into np vector
                gradcost_theta = np.append(gradcost_theta, w_or_b_grad)

            cost[i] = loss.item()
            gradcost[:,i] = gradcost_theta
            exploss = np.exp(-loss_np)
            sumgradexpcost += exploss * gradcost_theta
            sumexpcost     += exploss
            
            
            #-----------LIONEL STUFF--------------#
            optimizer.step()
            scheduler.step(loss)
            train_loss.append(loss.item())
            #train_loss_grad.append()
            torch.save(seq.state_dict(),dirName+'/mytraining.pt')
            np.savetxt(dirName+'/loss.out',np.array([loss1]))
            if (i%(10)==0):
                loss2 = loss
                err = np.abs(loss2.item()-loss1)/10
                loss1 = loss2.item()
                print('FILE: ', k,'STEP TEST: ', i, 'test loss:', loss2.item(), 'lr: ', optimizer.param_groups[0]['lr'], 'err: ', err)
            i += 1
            sys.stdout.flush() 
        print('END: '+str(k))
        
        normaliz = sumgradexpcost / sumexpcost
        Jacob = np.zeros((n_params, n_params))
        for irow in range(len_train-1):
            gradlogLikelirow = -gradcost[:,irow] + normaliz
            Jacob += np.outer(gradlogLikelirow, gradlogLikelirow)        # produces rank1 matrix Jacob            
        
#        InfoFisher = np.linalg.det(Jacob)
        global lamb_Fisher            
        lamb_Fisher = np.linalg.eigvals(Jacob)


Data shape:  (100, 1, 3)
FILE:  0 STEP TEST:  0 test loss: 275.77957749551746 lr:  0.01 err:  27.477957749551745
FILE:  0 STEP TEST:  10 test loss: 160.4649510896097 lr:  0.01 err:  11.531462640590775
FILE:  0 STEP TEST:  20 test loss: 103.37890124004038 lr:  0.01 err:  5.708604984956933
FILE:  0 STEP TEST:  30 test loss: 65.06304889480431 lr:  0.01 err:  3.8315852345236068
FILE:  0 STEP TEST:  40 test loss: 45.56618361896786 lr:  0.01 err:  1.9496865275836448
FILE:  0 STEP TEST:  50 test loss: 38.153735588798604 lr:  0.01 err:  0.7412448030169259
FILE:  0 STEP TEST:  60 test loss: 36.49977916900028 lr:  0.01 err:  0.16539564197983267
FILE:  0 STEP TEST:  70 test loss: 36.44273915356721 lr:  0.01 err:  0.005704001543306703
FILE:  0 STEP TEST:  80 test loss: 36.47238650678617 lr:  0.01 err:  0.002964735321896228
FILE:  0 STEP TEST:  90 test loss: 36.41053951547177 lr:  0.01 err:  0.006184699131440397
FILE:  0 STEP TEST:  100 test loss: 36.347821676026484 lr:  0.01 err:  0.00627178394452

In [33]:

        
        for i in range(len_train-1):
            optimizer.zero_grad()
            out = seq(input[i,:,:])
            targetloc = target[i,:,:]
            targetloc = targetloc[np.newaxis,:,:]
            loss = criterion(out, targetloc)
            loss_np = loss.item()
            loss.backward()
        
            gradcost_theta = np.array([])
#            for param in seq.parameters():
            for param in list(seq.parameters()):
#                weight_or_bias = param.detach()           # extract from nn
                weight_or_bias = param
                w_or_b_grad = weight_or_bias.grad
                w_or_b_grad = w_or_b_grad.view(-1).numpy()    # converts into np vector
                gradcost_theta = np.append(gradcost_theta, w_or_b_grad)

            cost[i] = loss.item()
            gradcost[:,i] = gradcost_theta
            exploss = np.exp(-loss_np)
            sumgradexpcost += exploss * gradcost_theta
            sumexpcost     += exploss
        
#        normaliz = sumgradexpcost / np.log(sumexpcost)
        normaliz = sumgradexpcost / sumexpcost
        Jacob = np.zeros((n_params, n_params))
        for irow in range(len_train-1):
            gradlogLikelirow = -gradcost[:,irow] + normaliz
            Jacob += np.outer(gradlogLikelirow, gradlogLikelirow)        # produces rank1 matrix Jacob            
        
#        InfoFisher = np.linalg.det(Jacob)
        global lamb_Fisher            
        lamb_Fisher = np.linalg.eigvals(Jacob)
        # FIN de Fisher information matrix -- LM




TypeError: forward() missing 2 required positional arguments: 'h_t' and 'c_t'