In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft, ifft, fftfreq
from scipy.signal import argrelextrema
import time
import peakutils
import pickle
import pandas as pd
from collections import defaultdict, OrderedDict

from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from torchvision.utils import make_grid
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

## Defining a function to save an object with `pickle`

In [2]:
def save_obj(obj, name):
    with open('../training_files/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

## Loading Ising data

In [None]:
data = pd.read_pickle('data/l04_full_state_phase.pkl')

In [None]:
data.head()

In [None]:
data.shape

#### Creating a Pytorch dataset

In [None]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(data, test_size= 0.2, random_state= 12)

In [None]:
X_train = torch.tensor(list(train_data['state']), dtype= torch.float)
y_train = torch.tensor(np.array(train_data[['ordered', 'desordered']]) ,dtype= torch.float)

X_test = torch.tensor(list(test_data['state']), dtype= torch.float)
y_test = torch.tensor(np.array(test_data[['ordered', 'desordered']]))

## Loading  Shwartz-Ziv/Tishby data

We make use of the the functions defined in `utils` by Saxe.

In [4]:
# some_file.py
import sys
sys.path.insert(0, '../estimators')

import utils

train, test = utils.get_IB_data('2017_12_21_16_51_3_275766')

#### Creating a Pytorch dataset

In [5]:
X_train = torch.tensor(train.X, dtype= torch.float)
y_train = torch.tensor(train.Y, dtype= torch.float)

X_test = torch.tensor(test.X, dtype= torch.float)
y_test = torch.tensor(test.Y, dtype= torch.float)

In [6]:
test.Y

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

### Creating a Pytorch dataset

In [34]:
batch_size = 5

In [35]:
X_train.shape

torch.Size([3277, 12])

In [36]:
X_test.shape

torch.Size([819, 12])

In [37]:
data = TensorDataset(X_train, y_train)
train_loader = DataLoader(data, batch_size = batch_size, shuffle=True)

## Constructing the network: `Net` class

In [38]:
class Net(nn.Module):
    '''
    net_layer: list with the number of neurons for each network layer, [n_imput, ..., n_output]
    '''
    def __init__(self, layers_size, out_size):
        super(Net, self).__init__()
        
        self.layers = nn.ModuleList()
        
        
        for k in range(len(layers_size) - 1):
            self.layers.append(nn.Linear(layers_size[k], layers_size[k+1]))
            
        # Output layer # Here we could choose a different activation function
        self.out = nn.Linear(layers_size[-1], out_size)
        
        ###### WEIGHT INITIALIZATION
        for m in self.layers:
            nn.init.normal_(m.weight, mean= 0, std= 1/np.sqrt(100*len(layers_size)))
            nn.init.constant_(m.bias, 0.0)
        
        nn.init.normal_(self.out.weight, mean= 0, std= 1/np.sqrt(100*len(layers_size)))
        nn.init.constant_(self.out.bias, 0.0)
        ############################
        
    def forward(self, x):
        
        j = 1
        #act_state_batch = []
        act_st_batch = {
            'activity' : []
            }
        
        for layer in self.layers:
                       
            x = F.tanh(layer(x))
                      
            act_st_batch['activity'].append( x.detach().numpy())
            
            j = j + 1
            
        output= F.softmax(self.out(x), dim=1)
        
        act_st_batch['activity'].append( output.detach().numpy())
            
        
        return output, act_st_batch

## Training



### Initializing the class `Net` and defining an optimizer and a loss function

In [41]:
########## DEFINING NETWORK ARCHITETURE

input_size = X_train.shape[1]
out_size = y_train.shape[1]
#layers_size = [input_size, 10, 7, 5, 4, 3]

layers_size = [input_size, 3]

net = Net(layers_size, out_size)     
print(net)

######### DEFINING OPTIMIZER AND LOSS FUNCTION

optimizer = optim.Adam(net.parameters(), lr= 0.004)
loss_func = nn.BCELoss()

######## INITIALIZING TRAINING AND STORING IMPORT10,7,5,4,3ANT INFORMATION

log_dic = {
    'epoch': [],
    'loss' : [],
    'loss_gen' : [],
    'data' : []
}


for epoch in range(10):
    
    weights = {'weights_norm' : [],
               'grad_mean'    : [],
               'grad_std'     : []
              }  # Recording weights norm
    
       
    loss_epoch = []
    
    #activations_epoch = {'activations': []}
    
    t0 = time.time()
    
    k_aux = 1
    
    ########################### LOOP OVER THE MINI-BATCHES ############################

    
    
    for _, (input_data, target) in enumerate(train_loader):
        
        prediction, act_state_batch = net(input_data)     # input x and predict based on x
                                                          # act_state provides the activation values for each neuron
                                                          # in each layer for each batch
        
        loss = loss_func(prediction, target)     # must be (1. nn output, 2. target)
        
        loss_epoch.append(loss.item())
          
        optimizer.zero_grad()   # clear gradients for next train
        loss.backward()         # backpropagation, compute gradients
        optimizer.step()
        
        print(act_state_batch)
        print(len(act_state_batch['activity']))
        print(len(act_state_batch['activity'][0]))
        print(len(act_state_batch['activity'][1]))
        #print(len(act_state_batch['activity'][2]))
        #print(len(act_state_batch['activity'][3]))
        #print(len(act_state_batch['activity'][4]))
        #print(len(act_state_batch['activity'][5]))
        print(len(loss_epoch))
        print(len(loss_epoch) > 1)
        #break
        
        
        ###### STORING ACTIVATIONS ######
        
        if len(loss_epoch) > 1:
            for r in range(len(act_state_batch['activity'])):
                act_state_batch['activity'][r] = np.append(act_state_batch_last['activity'][r], 
                                                           act_state_batch['activity'][r], axis= 0)
                
            print(act_state_batch)
                
        
            
        act_state_batch_last = act_state_batch.copy()
        
        
        if k_aux > 2:
            break
        
        k_aux = k_aux + 1
        
        #################################
        
    #for l in range(len(layers_size)):
    #    print(act_state_batch['activity'][l].shape)
        
    
    
    ###################################################################################
    

       
    #activations_epoch['activations'].append(act_state_batch)       
         
    #print(act_state_batch)
    #print(a)
    
    break
    
    ##################################################################################
    
    for n in range(0, 2*len(layers_size), 2):
        
        W = list(net.parameters())[n]
        
        weights['weights_norm'].append( np.linalg.norm(W.detach().numpy(), ord=2) )
        weights['grad_mean'].append( np.absolute(W.grad.mean().item()) )
        weights['grad_std'].append( W.grad.std().item())        
    
    ############## RECORDING
    log_dic['epoch'].append(epoch)
    log_dic['loss'].append(np.mean(loss_epoch))
    test_pred, _ = net(X_test)
    log_dic['loss_gen'].append(loss_func(test_pred, y_test ).item())
    #log_dic['data'].append({**act_state_batch})
    #if do_report(epoch):
    log_dic['data'].append({**act_state_batch, **weights})
    
    #if epoch == 2:
    #    break
    
    ##################################################################################
    
    
    #optimizer.step()        # apply gradients
    
     ############ TIME COMPUTING ALL THE INFORMATION ########
    t1 = time.time()
    ########################################################
    
    ########### TRAINING STATUS ########
    print('Epoch %d, Loss= %.10f, Time= %.4f' % (epoch, np.mean(loss_epoch), t1-t0))
    

Net(
  (layers): ModuleList(
    (0): Linear(in_features=12, out_features=3, bias=True)
  )
  (out): Linear(in_features=3, out_features=2, bias=True)
)
{'activity': [array([[-0.14506368,  0.07718648, -0.22604144],
       [ 0.18322146,  0.1474182 , -0.14231911],
       [ 0.02592443,  0.09238657, -0.00644962],
       [ 0.11892071,  0.21342322, -0.10445723],
       [ 0.10040651,  0.15205173, -0.20389418]], dtype=float32), array([[0.49683657, 0.5031634 ],
       [0.49393514, 0.5060649 ],
       [0.49755132, 0.5024486 ],
       [0.4931092 , 0.5068908 ],
       [0.4937353 , 0.50626475]], dtype=float32)]}
2
5
5
1
False
{'activity': [array([[ 0.09266253,  0.07034681, -0.10214622],
       [ 0.00514269, -0.09910725, -0.29751828],
       [ 0.19704197,  0.29849377, -0.04346497],
       [ 0.13904627,  0.13046984, -0.18014275],
       [ 0.03931467,  0.21123177, -0.01356388]], dtype=float32), array([[0.498619  , 0.50138104],
       [0.5022468 , 0.49775323],
       [0.49226692, 0.5077331 ],
       [0.

In [17]:
log_dic['data'][0]['activity'][5]

array([[0.48523843, 0.5147615 ],
       [0.4854554 , 0.5145446 ],
       [0.4856184 , 0.5143816 ],
       ...,
       [0.5000005 , 0.49999955],
       [0.50000024, 0.49999976],
       [0.5       , 0.49999997]], dtype=float32)

In [None]:
plt.plot(log_dic['epoch'], log_dic['loss'], label= 'Training error')
plt.plot(log_dic['epoch'], log_dic['loss_gen'], label= 'Test error')
plt.xlabel('Epoch', fontsize= 15)
plt.ylabel('Loss', fontsize= 15)
plt.legend()
plt.show()

In [None]:
plt.plot(log_dic['epoch'][::10], log_dic['loss'][::10], label= 'Training error')
plt.plot(log_dic['epoch'][::10], log_dic['loss_gen'][::10], label= 'Test error')
plt.xlabel('Epoch', fontsize= 15)
plt.ylabel('Loss', fontsize= 15)
plt.legend()
plt.show()

### Take a look in what is stored in `log_dic`

Epoch number zero.

In [None]:
log_dic['data'][0]

With `log_dic['data'][0]['activity']` we access activation values in the first epoch for each layer.

In [None]:
log_dic['data'][0]['activity']

The first layer is `log_dic['data'][0]['activity'][0]`, the second `log_dic['data'][0]['activity'][1]` and so on. 

In [None]:
log_dic['data'][0]['activity'][0]

The activation values for the last layers can be obtained without the knowledge about the number of layers.

In [None]:
log_dic['data'][0]['activity'][-1]

### Writing Python dictionary to a file

In [None]:
save_obj(log_dic, 'tishby_mini_batch')