In [3]:
import numpy as np
from itertools import product
import math
import random
import copy

According to Hinton's "A Practical Guide to Training Restricted Boltzmann Machines"

Initialize J matrix to zero mean and 0.01 sd. The diagonal will be 0.

Initizlize bias b_i  of visible unit i to log[pi/(1−pi)]

In [4]:
class IsingModel:
    def __init__(self, data, lr=0.1):
        self.lr = lr
        
        # Possible values the spins can take
        self.spin_values = [-1,1]
        
        # Create a ndarray from the data
        self.data_activations_matrix = np.array(data)
        # Calculate the shape of the matrix and save useful variables
        self.num_spins = self.data_activations_matrix.shape[1]
        self.num_samples_data = self.data_activations_matrix.shape[0]
        
        # Calculate the true data expectations
        # First calculate the correlation
        self.data_correlation = self.calculate_corr(self.data_activations_matrix)
        # Then, the mean
        self.data_mean = np.mean(self.data_activations_matrix, axis=0)
        
        
        # Calculate the initial J matrix. Fill only the upper diagonal
        # Filling with Normal(0, 0.01)
        np.random.seed(3)
        self.J = np.random.normal(loc=0.0, scale=.01, size=self.data_correlation.shape)
        lind = np.tril_indices_from(self.J)
        self.J[lind]=0
                
        # Calculate the initial H vector, each entry proportional to its probability (according to Hinton)
        #self.H = np.sum(self.data_activations_matrix, axis=0)/ float(self.num_samples_data)
        # Try with random H
        self.H = np.random.normal(loc=0.0, scale=.01, size=self.num_spins)
        
    # Calculate the correlation of a matrix
    def calculate_corr(self, activations_matrix):
        num_spins = activations_matrix.shape[1]
        num_samples_data = activations_matrix.shape[0]
        
        corr_matrix = np.zeros((self.num_spins, self.num_spins))
        for sigma in activations_matrix:
            # Calculate \sigma_i * \sigma_j
            for i in range(0, self.num_spins):
                for j in range(i+1, self.num_spins):
                    corr_matrix[i][j] += sigma[i]*sigma[j]
        corr_matrix = corr_matrix / float(num_samples_data)
        
        return corr_matrix
    
    # Calcualte the energy of the model's parameters given a sigma state
    def calculate_energy(self, sigma):
        energy = 0
        for i in range(0, self.num_spins):
            for j in range(i+1, self.num_spins):
                energy += sigma[i]*sigma[j]*self.J[i][j]

        for i in range(0, self.num_spins):
            energy += sigma[i]*self.H[i]

        return - energy
                
    
    # Flip a randomly selected spin
    def flip_spin(self, _sigma):
        sigma = copy.deepcopy(_sigma)
        spin_to_flip = random.choice(range(0, self.num_spins))

        spin = sigma[spin_to_flip]
        if spin == self.spin_values[0]:
            sigma[spin_to_flip] = self.spin_values[1]
        else:
            sigma[spin_to_flip] = self.spin_values[0]
        return sigma
        
        
    
    # Train the exact model
    def train(self, max_epochs = 500):
        
        totalParamVariation = math.inf
        stopCondition = 0.000005
        
        epoch = 1
        
        while totalParamVariation > stopCondition and epoch < max_epochs:
            # Calculate P(sigma) for every possible combination in the model

            prob_dict = {}
            for sigma in product(self.spin_values, repeat=self.num_spins):
                prob_dict[sigma] = math.exp(-self.calculate_energy(sigma))

            # Calculate the partition function
            Z = sum(prob_dict.values())

            # Normalize dividing by the partition function
            for sigma in prob_dict.keys():
                prob_dict[sigma] = prob_dict[sigma]/Z

            model_correlation = np.zeros(self.J.shape)
            model_mean = np.zeros(self.H.shape)
            for sigma in product(self.spin_values, repeat=self.num_spins):
                
                # Calculate the expectation of the correlations
                for i in range(0, self.num_spins):
                    for j in range(i+1, self.num_spins):
                        model_correlation[i][j] += sigma[i]*sigma[j]*prob_dict[sigma]

                # Calculate the expectation of the averages
                for i in range(0, self.num_spins):
                    model_mean[i] += sigma[i]*prob_dict[sigma]


            # Calculate the step size for every J_{ij}
            stepJ = self.lr * (self.data_correlation - model_correlation)
            # Take the step
            oldJ = copy.deepcopy(self.J)
            self.J = self.J + stepJ

            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_mean - model_mean)
            # Calculate the variation of J for the termination condition
            # Take the step
            oldH = copy.deepcopy(self.H)
            self.H = self.H + stepH
            
            diffH = np.sum(np.absolute(self.H - oldH))
            diffJ = np.sum(np.sum(np.absolute(self.J - oldJ)))
            totalParamVariation = diffJ + diffH
            
            if epoch%100 == 0:
                print('Epoch', epoch, 'TotalParamVariation', round(totalParamVariation, 8))
                print(model_correlation, 'Corr Model')
                print(self.data_correlation, 'Corr Data')

                print(model_mean, 'Average Model')
                print(self.data_mean, 'Average Data')
                print()
            
            epoch += 1

        print(self.H, 'H')
        print(self.J, 'J')
        
    def random_walk(self, max_steps):
        # Generate a random initial spin configuration
        model_activations_list = []
        sigma_0 = random.choices([-1,1], k=self.num_spins)
        
        # Calculate the energy at this time
        energy_0 = self.calculate_energy(sigma_0)
        
        for i in range(0, max_steps):
            
            # Flip a spin and calculate the new energy
            sigma_t = self.flip_spin(sigma_0)
            energy_t = self.calculate_energy(sigma_t)
            delta_energy = energy_t - energy_0

            if delta_energy<=0: # If delta_energy<0, accept the spin flip, energetically favourable
                model_activations_list.append(sigma_t)
            

            else: # Check if transition probability p < uniform r
                p = math.exp(-delta_energy)
                r = random.random()

                if r<=p: # Accept spin flip
                    model_activations_list.append(sigma_t)
                else: # Refuse spin flip
                    model_activations_list.append(sigma_0)
            
            sigma_0 = sigma_t
            energy_0 = energy_t
        
        return model_activations_list
    
    def train_metropolis(self, max_epochs = 300, random_walk_len = 100):
        random.seed(3)
        totalParamVariation = math.inf
        stopCondition = 0.0005
        
        epoch = 1
        
        while totalParamVariation > stopCondition and epoch < max_epochs:
            model_activations_list = self.random_walk(random_walk_len)  
            model_activations_matrix = np.array(model_activations_list)
            # Calculate the data expectations
            model_correlation = self.calculate_corr(model_activations_matrix)
            model_mean = np.mean(model_activations_matrix, axis=0)
            
            # Calculate the step size for every J_{ij}
            stepJ = self.lr * (self.data_correlation - model_correlation)
            
            oldJ = copy.deepcopy(self.J)
            # Take the step
            self.J = self.J + stepJ

            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_mean - model_mean)
            
            oldH = copy.deepcopy(self.H)
            # Take the step
            self.H = self.H + stepH
            
            # Calculate the variation of J and H for the termination condition
            diffH = np.sum(np.absolute(self.H - oldH))
            diffJ = np.sum(np.sum(np.absolute(self.J - oldJ)))
            totalParamVariation = diffJ + diffH
            
            if epoch%10 == 0 or epoch == max_epochs-1:
                print('Epoch', epoch, 'TotalParamVariation', round(totalParamVariation, 8))
                print(model_correlation, 'Corr Model')
                print(self.data_correlation, 'Corr Data')

                print(model_mean, 'Average Model')
                print(self.data_mean, 'Average Data')
                print()
                                    
            epoch +=1

        print(self.H, 'H')
        print(self.J, 'J')
        

In [5]:
activations_list = [[1,-1,1,-1], [1,-1,1,-1], [-1,1,-1,-1], [-1,1,1,1], [1,1,1,-1], [-1,-1,1,1], [1,-1,-1,1], [1,-1,1,1]]

In [14]:
ising = IsingModel(activations_list, lr=0.005)

In [9]:
ising.train_metropolis(max_epochs = 15000, random_walk_len = 400)


Epoch 10 TotalParamVariation 0.055875
[[ 0.    3.    1.   -2.75]
 [ 0.    0.    0.5  -1.75]
 [ 0.    0.    0.   -1.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[0.115 0.095 0.135 0.08 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 20 TotalParamVariation 0.0703
[[ 0.   -1.75  5.5  -5.25]
 [ 0.    0.   -0.75  0.  ]
 [ 0.    0.    0.   -0.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.065  0.     0.015 -0.14 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 30 TotalParamVariation 0.0704
[[ 0.   -3.5   0.25  1.25]
 [ 0.    0.    2.75 -2.75]
 [ 0.    0.    0.   -3.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[-0.075  

Epoch 260 TotalParamVariation 0.061375
[[ 0.    1.    0.5  -0.25]
 [ 0.    0.    4.    0.75]
 [ 0.    0.    0.   -4.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.15  -0.14   0.19   0.005] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 270 TotalParamVariation 0.04565
[[ 0.   -2.25  2.   -2.75]
 [ 0.    0.   -0.75  1.  ]
 [ 0.    0.    0.   -0.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.15  -0.165  0.12   0.065] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 280 TotalParamVariation 0.070925
[[ 0.   -7.75  1.    0.  ]
 [ 0.    0.   -1.25 -0.25]
 [ 0.    0.    0.   -4.5 ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data

Epoch 490 TotalParamVariation 0.081425
[[ 0.    3.   -4.25 -1.  ]
 [ 0.    0.   -6.25  0.  ]
 [ 0.    0.    0.   -0.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[-0.075 -0.255  0.3    0.005] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 500 TotalParamVariation 0.063675
[[ 0.    1.5   1.5   1.75]
 [ 0.    0.   -4.   -0.25]
 [ 0.    0.    0.    3.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.055 -0.225  0.345  0.11 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 510 TotalParamVariation 0.11475
[[ 0.    1.25 -0.75 -6.5 ]
 [ 0.    0.    4.5   5.75]
 [ 0.    0.    0.   -2.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data

Epoch 730 TotalParamVariation 0.08285
[[ 0.   -2.    0.75 -2.25]
 [ 0.    0.   -4.25 -7.25]
 [ 0.    0.    0.   -1.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.16  -0.21   0.125 -0.065] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 740 TotalParamVariation 0.09295
[[  0.   -11.25  -1.    -0.75]
 [  0.     0.    -0.25   4.  ]
 [  0.     0.     0.     1.25]
 [  0.     0.     0.     0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.145 -0.16   0.255 -0.15 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 750 TotalParamVariation 0.082775
[[ 0.   -2.5   1.5   3.75]
 [ 0.    0.   -2.5  -2.75]
 [ 0.    0.    0.   -4.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0

Epoch 970 TotalParamVariation 0.0777
[[ 0.  -2.5  0.5 -0.5]
 [ 0.   0.   3.5 -4.5]
 [ 0.   0.   0.  -4.5]
 [ 0.   0.   0.   0. ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.165 -0.085  0.285 -0.075] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 980 TotalParamVariation 0.05025
[[ 0.   -1.25  0.75 -6.  ]
 [ 0.    0.   -1.   -1.75]
 [ 0.    0.    0.   -0.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.145 -0.25   0.11  -0.055] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 990 TotalParamVariation 0.0645
[[ 0.    0.    0.75 -0.75]
 [ 0.    0.   -7.25 -0.75]
 [ 0.    0.    0.   -3.5 ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.21  -0.15   0.2

Epoch 1200 TotalParamVariation 0.087775
[[ 0.    3.25 -3.5   0.5 ]
 [ 0.    0.   -0.25  2.75]
 [ 0.    0.    0.    6.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.2   -0.145  0.39   0.04 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1210 TotalParamVariation 0.0813
[[ 0.   -1.25  6.5  -1.75]
 [ 0.    0.   -2.25  1.5 ]
 [ 0.    0.    0.   -3.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.205 -0.16   0.415 -0.04 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1220 TotalParamVariation 0.02955
[[ 0.   -3.    2.75 -0.75]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Dat

Epoch 1450 TotalParamVariation 0.079575
[[ 0.   -1.   -3.75  3.  ]
 [ 0.    0.    0.75 -3.  ]
 [ 0.    0.    0.    3.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.05  -0.33   0.135  0.02 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1460 TotalParamVariation 0.074225
[[ 0.    2.   -0.5   5.25]
 [ 0.    0.    0.   -3.25]
 [ 0.    0.    0.    2.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.225 -0.145  0.225  0.19 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1470 TotalParamVariation 0.10475
[[ 0.  -7.   0.5  3. ]
 [ 0.   0.  -2.  -5.5]
 [ 0.   0.   0.   3.5]
 [ 0.   0.   0.   0. ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.32 -0.27

Epoch 1700 TotalParamVariation 0.071775
[[ 0.    2.   -3.75  1.5 ]
 [ 0.    0.   -4.25 -1.  ]
 [ 0.    0.    0.   -0.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.3   -0.1    0.175  0.08 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1710 TotalParamVariation 0.0583
[[ 0.    0.75  5.   -0.25]
 [ 0.    0.    2.75 -1.  ]
 [ 0.    0.    0.   -1.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.31  -0.005  0.26  -0.115] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1720 TotalParamVariation 0.06935
[[ 0.    0.75  0.    3.25]
 [ 0.    0.    1.25 -4.  ]
 [ 0.    0.    0.   -3.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Dat

Epoch 1950 TotalParamVariation 0.056725
[[ 0.    0.5  -1.5   4.25]
 [ 0.    0.   -2.   -1.75]
 [ 0.    0.    0.    0.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.14  -0.31   0.26   0.185] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1960 TotalParamVariation 0.057925
[[ 0.   -4.    0.5  -2.75]
 [ 0.    0.    1.5   2.25]
 [ 0.    0.    0.   -0.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.325 -0.285  0.295  0.02 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1970 TotalParamVariation 0.07055
[[ 0.    3.75 -1.25 -3.  ]
 [ 0.    0.    2.5  -0.75]
 [ 0.    0.    0.    1.75]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr D

Epoch 2190 TotalParamVariation 0.07475
[[ 0.   1.5  1.  -4. ]
 [ 0.   0.   0.5 -0.5]
 [ 0.   0.   0.   7. ]
 [ 0.   0.   0.   0. ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.175 -0.205  0.235  0.065] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 2200 TotalParamVariation 0.048775
[[ 0.   -3.75 -1.5  -2.5 ]
 [ 0.    0.   -0.75  0.25]
 [ 0.    0.    0.   -1.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.2   -0.295  0.18  -0.09 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 2210 TotalParamVariation 0.059225
[[ 0.   -0.25 -2.    0.  ]
 [ 0.    0.    4.25  3.75]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.215 -0.1

Epoch 2420 TotalParamVariation 0.08085
[[ 0.    3.25  3.   -2.25]
 [ 0.    0.    3.25  2.  ]
 [ 0.    0.    0.   -1.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.17  -0.125  0.19  -0.155] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 2430 TotalParamVariation 0.082875
[[ 0.    3.5   5.    0.25]
 [ 0.    0.   -1.   -2.25]
 [ 0.    0.    0.    4.25]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.355 -0.175  0.395  0.04 ] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 2440 TotalParamVariation 0.078525
[[ 0.    6.25  0.25 -1.25]
 [ 0.    0.   -0.5   5.  ]
 [ 0.    0.    0.   -2.  ]
 [ 0.    0.    0.    0.  ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr D

KeyboardInterrupt: 

In [15]:
ising.train(max_epochs = 5000)

Epoch 100 TotalParamVariation 0.00699605
[[ 0.         -0.19793754  0.12884396 -0.09054725]
 [ 0.          0.         -0.13099727 -0.08177196]
 [ 0.          0.          0.          0.00369193]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.12735985 -0.13722528  0.21541325 -0.00561383] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 200 TotalParamVariation 0.00384165
[[ 0.         -0.3114147   0.21092501 -0.12247964]
 [ 0.          0.         -0.21240131 -0.11749875]
 [ 0.          0.          0.          0.00225246]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.21021611 -0.21585693  0.33624947 -0.00311981] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 300 TotalParamVariation 0.00236232

Epoch 2200 TotalParamVariation 0.00033875
[[ 0.00000000e+00 -4.77960629e-01  2.50278550e-01 -2.27732759e-01]
 [ 0.00000000e+00  0.00000000e+00 -2.50139885e-01 -2.27734703e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  5.51504251e-05]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 2.50136181e-01 -2.50283761e-01  4.99773675e-01 -5.89887943e-05] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 2300 TotalParamVariation 0.00032415
[[ 0.00000000e+00 -4.78876154e-01  2.50251537e-01 -2.28664333e-01]
 [ 0.00000000e+00  0.00000000e+00 -2.50124934e-01 -2.28665858e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  4.99179969e-05]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Dat

Epoch 3800 TotalParamVariation 0.00019737
[[ 0.00000000e+00 -4.86998941e-01  2.50080518e-01 -2.36918999e-01]
 [ 0.00000000e+00  0.00000000e+00 -2.50047979e-01 -2.36919043e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.25285817e-05]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 2.50047877e-01 -2.50080624e-01  4.99971819e-01 -1.26185360e-05] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 3900 TotalParamVariation 0.00019235
[[ 0.00000000e+00 -4.87324797e-01  2.50075814e-01 -2.37248977e-01]
 [ 0.00000000e+00  0.00000000e+00 -2.50046049e-01 -2.37249011e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.14651830e-05]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Dat

#### New Metropolis

In [36]:
class IsingModel:
    def __init__(self, data, lr=0.0005):
        self.lr = lr
        
        # Possible values the spins can take
        self.spin_values = [-1,1]
        
        # Create a ndarray from the data
        self.data_activations_matrix = np.array(data)
        # Calculate the shape of the matrix and save useful variables
        self.num_spins = self.data_activations_matrix.shape[1]
        self.num_samples_data = self.data_activations_matrix.shape[0]
        
        # Calculate the true data expectations
        # First calculate the correlation
        self.data_correlation = self.calculate_corr(self.data_activations_matrix)
        # Then, the mean
        self.data_mean = np.mean(self.data_activations_matrix, axis=0)
        
        
        # Calculate the initial J matrix. Fill only the upper diagonal
        # Filling with Normal(0, 0.01)
        np.random.seed(3)
        self.J = np.random.normal(loc=0.0, scale=.01, size=self.data_correlation.shape)
        lind = np.tril_indices_from(self.J)
        self.J[lind]=0
                
        # Calculate the initial H vector, each entry proportional to its probability (according to Hinton)
        #self.H = np.sum(self.data_activations_matrix, axis=0)/ float(self.num_samples_data)
        # Try with random H
        self.H = np.random.normal(loc=0.0, scale=.01, size=self.num_spins)
        
    # Train the exact model
    def train(self, max_epochs = 500):
        
        totalParamVariation = math.inf
        stopCondition = 0.000005
        
        epoch = 1
        
        while totalParamVariation > stopCondition and epoch < max_epochs:
            # Calculate P(sigma) for every possible combination in the model

            prob_dict = {}
            for sigma in product(self.spin_values, repeat=self.num_spins):
                prob_dict[sigma] = math.exp(-self.calculate_energy(sigma))

            # Calculate the partition function
            Z = sum(prob_dict.values())

            # Normalize dividing by the partition function
            for sigma in prob_dict.keys():
                prob_dict[sigma] = prob_dict[sigma]/Z

            model_correlation = np.zeros(self.J.shape)
            model_mean = np.zeros(self.H.shape)
            for sigma in product(self.spin_values, repeat=self.num_spins):
                
                # Calculate the expectation of the correlations
                for i in range(0, self.num_spins):
                    for j in range(i+1, self.num_spins):
                        model_correlation[i][j] += sigma[i]*sigma[j]*prob_dict[sigma]

                # Calculate the expectation of the averages
                for i in range(0, self.num_spins):
                    model_mean[i] += sigma[i]*prob_dict[sigma]


            # Calculate the step size for every J_{ij}
            stepJ = self.lr * (self.data_correlation - model_correlation)
            # Take the step
            oldJ = copy.deepcopy(self.J)
            self.J = self.J + stepJ

            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_mean - model_mean)
            # Calculate the variation of J for the termination condition
            # Take the step
            oldH = copy.deepcopy(self.H)
            self.H = self.H + stepH
            
            diffH = np.sum(np.absolute(self.H - oldH))
            diffJ = np.sum(np.sum(np.absolute(self.J - oldJ)))
            totalParamVariation = diffJ + diffH
            
            if epoch%100 == 0:
                print('Epoch', epoch, 'TotalParamVariation', round(totalParamVariation, 8))
                print(model_correlation, 'Corr Model')
                print(self.data_correlation, 'Corr Data')

                print(model_mean, 'Average Model')
                print(self.data_mean, 'Average Data')
                print()
            
            epoch += 1

        print(self.H, 'H')
        print(self.J, 'J')

    # Calculate the energy of the model's parameters given a sigma state
    def calculate_energy(self, sigma):
        energy = 0
        for i in range(0, self.num_spins):
            for j in range(i+1, self.num_spins):
                energy += sigma[i]*sigma[j]*self.J[i][j]

        for i in range(0, self.num_spins):
            energy += sigma[i]*self.H[i]

        return - energy
    
    def get_delta_energy(self, i):
        #delta_energy = 2*(self.H[spin_to_flip]*self.sigma[spin_to_flip])
        
        delta_energy= 2 * (self.H[i]*self.sigma[i] \
                           + self.sigma[i] * np.dot(self.J[i, :], self.sigma) \
                           + self.sigma[i] * np.dot(self.J[:, i], self.sigma))
        
        return delta_energy
    
    def metropolis_step(self):
        # Randomly select a spin (now it can only select 0)
        spin_to_flip = np.random.randint(self.num_spins)

        delta_energy = 2*(self.H[spin_to_flip]*self.sigma[spin_to_flip])
        self.get_delta_energy(spin_to_flip)

        p = math.exp(-delta_energy)
        r = random.random()

        #print(delta_energy, r, p)
        if delta_energy <= 0 or r<=p:
            self.sigma[spin_to_flip] = -self.sigma[spin_to_flip]

    def metropolis_simulation(self, max_steps, burn_in_len=0):
        # Generate a random initial spin configuration
        self.sigma = random.choices(self.spin_values, k=self.num_spins)        
        
        # Create a dictionary to count the frequencies
        samples_dict = {}
        for i in range(0, max_steps):
            # Take a metropolis step
            self.metropolis_step()
            
            # Save the state if the burn-in phase has passed
            if i >= burn_in_len:
                
                if not tuple(copy.deepcopy(self.sigma)) in samples_dict: # If the state is new, create an entry
                    samples_dict[tuple(copy.deepcopy(self.sigma))] = 1
                else: # If the state already appeared, add one
                    samples_dict[tuple(copy.deepcopy(self.sigma))] += 1

        # Normalize the probability
        sum_states = sum(samples_dict.values())
        normalized_dict = {k: v / sum_states for k, v in samples_dict.items()}
                                                 
        return normalized_dict

    def get_statistics(self, states_dict):
        # Returns the magnetization and correlation from a dictionary of activations {sigma, prob}
        magnetization =  np.zeros(self.num_spins)
        corr_matrix = np.zeros((self.num_spins,self.num_spins))
        # Calculate the expectation
        for sigma, prob in states_dict.items():
            magnetization += np.dot(sigma,prob)
            for i in range(0, self.num_spins):
                for j in range(i+1, self.num_spins):
                    corr_matrix[i][j] += sigma[i]*sigma[j]*prob
        return magnetization, corr_matrix
    
    # Calculates the correlation of a list of activations
    def calculate_corr(self, activations_matrix):
        num_spins = activations_matrix.shape[1]
        num_samples_data = activations_matrix.shape[0]
        
        corr_matrix = np.zeros((self.num_spins, self.num_spins))
        for sigma in activations_matrix:
            # Calculate \sigma_i * \sigma_j
            for i in range(0, self.num_spins):
                for j in range(i+1, self.num_spins):
                    corr_matrix[i][j] += sigma[i]*sigma[j]
        corr_matrix = corr_matrix / float(num_samples_data)
        
        return corr_matrix
    
    def train_metropolis(self, max_epochs = 300, simulation_len = 100, burn_in_len=0):
        random.seed(3)
        totalParamVariation = math.inf
        stopCondition = 0.0000000005
        
        epoch = 1
        
        while totalParamVariation > stopCondition and epoch < max_epochs:

            samples_dict = self.metropolis_simulation(simulation_len, burn_in_len)
        
            # Calculate the data expectations        
            model_mean, model_correlation = self.get_statistics(samples_dict)
            
            # Calculate the step size for every J_{ij}
            stepJ = self.lr * (self.data_correlation - model_correlation)
            oldJ = copy.deepcopy(self.J)
            # Take the step
            self.J = self.J + stepJ
            
            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_mean - model_mean)
            # Take the step
            oldH = copy.deepcopy(self.H)
            self.H = self.H + stepH
            
            # Calculate the variation of J and H for the termination condition
            diffH = np.sum(np.absolute(self.H - oldH))
            diffJ = np.sum(np.sum(np.absolute(self.J - oldJ)))
            totalParamVariation = diffJ + diffH
            
            if epoch%50 == 0 or epoch == max_epochs-1:
                print('Epoch', epoch, 'TotalParamVariation', round(totalParamVariation, 8))
                print(model_correlation, 'Corr Model')
                print(self.data_correlation, 'Corr Data')

                print(model_mean, 'Average Model')
                print(self.data_mean, 'Average Data')
                print()
                
            epoch +=1

        
        print(self.data_mean, 'Spins average')
        print(self.H, 'H')
        
    

In [37]:
activations_list = [[1,-1,1,-1], [1,-1,1,-1], [-1,1,-1,-1], [-1,1,1,1], [1,1,1,-1], [-1,-1,1,1], [1,-1,-1,1], [1,-1,1,1]]
s_ising = IsingModel(activations_list, lr=0.01)

In [None]:
s_ising.train_metropolis( max_epochs = 10000, simulation_len = 10000, burn_in_len=100)

Epoch 50 TotalParamVariation 0.01946869
[[ 0.         -0.01919192  0.03292929  0.02666667]
 [ 0.          0.         -0.04727273  0.01151515]
 [ 0.          0.          0.          0.00262626]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.13090909 -0.15292929  0.22242424  0.01171717] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 100 TotalParamVariation 0.01873737
[[ 0.          0.00222222  0.02929293 -0.00040404]
 [ 0.          0.         -0.03717172  0.00141414]
 [ 0.          0.          0.         -0.00181818]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.14343434 -0.12707071  0.31454545 -0.02020202] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 150 TotalParamVariation 0.01512525


Epoch 900 TotalParamVariation 0.01230505
[[ 0.         -0.07515152  0.14666667  0.00949495]
 [ 0.          0.         -0.11757576  0.00383838]
 [ 0.          0.          0.          0.00989899]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.26626263 -0.2630303   0.50707071  0.01030303] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 950 TotalParamVariation 0.01236162
[[ 0.         -0.0769697   0.12161616 -0.00161616]
 [ 0.          0.         -0.09919192  0.00585859]
 [ 0.          0.          0.         -0.00282828]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.24686869 -0.24545455  0.48161616 -0.00080808] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1000 TotalParamVariation 0.0127798

Epoch 1800 TotalParamVariation 0.01228687
[[ 0.         -0.06323232  0.13717172  0.00464646]
 [ 0.          0.         -0.12242424 -0.0169697 ]
 [ 0.          0.          0.          0.00888889]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 2.45252525e-01 -2.77777778e-01  5.22222222e-01  2.02020202e-04] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1850 TotalParamVariation 0.01317374
[[ 0.         -0.03212121  0.13414141 -0.00505051]
 [ 0.          0.         -0.09232323 -0.01212121]
 [ 0.          0.          0.         -0.00383838]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.24080808 -0.19494949  0.48080808  0.00585859] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 1900 TotalParamV

Epoch 2650 TotalParamVariation 0.01291111
[[ 0.         -0.04424242  0.11838384 -0.00383838]
 [ 0.          0.         -0.11050505  0.01333333]
 [ 0.          0.          0.         -0.00949495]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.24666667 -0.22949495  0.49070707 -0.01212121] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data

Epoch 2700 TotalParamVariation 0.01247071
[[ 0.         -0.07454545  0.11919192 -0.00484848]
 [ 0.          0.         -0.12747475  0.00020202]
 [ 0.          0.          0.          0.00848485]
 [ 0.          0.          0.          0.        ]] Corr Model
[[ 0.   -0.5   0.25 -0.25]
 [ 0.    0.   -0.25 -0.25]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]] Corr Data
[ 0.27656566 -0.25737374  0.47737374  0.00787879] Average Model
[ 0.25 -0.25  0.5   0.  ] Average Data



In [None]:
s_ising.train(1000)