In [5]:
import pandas as pd
import numpy as np
from itertools import product
import math
import random
import copy

According to Hinton's "A Practical Guide to Training Restricted Boltzmann Machines"

Initialize J matrix to zero mean and 0.01 sd. The diagonal will be 0.

Initizlize bias b_i  of visible unit i to log[pi/(1−pi)]

In [194]:
class IsingModel:
    def __init__(self, data, lr=0.1):
        self.lr = lr
        
        # Possible values the spins can take
        self.spin_values = [-1,1]
        
        self.data_activations_matrix = pd.DataFrame(activations_list, columns=['sigma_0', 'sigma_1', 'sigma_2', 'sigma_3'])
        # Calculate the true data expectations
        self.data_spins_correlation = self.data_activations_matrix.corr().values
        lind = np.tril_indices_from(self.data_spins_correlation)
        self.data_spins_correlation[lind]=0
        self.data_spins_average = self.data_activations_matrix.mean().values
        
        self.num_spins = self.data_activations_matrix.shape[1]
        self.num_samples_data = self.data_activations_matrix.shape[0]

        # Calculate the initial J matrix. Fill only the upper diagonal
        # Filling with Normal(0, 0.01)
        np.random.seed(3)
        self.J = np.random.normal(loc=0.0, scale=.01, size=self.data_spins_correlation.shape)
        lind = np.tril_indices_from(self.J)
        self.J[lind]=0
                
        # Calculate the initial H vector, each entry proportional to its probability (according to Hinton)
        self.H = self.data_activations_matrix.sum()/self.num_samples_data
        # Try with random H
        #self.H = np.random.normal(loc=0.0, scale=.01, size=self.num_spins)
    
    # Calcualte the energy of the model's parameters given a sigma state
    def calculate_energy(self, sigma):
        energy = 0
        for i in range(0, self.num_spins):
            for j in range(i+1, self.num_spins):
                energy += sigma[i]*sigma[j]*self.J[i][j]

        for i in range(0, self.num_spins):
            energy += sigma[i]*self.H[i]

        return - energy
    
    # Flip a randomly selected spin
    def flip_spin(self, _sigma):
        sigma = copy.deepcopy(_sigma)
        spin_to_flip = random.choice(range(0, self.num_spins))

        spin = sigma[spin_to_flip]
        if spin == self.spin_values[0]:
            sigma[spin_to_flip] = self.spin_values[1]
        else:
            sigma[spin_to_flip] = self.spin_values[0]
        return sigma
        
        
    
    # Train the exact model
    def train(self, max_epochs = 500):
        
        totalParamVariation = math.inf
        stopCondition = 0.005
        
        epoch = 1
        
        while totalParamVariation > stopCondition and epoch < 500:
            # Calculate P(sigma) for every possible combination in the model

            prob_dict = {}
            for sigma in product(self.spin_values, repeat=self.num_spins):
                prob_dict[sigma] = math.exp(-self.calculate_energy(sigma))

            # Calculate the partition function
            Z = sum(prob_dict.values())

            # Normalize dividing by the partition function
            for sigma in prob_dict.keys():
                prob_dict[sigma] = prob_dict[sigma]/Z

            corr_model_exp = np.zeros(self.J.shape)
            mean_model_exp = np.zeros(self.H.shape)
            for sigma in product(self.spin_values, repeat=self.num_spins):
                
                # Calculate the expectation of the correlations
                for i in range(0, self.num_spins):
                    for j in range(i+1, self.num_spins):
                        corr_model_exp[i][j] += sigma[i]*sigma[j]*prob_dict[sigma]

                # Calculate the expectation of the averages
                for i in range(0, self.num_spins):
                    mean_model_exp[i] += sigma[i]*prob_dict[sigma]


            # Calculate the step size for every J_{ij}
            stepJ = self.lr * (self.data_spins_correlation - corr_model_exp)
            # Calculate the variation of J for the termination condition
            deltaJ = abs(self.J - stepJ).sum().sum()
            # Take the step
            self.J = self.J + stepJ

            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_spins_average - mean_model_exp)
            # Calculate the variation of J for the termination condition
            deltaH = abs(self.H - stepH).sum()
            # Take the step
            self.H = self.H + stepH
            totalParamVariation = deltaH + deltaJ
            
            print('Epoch', epoch, 'TotalParamVariation', totalParamVariation)
            
            epoch += 1

        print(self.H)
        print(self.data_spins_correlation)
        print(self.J)
        
    def random_walk(self, max_steps=40):
        # Generate a random initial spin configuration
        model_activations_list = []
        sigma_0 = random.choices([-1,1], k=self.num_spins)
        
        # Calculate the energy at this time
        energy_0 = self.calculate_energy(sigma_0)
        
        for i in range(0, max_steps):
            
            # Flip a spin and calculate the new energy
            sigma_t = self.flip_spin(sigma_0)
            energy_t = self.calculate_energy(sigma_t)
            delta_energy = energy_t - energy_0

            if delta_energy<=0: # If delta_energy<0, accept the spin flip, energetically favourable
                model_activations_list.append(sigma_t)
            

            else: # Check if transition probability p < uniform r
                p = math.exp(-delta_energy)
                r = random.random()

                if r<=p: # Accept spin flip
                    model_activations_list.append(sigma_t)
                else: # Refuse spin flip
                    model_activations_list.append(sigma_0)
            
            sigma_0 = sigma_t
            energy_0 = energy_t
        
        return model_activations_list
    
    def train_metropolis(self, max_epochs = 300):
        random.seed(3)
        totalParamVariation = math.inf
        stopCondition = 0.0005
        
        epoch = 1
        randomWalkLength = 40
        
        while totalParamVariation > stopCondition and epoch < max_epochs:
            model_activations_list = self.random_walk(randomWalkLength)   
            model_activations_matrix = pd.DataFrame(model_activations_list, columns=['sigma_0', 'sigma_1', 'sigma_2', 'sigma_3'])
            # Calculate teh data expectations
            model_spins_correlation = model_activations_matrix.corr().values
            lind = np.tril_indices_from(model_spins_correlation)
            model_spins_correlation[lind]=0
            model_spins_average = model_activations_matrix.mean().values
            
            # Calculate the step size for every J_{ij}
            stepJ = self.lr * (self.data_spins_correlation - model_spins_correlation)
            # Calculate the variation of J for the termination condition
            deltaJ = abs(self.J - stepJ).sum().sum()
            # Take the step
            self.J = self.J + stepJ

            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_spins_average - model_spins_average)
            # Calculate the variation of J for the termination condition
            deltaH = abs(self.H - stepH).sum()
            # Take the step
            self.H = self.H + stepH
            totalParamVariation = deltaH + deltaJ
            
            
            print('Epoch', epoch, 'TotalParamVariation', totalParamVariation)
            
            epoch +=1

        print(self.H)
        print(self.data_spins_correlation)
        print(self.J)
        

In [195]:
activations_list = [[1,-1,1,-1], [1,-1,1,-1], [-1,1,-1,-1], [-1,1,1,1], [1,1,1,-1], [-1,-1,1,1], [1,-1,-1,1], [1,-1,1,1]]

In [196]:
ising = IsingModel(activations_list, lr=0.0005)

In [198]:
ising.train_metropolis(max_epochs = 600)


Epoch 1 TotalParamVariation 1.0386251871067014
Epoch 2 TotalParamVariation 1.0409685621272495
Epoch 3 TotalParamVariation 1.0411801991156893
Epoch 4 TotalParamVariation 1.0421684409112768
Epoch 5 TotalParamVariation 1.042256708046085
Epoch 6 TotalParamVariation 1.0433404201006085
Epoch 7 TotalParamVariation 1.0424107375902232
Epoch 8 TotalParamVariation 1.0443897299082576
Epoch 9 TotalParamVariation 1.0434225142315678
Epoch 10 TotalParamVariation 1.0435380811498343
Epoch 11 TotalParamVariation 1.0443926240436652
Epoch 12 TotalParamVariation 1.0450715537406852
Epoch 13 TotalParamVariation 1.0452122539945772
Epoch 14 TotalParamVariation 1.0452558066241338
Epoch 15 TotalParamVariation 1.0464874071014318
Epoch 16 TotalParamVariation 1.0467225530182034
Epoch 17 TotalParamVariation 1.0480430468213973
Epoch 18 TotalParamVariation 1.048737313495888
Epoch 19 TotalParamVariation 1.0487808553572215
Epoch 20 TotalParamVariation 1.0510161251091168
Epoch 21 TotalParamVariation 1.0505111317133733
Epo

Epoch 239 TotalParamVariation 1.2609566281079856
Epoch 240 TotalParamVariation 1.2617781232535044
Epoch 241 TotalParamVariation 1.2627687579847833
Epoch 242 TotalParamVariation 1.2642370099350346
Epoch 243 TotalParamVariation 1.265117877159387
Epoch 244 TotalParamVariation 1.2650064889705646
Epoch 245 TotalParamVariation 1.2654940761698699
Epoch 246 TotalParamVariation 1.2670801262687994
Epoch 247 TotalParamVariation 1.2678627547271266
Epoch 248 TotalParamVariation 1.2692237941500748
Epoch 249 TotalParamVariation 1.2715848214802778
Epoch 250 TotalParamVariation 1.2719943954350843
Epoch 251 TotalParamVariation 1.2727102655466938
Epoch 252 TotalParamVariation 1.2730257973612211
Epoch 253 TotalParamVariation 1.2727395553309466
Epoch 254 TotalParamVariation 1.2744108827803853
Epoch 255 TotalParamVariation 1.2744993700907488
Epoch 256 TotalParamVariation 1.276992412141643
Epoch 257 TotalParamVariation 1.2778919962418893
Epoch 258 TotalParamVariation 1.278121629337511
Epoch 259 TotalParamVar

Epoch 479 TotalParamVariation 1.47171665969698
Epoch 480 TotalParamVariation 1.4732120463220644
Epoch 481 TotalParamVariation 1.473936845196124
Epoch 482 TotalParamVariation 1.474072106356728
Epoch 483 TotalParamVariation 1.4759441487935128
Epoch 484 TotalParamVariation 1.4771685333089528
Epoch 485 TotalParamVariation 1.478127205699743
Epoch 486 TotalParamVariation 1.4803198004186575
Epoch 487 TotalParamVariation 1.4808650568171797
Epoch 488 TotalParamVariation 1.4812364690962814
Epoch 489 TotalParamVariation 1.4814306407479463
Epoch 490 TotalParamVariation 1.4827289445150402
Epoch 491 TotalParamVariation 1.4838973253169272
Epoch 492 TotalParamVariation 1.4850542461734635
Epoch 493 TotalParamVariation 1.4844580796393076
Epoch 494 TotalParamVariation 1.4861909087591767
Epoch 495 TotalParamVariation 1.486343221897879
Epoch 496 TotalParamVariation 1.487244814579109
Epoch 497 TotalParamVariation 1.4886216211604526
Epoch 498 TotalParamVariation 1.4881576504814569
Epoch 499 TotalParamVariati

In [193]:
ising.train(max_epochs= 600)

Epoch 1 TotalParamVariation 1.039811912152953
Epoch 2 TotalParamVariation 1.0399087674052108
Epoch 3 TotalParamVariation 1.040006570628456
Epoch 4 TotalParamVariation 1.040104252054163
Epoch 5 TotalParamVariation 1.0402018118244487
Epoch 6 TotalParamVariation 1.0402992500813
Epoch 7 TotalParamVariation 1.0403965669665733
Epoch 8 TotalParamVariation 1.0404937626219963
Epoch 9 TotalParamVariation 1.0405908371891675
Epoch 10 TotalParamVariation 1.0406877908095546
Epoch 11 TotalParamVariation 1.0407846236244978
Epoch 12 TotalParamVariation 1.0408813357752067
Epoch 13 TotalParamVariation 1.0409779274027626
Epoch 14 TotalParamVariation 1.0410743986481172
Epoch 15 TotalParamVariation 1.0411707496520932
Epoch 16 TotalParamVariation 1.041266980555385
Epoch 17 TotalParamVariation 1.041363091498557
Epoch 18 TotalParamVariation 1.041459082622046
Epoch 19 TotalParamVariation 1.0415549540661595
Epoch 20 TotalParamVariation 1.041650705971076
Epoch 21 TotalParamVariation 1.0417463384768457
Epoch 22 To

Epoch 350 TotalParamVariation 1.1902994002023455
Epoch 351 TotalParamVariation 1.1907099683912197
Epoch 352 TotalParamVariation 1.1911202965127805
Epoch 353 TotalParamVariation 1.1915303847833418
Epoch 354 TotalParamVariation 1.1919402334190152
Epoch 355 TotalParamVariation 1.1923498426357122
Epoch 356 TotalParamVariation 1.1927592126491422
Epoch 357 TotalParamVariation 1.1931683436748148
Epoch 358 TotalParamVariation 1.1935772359280383
Epoch 359 TotalParamVariation 1.1939858896239213
Epoch 360 TotalParamVariation 1.1943943049773713
Epoch 361 TotalParamVariation 1.194802482203096
Epoch 362 TotalParamVariation 1.1952104215156034
Epoch 363 TotalParamVariation 1.1956181231292011
Epoch 364 TotalParamVariation 1.1960255872579972
Epoch 365 TotalParamVariation 1.196432814115901
Epoch 366 TotalParamVariation 1.1968398039166215
Epoch 367 TotalParamVariation 1.1972465568736685
Epoch 368 TotalParamVariation 1.1976530732003536
Epoch 369 TotalParamVariation 1.198059353109789
Epoch 370 TotalParamVar

In [17]:
class SimpleIsingModel:
    def __init__(self, data, lr=0.1):
        self.lr = lr
        
        # Possible values the spins can take
        self.spin_values = [-1,1]
        
        self.data_activations_matrix = pd.DataFrame(activations_list, columns=['sigma_0', 'sigma_1', 'sigma_2', 'sigma_3'])
        # Calculate the true data expectations
        self.data_spins_correlation = self.data_activations_matrix.corr().values
        lind = np.tril_indices_from(self.data_spins_correlation)
        self.data_spins_correlation[lind]=0
        self.data_spins_average = self.data_activations_matrix.mean().values
        
        self.num_spins = self.data_activations_matrix.shape[1]
        self.num_samples_data = self.data_activations_matrix.shape[0]
                
        # Calculate the initial H vector, each entry proportional to its probability (according to Hinton)
        #self.H = self.data_activations_matrix.sum()/self.num_samples_data
        # Try with random H
        self.H = np.random.normal(loc=0.0, scale=.01, size=self.num_spins)
    
    # Calcualte the energy of the model's parameters given a sigma state
    def calculate_energy(self, sigma):
        energy = 0

        for i in range(0, self.num_spins):
            energy += sigma[i]*self.H[i]

        return - energy
    
    # Flip a randomly selected spin
    def flip_spin(self, _sigma):
        sigma = copy.deepcopy(_sigma)
        spin_to_flip = random.choice(range(0, self.num_spins))

        spin = sigma[spin_to_flip]
        if spin == self.spin_values[0]:
            sigma[spin_to_flip] = self.spin_values[1]
        else:
            sigma[spin_to_flip] = self.spin_values[0]
        return sigma
        
        
    
    # Train the exact model
    def train(self, max_epochs = 500):
        
        totalParamVariation = math.inf
        stopCondition = 0.005
        
        epoch = 1
        
        while totalParamVariation > stopCondition and epoch < 500:
            # Calculate P(sigma) for every possible combination in the model

            prob_dict = {}
            for sigma in product(self.spin_values, repeat=self.num_spins):
                prob_dict[sigma] = math.exp(-self.calculate_energy(sigma))

            # Calculate the partition function
            Z = sum(prob_dict.values())

            # Normalize dividing by the partition function
            for sigma in prob_dict.keys():
                prob_dict[sigma] = prob_dict[sigma]/Z

            mean_model_exp = np.zeros(self.H.shape)
            for sigma in product(self.spin_values, repeat=self.num_spins):
                # Calculate the expectation of the averages
                for i in range(0, self.num_spins):
                    mean_model_exp[i] += sigma[i]*prob_dict[sigma]

            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_spins_average - mean_model_exp)
            # Calculate the variation of J for the termination condition
            deltaH = abs(self.H - stepH).sum()
            # Take the step
            self.H = self.H + stepH
            totalParamVariation = deltaH
            
            print('Epoch', epoch, 'TotalParamVariation', totalParamVariation)
            
            epoch += 1

        print(self.H)
        print(self.data_spins_correlation)
        
    def random_walk(self, max_steps=40):
        # Generate a random initial spin configuration
        model_activations_list = []
        sigma_0 = random.choices([-1,1], k=self.num_spins)
        
        # Calculate the energy at this time
        energy_0 = self.calculate_energy(sigma_0)
        
        for i in range(0, max_steps):
            
            # Flip a spin and calculate the new energy
            sigma_t = self.flip_spin(sigma_0)
            energy_t = self.calculate_energy(sigma_t)
            delta_energy = energy_t - energy_0

            if delta_energy<=0: # If delta_energy<0, accept the spin flip, energetically favourable
                model_activations_list.append(sigma_t)
            

            else: # Check if transition probability p < uniform r
                p = math.exp(-delta_energy)
                r = random.random()

                if r<=p: # Accept spin flip
                    model_activations_list.append(sigma_t)
                else: # Refuse spin flip
                    model_activations_list.append(sigma_0)
            
            sigma_0 = sigma_t
            energy_0 = energy_t
        
        return model_activations_list
    
    def train_metropolis(self, max_epochs = 300):
        random.seed(3)
        totalParamVariation = math.inf
        stopCondition = 0.0005
        
        epoch = 1
        randomWalkLength = 40
        
        while totalParamVariation > stopCondition and epoch < max_epochs:
            model_activations_list = self.random_walk(randomWalkLength)   
            model_activations_matrix = pd.DataFrame(model_activations_list, columns=['sigma_0', 'sigma_1', 'sigma_2', 'sigma_3'])
            # Calculate teh data expectations
            model_spins_correlation = model_activations_matrix.corr().values
            lind = np.tril_indices_from(model_spins_correlation)
            model_spins_correlation[lind]=0
            model_spins_average = model_activations_matrix.mean().values
            
            # Calculate the step size for every H_{i}
            stepH = self.lr * (self.data_spins_average - model_spins_average)
            # Calculate the variation of J for the termination condition
            deltaH = abs(self.H - stepH).sum()
            # Take the step
            self.H = self.H + stepH
            totalParamVariation = deltaH
            
            
            print('Epoch', epoch, 'TotalParamVariation', totalParamVariation)
            
            epoch +=1

        print(self.H)
        print(self.data_spins_correlation)


In [15]:
activations_list = [[1,-1,1,-1], [1,-1,1,-1], [-1,1,-1,-1], [-1,1,1,1], [1,1,1,-1], [-1,-1,1,1], [1,-1,-1,1], [1,-1,1,1]]
s_ising = SimpleIsingModel(activations_list, lr=0.0005)


In [18]:
s_ising.train_metropolis(max_epochs = 300)

Epoch 1 TotalParamVariation 0.21387261171750735
Epoch 2 TotalParamVariation 0.21559761171750733
Epoch 3 TotalParamVariation 0.21662261171750735
Epoch 4 TotalParamVariation 0.21667261171750735
Epoch 5 TotalParamVariation 0.21729761171750733
Epoch 6 TotalParamVariation 0.21794761171750734
Epoch 7 TotalParamVariation 0.21724761171750734
Epoch 8 TotalParamVariation 0.21702261171750736
Epoch 9 TotalParamVariation 0.21754761171750733
Epoch 10 TotalParamVariation 0.21767261171750735
Epoch 11 TotalParamVariation 0.21797261171750734
Epoch 12 TotalParamVariation 0.21864761171750732
Epoch 13 TotalParamVariation 0.2200226117175073
Epoch 14 TotalParamVariation 0.2203976117175073
Epoch 15 TotalParamVariation 0.22022261171750734
Epoch 16 TotalParamVariation 0.22127261171750734
Epoch 17 TotalParamVariation 0.22112261171750736
Epoch 18 TotalParamVariation 0.22062261171750736
Epoch 19 TotalParamVariation 0.22199761171750737
Epoch 20 TotalParamVariation 0.22232261171750736
Epoch 21 TotalParamVariation 0.

Epoch 247 TotalParamVariation 0.3147976117175075
Epoch 248 TotalParamVariation 0.31592261171750746
Epoch 249 TotalParamVariation 0.3157976117175075
Epoch 250 TotalParamVariation 0.31689761171750747
Epoch 251 TotalParamVariation 0.3167726117175075
Epoch 252 TotalParamVariation 0.31749761171750746
Epoch 253 TotalParamVariation 0.3182476117175075
Epoch 254 TotalParamVariation 0.3182476117175075
Epoch 255 TotalParamVariation 0.3189726117175075
Epoch 256 TotalParamVariation 0.3195976117175075
Epoch 257 TotalParamVariation 0.3193226117175075
Epoch 258 TotalParamVariation 0.32064761171750744
Epoch 259 TotalParamVariation 0.3213726117175075
Epoch 260 TotalParamVariation 0.32194761171750746
Epoch 261 TotalParamVariation 0.3216726117175075
Epoch 262 TotalParamVariation 0.3222476117175075
Epoch 263 TotalParamVariation 0.3225226117175074
Epoch 264 TotalParamVariation 0.3225476117175074
Epoch 265 TotalParamVariation 0.32349761171750746
Epoch 266 TotalParamVariation 0.32362261171750745
Epoch 267 Tot