In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
from input_pipeline import get_param_dict, benchmark
from tqdm import tqdm
import numpy as np

2023-10-16 07:34:25.364878: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-16 07:34:25.411496: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.


# Dataset

In [2]:
def load_npy_file(file_path):
    """Load a single NumPy file from disk.

    Args:
        file_path: Location of the file, in any form accepted by ``np.load``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    array = np.load(file_path)
    return array

def load_numpy_file_wrapper(file_path):
    """Make ``load_npy_file`` usable inside a ``tf.data`` pipeline.

    The Python loader is wrapped with ``tf.numpy_function`` so it can be
    passed to ``Dataset.map``; the result is declared as ``tf.int32``.

    Args:
        file_path: Tensor holding the path of the ``.npy`` file to read.

    Returns:
        The ``tf.int32`` tensor produced by the wrapped loader.
    """
    return tf.numpy_function(func=load_npy_file, inp=[file_path], Tout=tf.int32)

def dataset_pipeline_old(path, flatten=True, batch_size=1):
    """Earlier version of the input pipeline that transforms samples one at a time.

    Every file matching ``{path}/*/*.npy`` is loaded, reshaped, rescaled with
    ``(x + 1) / 2`` (presumably mapping spins stored as -1/+1 onto 0/1 --
    confirm against the data generator), cast to ``tf.float32`` and only then
    batched.

    Args:
        path: Root directory of the dataset.
        flatten: When truthy each sample is flattened to 1-D; otherwise an
            extra axis is inserted at index 2.
        batch_size: Number of samples per batch.

    Returns:
        A batched ``tf.data.Dataset`` of ``tf.float32`` tensors.
    """
    print("Getting data from " + path)
    file_names = tf.data.Dataset.list_files(f"{path}/*/*.npy")
    print(f"Got {len(file_names)} samples")

    def _flatten(sample):
        return tf.reshape(sample, [-1])

    def _add_axis(sample):
        return tf.expand_dims(sample, 2)

    def _rescale(sample):
        return (sample + 1) / 2

    def _to_float32(sample):
        return tf.cast(sample, tf.float32)

    reshape_step = _flatten if flatten else _add_axis

    samples = file_names.map(load_numpy_file_wrapper)
    # Each step stays its own map stage, exactly as in the step-by-step form.
    for step in (reshape_step, _rescale, _to_float32):
        samples = samples.map(step)
    return samples.batch(batch_size)

def dataset_pipeline(path, flatten=True, batch_size=1):
    """Build the batched ``tf.data`` input pipeline for a directory of ``.npy`` files.

    Files matching ``{path}/*/*.npy`` are loaded in parallel, batched, reshaped,
    rescaled with ``(x + 1) / 2`` (presumably mapping spins stored as -1/+1
    onto 0/1 -- confirm against the data generator) and cast to ``tf.float32``.

    Args:
        path: Root directory of the dataset.
        flatten: When truthy each batch is reshaped to ``(batch, -1)``;
            otherwise an extra axis is inserted at index 3 of the batched
            tensor.
        batch_size: Number of samples per batch. The last batch may be
            smaller when the number of files is not a multiple of it.

    Returns:
        A ``tf.data.Dataset`` yielding ``tf.float32`` batches.
    """
    print("Getting data from " + path)
    dataset = tf.data.Dataset.list_files(f"{path}/*/*.npy")
    print(f"Got {len(dataset)} samples")

    # Reading the files is the expensive step, so let tf.data parallelise it.
    dataset = dataset.map(load_numpy_file_wrapper, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)

    def preprocess(batch):
        if flatten:
            # Use the dynamic batch dimension: a hard-coded `batch_size` breaks
            # on the final batch when it holds fewer samples.
            batch = tf.reshape(batch, [tf.shape(batch)[0], -1])
        else:
            batch = tf.expand_dims(batch, 3)
        return tf.cast((batch + 1) / 2, tf.float32)

    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)

    # Prefetch belongs at the end of the pipeline so that fully prepared
    # batches are produced while the consumer works on the previous one;
    # prefetching the bare file names overlaps nothing useful.
    return dataset.prefetch(tf.data.AUTOTUNE)


In [3]:
# Root directory of the training set. It is handed to get_param_dict and to
# dataset_pipeline, which globs "<path>/*/*.npy" beneath it.
path = "../GetData/Rust/get_data_rust/Data/Trainset"

In [4]:
# Look up the parameters of the dataset under `path` with the project helper
# get_param_dict (imported from input_pipeline) and display them as the cell
# output. NOTE(review): the values come back as strings -- see the output.
params = get_param_dict(path)
print("Parameters used in this dataset:")
params

Parameters used in this dataset:


{'Steps': '1000',
 'Simulatiton Number': '10000',
 'Temperature': '2.73',
 'Magnetic Field': '0',
 'Mattize Size': '64'}

In [5]:
# Build the pipeline with 100 samples per batch. With flatten=False the
# batches are not flattened; an extra axis is inserted at index 3 instead.
# Then run the project's benchmark helper (from input_pipeline) on it.
dataset = dataset_pipeline(path, batch_size=100, flatten=False)
benchmark(dataset)

Getting data from ../GetData/Rust/get_data_rust/Data/Trainset
Got 10000 samples
Number of examples:  10000
Execution time: 3.892809787000033


# Model

### Restricted Boltzmann Machine

In [None]:
class RBM:
    """Restricted Boltzmann Machine with binary units, trained with one-step
    contrastive divergence (CD-1).

    Attributes are plain ``tf.Variable`` objects updated in place by
    ``backward``; no optimizer or gradient tape is involved.
    """

    def __init__(self, num_visible, num_hidden, learning_rate):
        """Create the model parameters.

        Args:
            num_visible: Number of visible units (size of one flattened sample).
            num_hidden: Number of hidden units.
            learning_rate: Step size applied to every parameter update.
        """
        self.num_visible = num_visible
        self.num_hidden = num_hidden
        self.learning_rate = learning_rate
        # Weights start from a standard normal draw, biases from zero.
        self.W = tf.Variable(tf.random.normal([num_visible, num_hidden]))
        self.visible_bias = tf.Variable(tf.zeros([num_visible]))
        self.hidden_bias = tf.Variable(tf.zeros([num_hidden]))

    @staticmethod
    def _sample_bernoulli(probabilities):
        """Draw 0/1 samples that equal 1 with the given probabilities."""
        uniform = tf.random.uniform(shape=tf.shape(probabilities))
        # A unit is on when the uniform draw falls below its activation
        # probability; the opposite comparison would sample with 1 - p.
        return tf.cast(uniform < probabilities, dtype=tf.float32)

    def sample_h_given_v(self, v):
        """Sample the hidden layer given visible states.

        Args:
            v: Tensor of shape ``(batch, num_visible)``.

        Returns:
            Tuple ``(h_sample, p_h_given_v)``: the binary hidden sample and the
            activation probabilities it was drawn from.
        """
        p_h_given_v = tf.nn.sigmoid(tf.matmul(v, self.W) + self.hidden_bias)
        return self._sample_bernoulli(p_h_given_v), p_h_given_v

    def sample_v_given_h(self, h):
        """Sample the visible layer given hidden states.

        Args:
            h: Tensor of shape ``(batch, num_hidden)``.

        Returns:
            Binary visible sample of shape ``(batch, num_visible)``.
        """
        p_v_given_h = tf.nn.sigmoid(tf.matmul(h, tf.transpose(self.W)) + self.visible_bias)
        return self._sample_bernoulli(p_v_given_h)

    def train(self, dataset, num_epochs):
        """Train with CD-1 and print the mean reconstruction error per epoch.

        Args:
            dataset: Iterable of batches. Each batch is reshaped to
                ``(batch, num_visible)``, so already-flat batches and batches
                with extra axes (e.g. ``(batch, H, W, 1)``) are both accepted.
            num_epochs: Number of passes over ``dataset``.
        """
        for epoch in range(num_epochs):
            reconstruction_loss = 0.0
            num_batches = 0
            for batch in tqdm(dataset):
                v0 = tf.reshape(batch, [-1, self.num_visible])
                # Positive phase: hidden statistics driven by the data.
                h0, h0_prob = self.sample_h_given_v(v0)
                # Negative phase: reconstruct the visibles, then evaluate the
                # hidden probabilities on that reconstruction (not on v0).
                v_new = self.sample_v_given_h(h0)
                _, h_new_prob = self.sample_h_given_v(v_new)
                reconstruction_loss += tf.reduce_mean(tf.square(v0 - v_new))
                self.backward(v0, h0_prob, v_new, h_new_prob)
                num_batches += 1
            # Batches are counted while iterating so datasets without a known
            # length work too; max() guards against an empty dataset.
            average_loss = float(reconstruction_loss) / max(num_batches, 1)
            print(f"Epoch {epoch}/{num_epochs}, Average Loss: {average_loss:.5f}")
        print("Training completed.")

    def backward(self, v0, h0_prob, v_new, h_new_prob):
        """Apply one contrastive-divergence update to all parameters.

        The update is the difference between data-driven and
        reconstruction-driven statistics, summed (not averaged) over the batch.

        Args:
            v0: Data batch, shape ``(batch, num_visible)``.
            h0_prob: Hidden probabilities given ``v0``.
            v_new: Reconstructed visible batch.
            h_new_prob: Hidden probabilities given ``v_new``.
        """
        d_vb = tf.reduce_sum(v0, axis=0) - tf.reduce_sum(v_new, axis=0)
        d_hb = tf.reduce_sum(h0_prob, axis=0) - tf.reduce_sum(h_new_prob, axis=0)
        d_W = tf.matmul(tf.transpose(v0), h0_prob) - tf.matmul(tf.transpose(v_new), h_new_prob)
        self.W.assign_add(self.learning_rate * d_W)
        self.hidden_bias.assign_add(self.learning_rate * d_hb)
        self.visible_bias.assign_add(self.learning_rate * d_vb)

    def generate(self, num_samples, num_gibbs_steps=100):
        """Generate samples by running a Gibbs chain from random visibles.

        Args:
            num_samples: Number of independent chains / returned samples.
            num_gibbs_steps: Number of alternating hidden/visible sampling
                steps used for mixing (default 100).

        Returns:
            Tensor of shape ``(num_samples, num_visible)``.
        """
        samples = tf.random.uniform(shape=[num_samples, self.num_visible])
        for _step in range(num_gibbs_steps):
            hidden_samples, _hidden_prob = self.sample_h_given_v(samples)
            samples = self.sample_v_given_h(hidden_samples)
        return samples

In [None]:
# The number of visible units is the stored size squared; the values in
# `params` are strings, hence the int() conversion. 'Mattize Size' (sic) is
# the key as returned by get_param_dict -- presumably the lattice side length.
rbm = RBM(num_visible=int(params['Mattize Size'])**2, num_hidden=64, learning_rate=0.0001)
rbm.train(dataset, num_epochs=4)

# Compare the model to the Monte Carlo algorithm
The goal of this project is to recreate the probability distribution of spin configurations in the Ising model using deep learning methods. <br>
Below, a few samples from the Monte Carlo dataset are compared with samples generated by the RBM model. They do not have to be identical, since we are sampling randomly, but they should<br>
be similar on average.

In [None]:
# Draw 10 samples from the RBM and take the first 10 entries of the first
# dataset batch to compare them against.
data_generated = rbm.generate(10)
data_sample = list(dataset.take(1))[0][:10]

In [None]:
def compare_plot(data_sample, data_generated, image_shape=(64, 64)):
    """Plot generated samples next to reference samples, one pair per row.

    Args:
        data_sample: Indexable collection of reference (Monte Carlo) samples.
        data_generated: Indexable collection of RBM-generated samples.
        image_shape: 2-D shape each sample is reshaped to before plotting.
            Defaults to ``(64, 64)``.

    Only as many rows are drawn as both collections can supply; nothing is
    drawn when either is empty.
    """
    n_rows = min(len(data_sample), len(data_generated))
    if n_rows == 0:
        return

    # 3.2 inches per row reproduces the original 8x32 figure for 10 rows.
    fig = plt.figure(constrained_layout=True, figsize=(8, 3.2 * n_rows))
    # squeeze=False always returns a 2-D array, so a single row is iterable too.
    subfigs = fig.subfigures(nrows=n_rows, ncols=1, squeeze=False)[:, 0]

    for index, subfig in enumerate(subfigs):
        axs = subfig.subplots(nrows=1, ncols=2)

        axs[0].imshow(tf.reshape(data_generated[index], image_shape))
        axs[0].set_title('RBM')

        axs[1].imshow(tf.reshape(data_sample[index], image_shape))
        axs[1].set_title('Monte Carlo')

In [None]:
# Show the RBM samples (left column) next to the Monte Carlo samples (right).
compare_plot(data_sample, data_generated)