In [None]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
import warnings

# Silence every warning raised from here on.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below; consider narrowing the filter if something behaves unexpectedly.
warnings.simplefilter("ignore")

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keras import optimizers
from keras.callbacks import EarlyStopping
from keras.layers import (
    Activation,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)
from keras.models import Model

# PHYS 395 - week 9

**Matt Wiens - #301294492**

This notebook will be organized similarly to the lab script, with major headings corresponding to the headings on the lab script.

*The TA's name (Ignacio) will be shortened to "IC" whenever used.*

## Setup 

In [None]:
# Figures that do not request their own size default to 12 x 9
plt.rcParams.update({"figure.figsize": (12, 9)})

In [None]:
%%javascript
// Raise the output-area auto-scroll threshold.
// NOTE(review): presumably so long cell outputs are shown in full instead of
// in a scrolling box; confirm against the notebook front-end in use.
IPython.OutputArea.auto_scroll_threshold = 9999

# Artificial neural nets (ANNs)

Here we're going to be training a model to recognize when Ising model-like data is at configurations above, at, or below critical temperatures.

## Reading in data

First we'll read in Ising model data.

In [None]:
# Critical temperature; samples at or below it are labelled 1, above it 0
T_c = 2.27

# Side length of each square lattice image (images are reshaped to L x L x 1)
L = 32

In [None]:
# Root folder of the data set, relative to the notebook
data_dir = "Ising"

# One sub-directory per sampled temperature, named "T<temperature>".
# The sampling is finest around 2.27 and coarser further away.
data_sub_dirs = ["T1.5", "T2.0", "T2.1", "T2.2"]
data_sub_dirs += ["T2.25", "T2.26", "T2.27", "T2.28", "T2.29"]
data_sub_dirs += ["T2.3", "T2.4", "T2.5", "T2.7", "T3.0"]

In [None]:
# Load every lattice configuration together with its label and temperature.
# The three lists stay index-aligned: entry k of each describes the same sample.
images, labels, temperatures = [], [], []

for sub_dir in data_sub_dirs:
    # Directory names are "T<temperature>", so drop the leading "T"
    temp = float(sub_dir[1:])
    # 1 for temperatures at or below T_c, 0 above it
    label = int(temp <= T_c)

    for i in range(1, 501):
        img_path = os.path.join(data_dir, sub_dir, "img" + str(i) + ".dat")

        # One lattice per file; give it an explicit trailing channel axis
        images.append(np.genfromtxt(img_path).reshape((L, L, 1)))
        labels.append(label)
        temperatures.append(temp)

# Stack the per-sample lists into arrays
X = np.array(images)
Y = np.array(labels)
temps = np.array(temperatures)

Let's verify the size of our data.

In [None]:
# Sanity check: report the dimensions of the image and label arrays
for shape_message in ("X shape: %s" % list(X.shape), "Y shape: %s" % list(Y.shape)):
    print(shape_message)

And let's have a look at some of the images in each of the following regimes: below, near, or above $T_c$.

In [None]:
# Hand-picked sample indices into X / temps, three per temperature regime
low_idxs, near_idxs, above_idxs = (
    [50, 550, 1050],
    [3001, 3102, 3403],
    [5405, 6110, 6991],
)

**Low temperatures**

In [None]:
# One panel per hand-picked sample from the low-temperature selection
_, axes = plt.subplots(1, 3, figsize=[20, 20 / 3])

for ax, sample_idx in zip(axes, low_idxs):
    # Drop the channel axis so imshow receives a 2-D lattice
    ax.imshow(X[sample_idx, :, :, 0])
    ax.set_title("T = %s" % temps[sample_idx])

Here we can see pretty clearly that at very low temperature, essentially all of the spins are aligned. As we increase temperature, we get small deviations from the majority of aligned spins. These deviations get larger the higher the temperature in this regime.

**Near critical temperature**

In [None]:
# One panel per hand-picked sample from the near-critical selection
_, axes = plt.subplots(1, 3, figsize=[20, 20 / 3])

for ax, sample_idx in zip(axes, near_idxs):
    # Drop the channel axis so imshow receives a 2-D lattice
    ax.imshow(X[sample_idx, :, :, 0])
    ax.set_title("T = %s" % temps[sample_idx])

Here we see a similar trend as before at the critical temperature. It appears that the majority of spins are aligned, while a smaller but still significant number are not.

**Above critical temperature**

In [None]:
# One panel per hand-picked sample from the above-critical selection
_, axes = plt.subplots(1, 3, figsize=[20, 20 / 3])

for ax, sample_idx in zip(axes, above_idxs):
    # Drop the channel axis so imshow receives a 2-D lattice
    ax.imshow(X[sample_idx, :, :, 0])
    ax.set_title("T = %s" % temps[sample_idx])

Now it seems that the spins have close to a net zero sum, which seems to hold better the higher we increase the temperature.

## Separating data into training, test, and validation sets

Now we'll split our data using a 70/15/15 distribution of our data into training, test, and validation sets.

In [None]:
# Total number of samples
N = X.shape[0]

# 70/15/15 split. int() truncates, so the validation size is defined as
# whatever remains after the training and test sets: the three sizes then
# always add up to N, matching how the shuffled indices are sliced.
N_train = int(N * 0.70)
N_test = int(N * 0.15)
N_valid = N - N_train - N_test

In [None]:
# Randomly order the sample indices, then cut that ordering into three
# consecutive pieces: training first, test second, the remainder as validation
idxs = np.random.permutation(N)
idxs_train, idxs_test, idxs_valid = np.split(idxs, [N_train, N_train + N_test])

# Apply each index set to images, labels, and temperatures alike so the
# three arrays of every subset stay aligned sample by sample
X_train, Y_train, temps_train = X[idxs_train], Y[idxs_train], temps[idxs_train]
X_test, Y_test, temps_test = X[idxs_test], Y[idxs_test], temps[idxs_test]
X_valid, Y_valid, temps_valid = X[idxs_valid], Y[idxs_valid], temps[idxs_valid]

### Augmenting the training dataset

We can also make use of the fact that our Ising configurations are symmetric to generate more data.

In [None]:
# Set to True to have the next cell append mirrored copies of the training
# images (with matching labels and temperatures) to the training set
use_augmented_data = False

In [None]:
# Augment only when requested, and only while X_train still holds exactly the
# N_train original samples. The size check keeps the cell idempotent: running
# it again does not pile flips of already-augmented data onto the training set.
if use_augmented_data and X_train.shape[0] == N_train:
    # Two mirrored copies per sample, interleaved per sample as
    # [sample 0 reversed along its second lattice axis,
    #  sample 0 reversed along its first lattice axis, sample 1 ..., ...]
    flipped = np.stack((X_train[:, :, ::-1, :], X_train[:, ::-1, :, :]), axis=1)
    X_data_to_add = flipped.reshape((-1,) + X_train.shape[1:])

    # Every original sample contributes two new ones that share its label and
    # temperature; np.repeat also keeps the original dtypes
    Y_data_to_add = np.repeat(Y_train, 2)
    temp_data_to_add = np.repeat(temps_train, 2)

    X_train = np.concatenate((X_train, X_data_to_add))
    Y_train = np.concatenate((Y_train, Y_data_to_add))
    temps_train = np.concatenate((temps_train, temp_data_to_add))

## Dense Neural Network architecture

First we'll set up a DNN using Keras.

### Compiling the model

In [None]:
# Learning rate for the SGD optimizer and dropout rate for the hidden layers
lr, dropout = 0.01, 0.5

In [None]:
# Fully connected classifier: flatten the L x L x 1 lattice, pass it through
# two ReLU hidden layers with dropout, and finish with one sigmoid unit
inputs = Input(shape=(L, L, 1))
x = Flatten()(inputs)

# Hidden layers, widest first
for units in (512, 256):
    x = Dense(units)(x)
    x = Activation("relu")(x)
    x = Dropout(dropout)(x)

x = Dense(1)(x)
outputs = Activation("sigmoid")(x)

model = Model(inputs=inputs, outputs=outputs)

# Plain SGD on the binary cross-entropy, reporting accuracy alongside the loss.
# NOTE(review): newer Keras releases name this argument `learning_rate`;
# confirm which spelling the installed version expects.
sgd = optimizers.SGD(lr=lr)
model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Callback handed to model.fit: it watches the validation loss with a patience of 3
early_stopping = EarlyStopping(monitor="val_loss", patience=3)

Let's show a summary of the model we compiled.

In [None]:
# Print the layer-by-layer overview of the compiled model
model.summary()

Here we have ~650,000 trainable parameters.

### Training the model

Now we'll train the model with our data.

In [None]:
# Train on the training set while tracking the validation set, which the
# early-stopping callback monitors. validation_data is passed as a tuple
# (x_val, y_val), the form documented by Keras; a list is not handled the
# same way by every Keras version.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=32,
    epochs=20,
    callbacks=[early_stopping],
)

### Testing the model

Now let's test the model on our test data.

In [None]:
# Evaluate on the held-out test set; the returned values are printed next to
# model.metrics_names in the following cell
vals = model.evaluate(X_test, Y_test)

Let's show the loss and accuracy.

In [None]:
# Report every evaluation result beside the name of its metric
for metric_name, metric_value in zip(model.metrics_names, vals):
    print("%s: %.2f" % (metric_name, metric_value))

Now let's plot the average prediction values as a function of temperature. 

In [None]:
# Model outputs (from the final sigmoid layer) for every test image
pY = model.predict(X_test)

In [None]:
# Pair the first (only used) prediction column with each test sample's temperature
df_pY = pd.DataFrame({"pY": pY[:, 0], "T": temps_test})
# Mean prediction per temperature; as_index=False keeps "T" as a regular column
df_pY_avg = df_pY.groupby("T", as_index=False).mean()

In [None]:
_, ax = plt.subplots()

# Select the columns by name. `df_pY_avg.T` is the DataFrame transpose, not
# the "T" column, so picking its rows by position only gives the right axes
# while the column order happens to be ("T", "pY").
ax.plot(df_pY_avg["T"].values, df_pY_avg["pY"].values, "*-")

ax.set_xlabel("T")
ax.set_ylabel(r"$p_Y$")
# Predictions are sigmoid outputs, so fix the axis to the full [0, 1] range;
# the trailing semicolon suppresses the cell's text output
ax.set_ylim([0, 1]);

The interesting region of this plot (where there is the most uncertainty) definitely seems to correspond to where the critical temperature is.

### Adjusting the learning rate

We can adjust the `lr` value in our model from `0.01` to `0.001` as suggested in the lab script. The lab script suggests that this would lead to worse performance but better accuracy. I ran both of these settings a few times and honestly (1) found the performance impact negligible, and (2) didn't notice a significant difference in accuracy.

Probably lowering the `lr` value even further would make a difference I would notice.

### Tuning network parameters

#### Adding an extra layer

First let's see the effect of adding in another layer. We can do this by adding the following code to our model code (right before `x = Dense(1)(x)`):

```
x = Dense(512)(x)
x = Activation("relu")(x)
x = Dropout(dropout)(x)
```

In my runs, accuracy did *not* improve (at least I didn't notice an improvement) from adding an extra layer.

#### Adjusting the dropout values

We can also try lowering the dropout values. Using values of `0.1` and `0.3`, it appeared that the lower the dropout value, the greater the number of iterations done during fitting. However, the accuracy still didn't substantially increase.

### Augmenting the training dataset

We can augment our training data by making use of symmetries in the Ising model. Code for this is above, you just need to rerun the notebook and set `use_augmented_data` to `True`.

I ran through several runs of this, and on average I saw a ~2% increase in accuracy, which is a lot more than I saw for either of the other methods above.