In [None]:
# Set this variable yourself.
running_on_colab = False
# Store data as reduced density matrix `rho` or eigenvector tuple `EVW`.
rho_or_EVW = 'rho'

# Machine Learning of Many Body Localization

## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys

# Publish the notebook-level switch through the process environment.
# Presumably the project modules imported below read it back — TODO confirm there.
os.environ['running_on_colab'] = str(running_on_colab)
# Reader-side counterpart, kept for reference:
# running_on_colab = (os.getenv('running_on_colab', 'False') == 'True')

if running_on_colab:
    # On Colab the data root is also appended to the import path.
    data_root             = 'drive/MyDrive/Colab Data/MBL/'
    sys.path.append(data_root)
else:
    data_root             = './'

# Store data as reduced density matrix `rho` or eigenvector tuple `EVW`.
os.environ['rho_or_EVW'] = str(rho_or_EVW)
# Reader-side counterpart, kept for reference:
# use_rho = (os.getenv('rho_or_EVW', 'EVW') == 'rho')

from file_io import *
from data_gen import *
from plotting import *

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.ticker import MaxNLocator

# Figure width/height in pixels. The plotting functions in this notebook divide
# them by `dpi` to obtain matplotlib's inch-based `figsize`.
dpi = 100
fig_w = 1280
fig_h = 640

%matplotlib inline

In [None]:
if running_on_colab:
    !cat /proc/cpuinfo

In [None]:
if running_on_colab:
    !pip install ipython-autotime
    %load_ext autotime

In [None]:
if running_on_colab:
    !pip install pytorch_lightning==0.7.6 torchsummary==1.5.1

## Demo data loading

In [None]:
# Parameters selecting which data set the demo below loads.
# Per the batch-training cell further down: `L` is the system size, `n` the
# number of consecutive sites, `periodic` the boundary-condition flag and
# `num_EV` the number of eigenvalues near zero that were saved.
MBL = {
    "obj_name": 'rho_A',
    "L": 12,
    "n": 2,
    "periodic": True,
    "num_EV": 5,
}
# Unpack into notebook-level names; later cells read `L`, `n`, `p` and `num_EV`
# directly (e.g. for `input_size` and for the demo-training call).
obj_name = MBL['obj_name']
L        = MBL['L']
n        = MBL['n']
periodic = MBL['periodic']
p        = MBL['periodic']  # Short alias of `periodic`.
num_EV   = MBL['num_EV']

In [None]:
from MBL_dataset_rho import MBLDatasetRho

# Build both data sets from the same `MBL` parameters; only the `train` flag differs.
# NOTE(review): `transforms` is not imported in this notebook; presumably it
# arrives through one of the star imports above — TODO confirm.
train_dataset = MBLDatasetRho(
    MBL_params=MBL,
    train=True,
    transform=transforms.ToTensor(),
)
valid_dataset = MBLDatasetRho(
    MBL_params=MBL,
    train=False,
    transform=transforms.ToTensor(),
)

# The `train=False` set is reported as "random samples" here.
print('Number of training samples:', len(train_dataset))
print('Number of random samples  :', len(valid_dataset))

In [None]:
# Two classes; the list index is the class label.
labels = ['Extended (Low W)', 'Localized (High W)']

# Fetch the first sample once: indexing the data set three times would run its
# item loading and transform three times for the same record.
sample = train_dataset[0]
image, W, label = sample["image"], sample["W"], sample["label"]
print("W: {:.2f}\nLabel: {}".format(W, labels[label]))
print("Shape of the image:", image.size())
print("Smallest value in the image:", torch.min(image))
print("Largest value in the image:", torch.max(image))

In [None]:
print('Visualize training data:')
visualize_dataset_rho(train_dataset)

In [None]:
print('Visualize random data:')
visualize_dataset_rho(valid_dataset)

In [None]:
del train_dataset
del valid_dataset

## Neural network

Since NNs with the same `n` have the same input size, we will evaluate them using the same NN structure. As a side effect, results for different `n` are not entirely comparable, but we will compare them anyway because reasons.  

Two classes, `MBLModel` and `MBLDataset`, modified from previous CNN facial recognition code (own work), are used. The model structure and hyperparameters are defined using a dict called `hparams`. Inside it, specifications of the training data are passed using a nested dict `hparams["MBL"]`. The models are stored in a directory structure that mirrors that of the training data (reduced density matrices $\rho_A$).  

Caveat: The validation data isn't really unseen data from the training distribution $W \in \{0.5, 8\}$, but rather samples with random $W$ values, which we will use to predict $W_c$.  

See the other notebook for data generation.  

In [None]:
from MBL_model import MBLModel
model_version = 1

# Two classes.
labels = ['Extended (Low W)', 'Localized (High W)']

In [None]:
# Default parameters that work.
# `input_size` depends on the notebook-level `n` at the time this cell runs.
input_size = (1, 2**n, 2**n) # train_dataset[0]["image"].size()
output_size = 2 # [0, 1], two phases. == len(labels)

default_hparams = {
    # MBL Parameters:
    "MBL": None, # Filled in per run by `training_loop`.
    # NN Parameters:
    "input_size" : (1, 2**n, 2**n), # Same value as `input_size` above.
    "output_size": output_size,
    "weight_decay": 0,
    "batch_size" : 2000,
    "entry_count": 0, # #CNN before inception unit.
    "group_count": 0, # #Inception units.
    "group_size" : 3, # #CNN in each Inception unit.
    # "exit_count": 2,
    "pool_every": 4,
    "layers_cnn": [
        {
            "in_channels": 1,
            "out_channels": 24,
            "kernel_size": 2,
            "stride": 1,
            "use_max_pool": False,
        },
    ],
    # Note kept from an earlier configuration (presumably the 96x96 input of the
    # code this was adapted from); it does not describe the values below:
    # RuntimeError: size mismatch, m1: [2 x 13254], m2: [53016 x 30]
    # 13254 = ((96-2)/2) ^2 * 6
    "layers_fc": [
        {
            # 216 = out_channels * (2^n - kernel_size + 1)^2 = 24 * 3^2 for n = 2,
            # presumably the flattened output of the single conv layer above
            # (no pooling) — TODO confirm against MBLModel. Hard-coded: must be
            # updated by hand if `n` or the conv layer changes.
            "in_features": 216,
            "out_features": 120,
            "dropout": 0.5,
        },
        {
            "in_features": 120,
            "out_features": output_size,
        },
    ]
}

In [None]:
# Sample training data parameters, shown to illustrate the dict layout that
# `MBLModel` receives as `hparams["MBL"]`.
# NOTE: this dict is replaced before any training happens, by the
# `MBL = get_MBL(L, n, p, num_EV)` call in the demo-training cell.
# NOTE(review): `rho_train_data_dir` / `rho_valid_data_dir` are not defined in
# this notebook; presumably they come from the star imports — TODO confirm.
MBL = {
    "obj_name": 'rho_A',
    "L": 8,
    "n": 2,
    "periodic": False,
    "num_EV": 1,
    "rho_train_data_dir": rho_train_data_dir,
    "rho_valid_data_dir": rho_valid_data_dir
}

In [None]:
def get_MBL(L, n, p, num_EV):
    """Assemble the data-set parameter dict for one parameter combination.

    Parameters
    ----------
    L : value stored under ``"L"``.
    n : value stored under ``"n"``.
    p : value stored under ``"periodic"``.
    num_EV : value stored under ``"num_EV"``.

    Returns
    -------
    dict
        The four arguments plus the fixed ``"obj_name"`` (``'rho_A'``) and the
        notebook-level ``rho_train_data_dir`` / ``rho_valid_data_dir``.
    """
    return dict(
        obj_name='rho_A',
        L=L,
        n=n,
        periodic=p,
        num_EV=num_EV,
        rho_train_data_dir=rho_train_data_dir,
        rho_valid_data_dir=rho_valid_data_dir,
    )

In [None]:
def training_loop(default_hparams, MBL, epochs=60, filename=None, save=True):
    """Train a fresh `MBLModel` on the data set described by `MBL`.

    Parameters
    ----------
    default_hparams : dict
        Hyperparameter template. It is deep-copied, so the caller's dict is
        never modified.
    MBL : dict
        Data-set parameters, stored under ``hparams["MBL"]``. The keys
        ``"L"``, ``"n"``, ``"periodic"`` and ``"num_EV"`` are read when saving.
    epochs : int
        Passed to the trainer as ``max_epochs``.
    filename : str or None
        File name handed to `save_model`. ``None`` (the default) resolves to
        ``'model_v{model_version}.pkl.gz'`` at call time, so it always agrees
        with the logger name below even if `model_version` was changed after
        this function was defined.
    save : bool
        Whether to call `save_model` after fitting.

    Returns
    -------
    The trained model.
    """
    if filename is None:
        filename = 'model_v{}.pkl.gz'.format(model_version)

    hparams = copy.deepcopy(default_hparams)
    hparams['MBL'] = MBL
    model = MBLModel(hparams=hparams)

    # 0 selects CPU training; -1 asks the trainer for every available GPU.
    gpus = 0 if str(device) == 'cpu' else -1
    logger = TensorBoardLogger('lightning_logs', name='MBL_v{:d}'.format(model_version))
    scale_accum = 1

    trainer = pl.Trainer(
        gpus=gpus,
        logger=logger,
        max_epochs=epochs,
        min_epochs=10,
        profiler=True,
        # Keys are the epochs at which the accumulation factor changes, e.g.
        # {5: 2, 10: 8} means no accumulation for epochs 1-4, accumulate 2 for
        # epochs 5-10, accumulate 8 after that.
        accumulate_grad_batches={
            1 : scale_accum * 1,
            20: scale_accum * 2,
            40: scale_accum * 4,
            80: scale_accum * 8,
        },
        weights_summary=None # [None,'top','full']
    )

    trainer.fit(model)

    if save:
        save_model(model, filename, MBL['L'], MBL['n'], MBL['periodic'], MBL['num_EV'])

    return model


## Demo training

In [None]:
# Rebuild `MBL` from the notebook-level `L`, `n`, `p` and `num_EV` (this replaces
# the sample dict defined earlier), then train for 10 epochs without saving.
MBL = get_MBL(L, n, p, num_EV)
model = training_loop(default_hparams, MBL, epochs=10, save=False).to(device)

## Visualize model predictions

In [None]:
def visualize_predictions(model, mode='train'):
    """Show a 5x5 grid of samples annotated with the true class and the prediction.

    One shuffled batch of up to 25 samples is drawn and each sample is shown
    exactly once. Every tile displays the absolute value of the image, the
    input class with its W, and the predicted class with its softmax
    probability. The annotation box is lime for a correct prediction and red
    for a wrong one.

    Parameters
    ----------
    model :
        Trained model. It is called as ``model(images)`` and must expose
        ``model.dataset["train"]`` and ``model.dataset["val"]``.
    mode : str
        ``'train'`` selects the training set; any other value selects the
        validation set.
    """
    labels = ['Extended (Low W)', 'Localized (High W)']
    n_rows, n_cols = 5, 5
    n_tiles = n_rows * n_cols

    split = "train" if mode == 'train' else "val"
    dataloader = DataLoader(model.dataset[split], batch_size=n_tiles, shuffle=True)

    # Only one batch is needed; no gradients are required for inference.
    model.eval()
    with torch.no_grad():
        batch = next(iter(dataloader))
        images, targets, Ws = batch["image"], batch["label"], batch["W"]
        outputs = model(images.to(device)).to('cpu')

    preds = outputs.argmax(dim=1).tolist()
    # Explicit `dim`: normalise over the class axis of the (batch, class) output.
    probs = torch.softmax(outputs, dim=1).tolist()
    targets = targets.tolist()
    Ws = Ws.tolist()
    # Drop the channel axis; this assumes single-channel images.
    shape = images.shape
    tiles = images.reshape(shape[0], shape[2], shape[3]).tolist()

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(fig_w/dpi,fig_h/dpi*2), dpi=dpi, squeeze=False)

    # The batch is already shuffled, so plotting it in order gives a random
    # sample without showing any image twice.
    for i, tile in enumerate(tiles):
        ax = axes[i % n_rows, i // n_rows]
        ax.imshow(np.abs(tile))
        W_in   = targets[i]
        W_pred = preds[i]
        annotation  = 'Input  : \n{}\nW={:.2f}\n\n'.format(labels[W_in], Ws[i])
        annotation += 'Predict: \n{}\n{:.0f}%'.format(labels[W_pred], probs[i][W_pred]*100)
        ec = 'lime' if W_in == W_pred else 'red'
        ax.annotate(annotation, (0.5,0.275), xycoords='axes fraction', ha='center', color='w', bbox=dict(facecolor='none', edgecolor=ec, boxstyle='round,pad=1', linewidth=2))

    # Hide ticks and axis lines on every tile, including unused ones.
    for ax in axes.flat:
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

    fig.tight_layout()


### Sample training data

In [None]:
visualize_predictions(model, 'train')

### Sample validation data

In [None]:
# Any mode other than 'train' makes `visualize_predictions` use `model.dataset["val"]`.
visualize_predictions(model, 'valid')

### Model performance

In [None]:
def evaluate_model_core(model, dataset):
    """Compute mean cross-entropy loss and accuracy of `model` over `dataset`.

    Parameters
    ----------
    model :
        Model called as ``model(images)``; it is switched to eval mode.
    dataset :
        Data set whose batches are dicts with ``"image"`` and ``"label"``.

    Returns
    -------
    (loss, accuracy) : tuple of float
        `loss` is the cross-entropy averaged over all samples. Summing
        per-batch means instead would make the value grow with the number of
        batches and over-weight a short final batch. `accuracy` is the
        fraction of correctly classified samples.
    """
    model.eval()
    # Sum per sample here and divide once at the end.
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    dataloader = DataLoader(dataset, batch_size=1000, shuffle=False)
    total_loss = 0.0
    n_correct = 0

    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        for batch in dataloader:
            images, targets = batch["image"], batch["label"]
            outputs = model(images.to(device)).to('cpu')
            total_loss += criterion(outputs, targets).item()
            n_correct += (outputs.argmax(dim=1) == targets).sum().item()

    n_samples = len(dataset)
    return total_loss / n_samples, n_correct / n_samples

def evaluate_model(model):
    """Print the accuracy of `model` on its training and validation sets, in percent."""
    report = (
        ("Training accuracy  : {:.4f}%", "train"),
        ("Validation accuracy: {:.4f}%", "val"),
    )
    for template, split in report:
        # `evaluate_model_core` returns (loss, accuracy); only accuracy is shown.
        accuracy = evaluate_model_core(model, model.dataset[split])[1]
        print(template.format(accuracy * 100))


In [None]:
evaluate_model(model)

## Estimate transition disorder strength

In [None]:
def sigmoid(x, x0, y0, b):
    """Logistic curve centred at `x0` with steepness `b`, shifted vertically by `y0`."""
    exponent = -b * (x - x0)
    return 1 / (1 + np.exp(exponent)) + y0

# Logit function is the inverse of sigmoid.
def logit(y, x0, y0, b):
    """Inverse of the four-argument `sigmoid`: the x at which the curve reaches `y`."""
    shifted = y - y0
    odds = shifted / (1 - shifted)
    return np.log(odds) / b + x0


In [None]:
# Sanity check: at x == x0 with y0 == 0 the curve equals 0.5.
# Uses the four-argument signature, so this cell raises TypeError if it is run
# after the later cell that redefines `sigmoid` with three parameters.
sigmoid(0,0,0,1)

In [None]:
# Sanity check: y == 0.5 with y0 == 0 maps back to x0 (here 0).
# Uses the four-argument signature, so this cell raises TypeError if it is run
# after the later cell that redefines `logit` with three parameters.
logit(0.5,0,0,1)

In [None]:
# Remove y0 because it should be bounded/aligned with y = 0 and y = 1.
def sigmoid(x, x0, b):
    """Logistic curve bounded by 0 and 1, centred at `x0` with steepness `b`."""
    decay = np.exp(-b * (x - x0))
    return 1 / (1 + decay)

# Logit function is the inverse of sigmoid.
def logit(y, x0, b):
    """Inverse of the three-argument `sigmoid`: the x at which the curve reaches `y`."""
    odds = y / (1 - y)
    return np.log(odds) / b + x0


In [None]:
def calc_probs(model, dataset):
    """Run `model` over all of `dataset` and summarise class probabilities per W.

    Parameters
    ----------
    model :
        Model called as ``model(images)``; it is switched to eval mode.
    dataset :
        Data set whose batches are dicts with ``"image"`` and ``"W"``.

    Returns
    -------
    Ws : ndarray
        W of every sample, in ascending order.
    Ps : ndarray
        Softmax probabilities, one row per sample and one column per class,
        with rows ordered like `Ws`.
    Ws_uniq : ndarray
        Distinct values of `Ws`, ascending.
    Ps_mean : ndarray
        Mean of ``Ps[:, 1]`` for each entry of `Ws_uniq`.
    Ps_std : ndarray
        Sample standard deviation (``ddof=1``) of ``Ps[:, 1]`` for each entry
        of `Ws_uniq`; ``nan`` where a W value has only one sample.
    """
    model.eval()
    dataloader = DataLoader(dataset, batch_size=200, shuffle=False)

    # Only W and the probabilities are kept. Copying every image into Python
    # lists (as an earlier version did) is wasted memory, since images are
    # never returned.
    collected_Ws = []
    collected_probs = []
    with torch.no_grad():
        for batch in dataloader:
            outputs = model(batch["image"].to(device)).to('cpu')
            collected_Ws.extend(batch["W"].tolist())
            # Explicit `dim`: normalise over the class axis of the output.
            collected_probs.extend(torch.softmax(outputs, dim=1).tolist())

    collected_Ws = np.array(collected_Ws)
    collected_probs = np.array(collected_probs)
    order = collected_Ws.argsort()
    Ws = collected_Ws[order]
    Ps = collected_probs[order]

    # `Ws` is sorted, so samples sharing a W are contiguous: split the
    # class-1 column at the first occurrence of each distinct W.
    Ws_uniq, first_idx = np.unique(Ws, return_index=True)
    groups = np.split(Ps[:, 1], first_idx[1:])
    Ps_mean = np.array([group.mean() for group in groups])
    # ddof=1 is undefined for a single sample; give nan without the warning.
    Ps_std = np.array([group.std(ddof=1) if group.size > 1 else np.nan for group in groups])

    return Ws, Ps, Ws_uniq, Ps_mean, Ps_std


In [None]:
def plot_crossing(Ws, Ps, Ws_uniq, Ps_mean, Ps_std, MBL_params=None):
    """Plot P(Localized) against W, fit a sigmoid and report the crossing W_c.

    Parameters
    ----------
    Ws : array
        W of every sample.
    Ps : array
        Probability of the localized class for every sample (1-D, same length
        as `Ws`).
    Ws_uniq, Ps_mean, Ps_std : array
        Distinct W values with the mean and standard deviation of `Ps` at each
        of them; drawn as error bars.
    MBL_params : dict or None
        Dict providing ``"L"`` and ``"n"`` for the plot title. ``None`` (the
        default) falls back to the notebook-level `MBL`, which is what the
        title always used before this parameter existed.
    """
    if MBL_params is None:
        MBL_params = MBL

    fig, axes = plt.subplots(1, 1, figsize=(fig_w/dpi,fig_h/dpi), dpi=dpi, squeeze=False)
    ax = axes[0,0]

    # Averaged values with half-transparent error bars.
    _, caps, bars = ax.errorbar(Ws_uniq, Ps_mean, Ps_std, ls=' ', marker='x',capsize=2, capthick=2, label='P(Localized) Mean')
    for artist in (*bars, *caps):
        artist.set_alpha(0.5)

    # Raw data.
    ax.plot(Ws, Ps,   ls=' ', marker='x', label='P(Localized)', alpha=0.1)
    ax.set_title('Probability vs W (L={}, n={})'.format(MBL_params['L'], MBL_params['n']))
    ax.set_xlabel('W')
    ax.set_ylabel('Probability')

    # Fit the two-parameter sigmoid (x0, b) to all raw samples. Per the
    # original author's note, fitting only `Ws_uniq` / `Ps_mean` gives the
    # same result. Add bounds or other initial values if it doesn't converge.
    popt, pcov = curve_fit(sigmoid, Ws, Ps, p0=[3, 2])
    x0, b = popt
    x = np.linspace(0, 10, 100)
    ax.plot(x, sigmoid(x, *popt), ls='--', label='Fit')
    print('Fitted sigmoid function 1 / (1 + Exp(-{:.4f} (x - {:.4f})))'.format(b, x0))

    # W_c is where the fitted curve crosses P = 0.5. Its quoted uncertainty is
    # the standard error of the first fit parameter (x0).
    W_c = logit(0.5, *popt)
    perr = np.sqrt(np.diag(pcov))
    print('Transition W_C is found to be at W = {:.4f} ± {:.4f}'.format(W_c, perr[0]))
    ax.axvline(W_c, c='r',         ls='--', label='$W_c$')
    ax.axhline(0.5, c='lightgrey', ls='--', label='$P=0.5$')

    ax.legend(loc='best')


In [None]:
# Class probabilities for every training sample, sorted by W, plus the per-W
# mean and standard deviation of the class-1 probability.
train_Ws, train_Ps, train_Ws_uniq, train_Ps_mean, train_Ps_std = calc_probs(model, model.dataset["train"])
# Sketch for evaluating several models at once (not used):
# for model, dataset in zip(models, datasets):
#     Ws, probs = calc_probs(model, dataset)

In [None]:
# Column 1 is the probability of class index 1, i.e. 'Localized (High W)' in `labels`.
plot_crossing(train_Ws, train_Ps[:,1], train_Ws_uniq, train_Ps_mean, train_Ps_std)

In [None]:
# Same as for the training set, now on the validation set.
valid_Ws, valid_Ps, valid_Ws_uniq, valid_Ps_mean, valid_Ps_std = calc_probs(model, model.dataset["val"])
# Sketch for evaluating several models at once (not used):
# for model, dataset in zip(models, datasets):
#     Ws, probs = calc_probs(model, dataset)

In [None]:
# Column 1 is the probability of class index 1, i.e. 'Localized (High W)' in `labels`.
plot_crossing(valid_Ws, valid_Ps[:,1], valid_Ws_uniq, valid_Ps_mean, valid_Ps_std)

## Batch training
The for-loop should be comparable to the one used to generate reduced density matrices.

In [None]:
# Drop the demo model and its data sets before batch training (presumably to
# free memory). After this cell, `model` no longer exists, so the cells above
# cannot be re-run without training again.
del model.dataset["train"]
del model.dataset["val"]
del model

In [None]:
# Batch-train one model per (L, num_EV, periodic) combination. The parameter
# grid is meant to mirror the one used to generate the reduced density matrices.
# NOTE: the loops rebind the notebook-level names `L`, `p`, `num_EV` and `MBL`.
n  = 2                   # !!! Important !!! Number of consecutive sites.
k  = 5                   # Number of eigenvalues near zero to save.
J  = 1                   # Always = 1
Ls = list(range(8,13,2)) # System sizes L: 8, 10, 12.
ps = [False, True]       # Periodic or not.
et = []                  # Execution time of each combination, in seconds.
num_EVs = [k]            # Number of eigenvalues near zero to save.
model_filename = 'model_v{}.pkl.gz'.format(model_version)

for L in Ls:
    for num_EV in num_EVs:
        for p in ps:
            start_time = time.time()
            print('{} | Training model for L={:02d} | n={:02d} | periodic={: <5} | num_EV={} ...'.format(dt(), L, n, str(p), num_EV), flush=True)

            # Skip combinations for which a saved model is already present.
            if model_exists(model_filename, L, n, p, num_EV):
                print('Model exists. Training skipped.', flush=True)
            else:
                MBL = get_MBL(L, n, p, num_EV)
                try:
                    # Train (and save) the model, then store its validation-set
                    # probabilities before releasing it.
                    model = training_loop(default_hparams, MBL).to(device)
                    data = calc_probs(model, model.dataset["val"])
                    save_eval_valid(data, model_version, L, n, p, num_EV)
                    del model.dataset["train"]
                    del model.dataset["val"]
                    del model
                except RuntimeError as err:
                    # NOTE(review): every RuntimeError is reported as
                    # "Insufficient data", whatever its real cause. If it is
                    # raised after `model` was created, the `del` statements
                    # above are skipped.
                    print('RuntimeError: {0}'.format(err), flush=True)
                    print('Insufficient data. Training skipped.', flush=True)

            # Timing is recorded for skipped combinations too.
            exec_time = time.time() - start_time
            et.append(exec_time)
            print('{} | Computed: L={:02d} | n={:02d} | periodic={: <5} | num_EV={}.'.format(dt(), L, n, str(p), num_EV), flush=True)
            print('{} | Execution took {: 8.2f}s or {: 6.2f}min.'.format(dt(), exec_time, exec_time/60), flush=True)
            print(' ', flush=True)

# Disabled early-exit hook:
# if check_shutdown_signal():
#     break

In [None]:
# Code for "annealing".
# hparams["use_adam"] = 1
# model_adam = MBLModel(hparams=hparams)
# model_adam.prepare_data()
# model_adam.load_state_dict(model.state_dict())

In [None]:
# Ws, Ps, Ws_uniq, Ps_mean, Ps_std = load_eval_valid(model_version, L, n, p, num_EV)