In [None]:
# Set to True when running on Google Colab: this switches the directory layout
# in the next cell and enables the `if in_colab:` setup cells further down.
in_colab = False

In [None]:
# Directories for data, models and the shutdown-signal file.
# With `in_colab` set, every path lives under 'drive/MyDrive/Colab Data/CMMP';
# otherwise the same folder names are relative to the working directory.
ED_data_dir           = 'drive/MyDrive/Colab Data/CMMP/ED_data'           if in_colab else 'ED_data'
rho_train_data_dir    = 'drive/MyDrive/Colab Data/CMMP/rho_train_data'    if in_colab else 'rho_train_data'
rho_valid_data_dir    = 'drive/MyDrive/Colab Data/CMMP/rho_valid_data'    if in_colab else 'rho_valid_data'
signal_dir            = 'drive/MyDrive/Colab Data/CMMP'                   if in_colab else '.'
model_dir             = 'drive/MyDrive/Colab Data/CMMP/models'            if in_colab else 'models'
eval_valid_data_dir   = 'drive/MyDrive/Colab Data/CMMP/eval_valid_data'   if in_colab else 'eval_valid_data'
H_model_dir           = 'drive/MyDrive/Colab Data/CMMP/H_models'          if in_colab else 'H_models'
H_eval_valid_data_dir = 'drive/MyDrive/Colab Data/CMMP/H_eval_valid_data' if in_colab else 'H_eval_valid_data'

# Project 13*: Machine Learning of Many Body Localization (Exact diagonalization + Machine Learning)

Use exact diagonalization to obtain all eigenstates of the Heisenberg model with a
random field, 

\begin{equation}
    H = J \sum_i \vec{S}_{i} \cdot \vec{S}_{i+1} - \sum_i h_i S^z_i
\end{equation}

where the values of the field $ h_i \in [-W, W] $ are drawn from a uniform random distribution with "disorder strength" $W$ (use moderate system sizes $L = [10, 12]$). 

The exciting property of this model is that it is believed to undergo a phase transition from an extended phase (small $W$) to a localized phase (large $W$). 

We will use ML to detect this transition: Pick a number of eigenstates that are near energy $E = 0$ and obtain the reduced density matrices $\rho^A$, where $A$ is a region of $n$ consecutive spins (a few hundred to thousands eigenstates for different disorder realizations). 

Now use the density matrices for $W = 0.5 J$ and $W = 8.0 J$ to train a neural network (just interpret the entries of $\rho^A$ as an image with $2^n \times 2^n$ pixels). Then use this network and study the output of the neural network for different $W$. 

How do the results depend on system size $L$ and block size $n$? At which $W_c$ do you expect the
transition to occur?

_Author: Tin Kei CHENG_  
_TUM, 2021_

## Imports

In [None]:
import os
import sys
import copy
import json
import gzip
import lzma
import pytz
import time
import tqdm
import pickle
import numpy as np
from numba import jit, njit # Set "nopython" mode for best performance, equivalent to @njit # cache=True, parallel=True
from datetime import datetime

# Time zone used by dt() when formatting timestamps.
tz = pytz.timezone('Europe/Berlin')

import scipy
import scipy.linalg
import scipy.sparse.linalg

from scipy.sparse import csr_matrix, kron, identity
from scipy.sparse.linalg import eigsh
from scipy.linalg import svd
from scipy.optimize import curve_fit

from collections import OrderedDict

%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.ticker import MaxNLocator

# Figure geometry: plotting functions pass figsize=(fig_w/dpi, fig_h/dpi*2)
# together with dpi=dpi, so fig_w / fig_h act as sizes in pixels.
dpi = 100
fig_w = 1280
fig_h = 640

%matplotlib inline

In [None]:
# Colab only: print the CPU description of the host machine.
if in_colab:
    !cat /proc/cpuinfo

In [None]:
if in_colab:
    !pip install ipython-autotime
    %load_ext autotime

In [None]:
if in_colab:
    !pip install pytorch_lightning==0.7.6 torchsummary==1.5.1
    !pip install torch==1.4.0+cu92 torchvision==0.5.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# pip install torch==1.4.0+cu92 torchsummary==1.5.1 torchvision==0.5.0+cu92 pytorch-lightning==0.7.6  -f https://download.pytorch.org/whl/torch_stable.html
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
# from torchsummary import summary
# help(summary)

# NOTE: this silences *all* Python warnings for the rest of the session,
# including deprecation warnings raised by the libraries used below.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is a torch.device object (not a string); it is used below to move models and batches.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"
print(device)
# python -c "import torch; print(torch.__version__)"

## Util functions

In [None]:
def dt():
    """Return the current time in the time zone `tz`, formatted as 'YYYY-MM-DD HH:MM:SS'."""
    now = datetime.now(tz=tz)
    return now.strftime('%Y-%m-%d %H:%M:%S')

def dict_to_str(_dict):
    """Turn a flat parameter dict into a file-name-safe string.

    The dict is sorted by key, serialized with `json.dumps`, and stripped of
    its outer braces, quotes and spaces. Every remaining non-alphanumeric
    character becomes '_', except ':' which becomes '='.
    Example: {'L': 8, 'periodic': True} -> 'L=8_periodic=true'.

    Parameters
    ----------
    _dict : dict
        Parameters to encode; keys must be mutually sortable and values JSON-serializable.

    Return
    ------
    s : str
        The encoded parameter string.
    """

    ordered = OrderedDict(sorted(_dict.items()))  # Deterministic key order.
    text = json.dumps(ordered)[1:-1]              # Serialize and drop the outer braces.
    text = text.replace('\"', '').replace(' ', '')

    def _sanitize(ch):
        # Keep letters and digits; map the key/value separator to '=' and everything else to '_'.
        if ch.isalnum():
            return ch
        return '=' if ch == ':' else '_'

    return ''.join(map(_sanitize, text))


def save_cache(obj, obj_name, obj_params, cache_dir='cache'):
    """Pickle `obj` (gzip-compressed) under a file name derived from its parameters.

    The file is written to `<cache_dir>/<obj_name>/<dict_to_str(obj_params)>.pkl.gz`;
    an existing file with the same name is overwritten.
    Keep `obj_params` flat and simple (no nested dicts or complicated objects).

    Parameters
    ----------
    obj : object
        The object to cache.
    obj_name : str
        A unique name for this object; used as the sub-directory name.
    obj_params : dict
        All parameters necessary to generate this object.
    cache_dir : str, optional
        Directory where the cache is located.
    """

    file_name = dict_to_str(obj_params) + '.pkl.gz'
    target_dir = os.path.join(cache_dir, obj_name)
    os.makedirs(target_dir, exist_ok=True)

    cache_path = os.path.join(target_dir, file_name)
    with gzip.open(cache_path, 'wb') as cache_file:
        pickle.dump(obj, cache_file, protocol=pickle.HIGHEST_PROTOCOL)


def load_cache(obj_name, obj_params, cache_dir='cache'):
    """Return the cached object for these parameters, or None if it is not cached.

    Looks for `<cache_dir>/<obj_name>/<dict_to_str(obj_params)>.pkl.gz`.
    The `<cache_dir>/<obj_name>` directory is created as a side effect if missing.
    Keep `obj_params` flat and simple (no nested dicts or complicated objects).

    Parameters
    ----------
    obj_name : str
        A unique name for this object; used as the sub-directory name.
    obj_params : dict
        All parameters necessary to generate this object.
    cache_dir : str, optional
        Directory where the cache is located.

    Return
    ------
    obj : object or None
        The unpickled object, or None when no cache file exists.
    """

    file_name = dict_to_str(obj_params) + '.pkl.gz'
    target_dir = os.path.join(cache_dir, obj_name)
    os.makedirs(target_dir, exist_ok=True)

    cache_path = os.path.join(target_dir, file_name)
    if not os.path.isfile(cache_path):
        return None

    with gzip.open(cache_path, 'rb') as cache_file:
        return pickle.load(cache_file)


def check_shutdown_signal(signal_dir=signal_dir):
    """Report whether a shutdown has been requested via `shutdown_signal.txt`.

    Lets a long data-generation loop stop gracefully after finishing its
    current iteration: a shutdown is requested when the first line of
    `<signal_dir>/shutdown_signal.txt` is `1` (surrounding whitespace ignored).
    `signal_dir` is created if it does not exist.

    Parameters
    ----------
    signal_dir : str, optional
        Directory that holds `shutdown_signal.txt`.

    Return
    ------
    shutdown : bool
        True if the shutdown signal was detected, otherwise False.
    """

    os.makedirs(os.path.join(signal_dir), exist_ok=True)

    signal_path = os.path.join(signal_dir, 'shutdown_signal.txt')
    if not os.path.isfile(signal_path):
        return False

    with open(signal_path) as signal_file:
        content = signal_file.readlines()

    # An empty file counts as "no signal".
    return bool(content) and content[0].strip() == '1'


@njit
def is_sorted(arr):
    """Return whether `arr` is in non-decreasing order (every element <= its successor)."""
    # Compare each element with its right-hand neighbour and require all comparisons to hold.
    return np.all(arr[:-1] <= arr[1:])


def save_ED(obj, obj_name, L, W, periodic, data_dir=ED_data_dir):
    """Save one chunk of exact-diagonalization results, organized by the parameters used to generate them.

    The chunk is pickled (gzip-compressed) to
    `<data_dir>/<obj_name>/L=<LL>/W=<W.WW>/periodic=<periodic>/<obj_name>-<NNNNNNNNN>.pkl.gz`,
    where NNNNNNNNN is the first suffix (counting from 0) that does not exist yet,
    so earlier chunks are never overwritten.

    Parameters
    ----------
    obj : object
        The results to save (any picklable object).
    obj_name : str
        A unique name you give to this object. Call it `rho_A`.
    L : int
        System size.
    W : float
        Disorder strength.
    periodic : bool
        Whether the Hamiltonian is periodic.
    data_dir : str, optional
        Directory where the data is saved.
    """

    directory = os.path.join(
        data_dir,
        obj_name,
        'L={:02d}'.format(L),
        'W={:.2f}'.format(W),
        'periodic={}'.format(periodic),
    )
    os.makedirs(directory, exist_ok=True)

    # Find the first unused chunk number.
    chunk = 0
    path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(chunk))
    while os.path.exists(path):
        chunk += 1
        path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(chunk))

    with gzip.open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


def load_ED(obj_name, L, W, periodic, data_dir=ED_data_dir):
    """Placeholder for loading exact-diagonalization results; not implemented.

    Always raises `NotImplementedError`. The previous body after the `raise`
    was unreachable and could not have worked (it used an undefined
    `obj_params` and the invalid float format `{:.2d}`), so it was removed.

    Parameters
    ----------
    obj_name : str
        A unique name you give to this object. Call it `rho_A`.
    L : int
        System size.
    W : float
        Disorder strength.
    periodic : bool
        Whether the Hamiltonian is periodic.
    data_dir : str, optional
        Directory where the data is saved.

    Raises
    ------
    NotImplementedError
        Always.
    """

    raise NotImplementedError('Function not implemented.')


In [None]:
def save_rho_train(obj, obj_name, L, n, periodic, num_EV, data_dir=rho_train_data_dir):
    """Save one chunk of training reduced density matrices (W = {0.5, 8}).

    The chunk is pickled (gzip-compressed) to
    `<data_dir>/L=<LL>/n=<nn>/periodic=<periodic>/num_EV=<num_EV>/<obj_name>-<NNNNNNNNN>.pkl.gz`,
    using the first suffix (counting from 0) that does not exist yet.

    Parameters
    ----------
    obj : list
        A list of lists, where each reduced density matrix is paired with its disorder strength W.
        i.e. obj[i][0] is a 2D numpy.ndarray of the reduced density matrix, and obj[i][1] is the disorder strength used to generate it.
        Number of data must be a multiple of 10.
    obj_name : str
        A name you give to this object. Call it `rho_A`.
    L : int
        System size.
    n : int
        Number of consecutive spins sampled.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.
    """

    directory = os.path.join(
        data_dir,
        'L={:02d}'.format(L),
        'n={:02d}'.format(n),
        'periodic={}'.format(periodic),
        'num_EV={}'.format(num_EV),
    )
    os.makedirs(directory, exist_ok=True)

    # Find the first unused chunk number so existing chunks are kept.
    chunk = 0
    path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(chunk))
    while os.path.exists(path):
        chunk += 1
        path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(chunk))

    with gzip.open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


def save_rho_valid(obj, obj_name, L, n, periodic, num_EV, data_dir=rho_valid_data_dir):
    """Save one chunk of validation reduced density matrices (random W != {0.5, 8}).

    The chunk is pickled (gzip-compressed) to
    `<data_dir>/L=<LL>/n=<nn>/periodic=<periodic>/num_EV=<num_EV>/<obj_name>-<NNNNNNNNN>.pkl.gz`,
    using the first suffix (counting from 0) that does not exist yet.

    Parameters
    ----------
    obj : list
        A list of lists, where each reduced density matrix is paired with its disorder strength W.
        i.e. obj[i][0] is a 2D numpy.ndarray of the reduced density matrix, and obj[i][1] is the disorder strength used to generate it.
        Number of data must be a multiple of 10.
    obj_name : str
        A name you give to this object. Call it `rho_A`.
    L : int
        System size.
    n : int
        Number of consecutive spins sampled.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.
    """

    directory = os.path.join(
        data_dir,
        'L={:02d}'.format(L),
        'n={:02d}'.format(n),
        'periodic={}'.format(periodic),
        'num_EV={}'.format(num_EV),
    )
    os.makedirs(directory, exist_ok=True)

    # Find the first unused chunk number so existing chunks are kept.
    chunk = 0
    path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(chunk))
    while os.path.exists(path):
        chunk += 1
        path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(chunk))

    with gzip.open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
def load_rho_train(obj_name, L, n, periodic, num_EV, data_dir=rho_train_data_dir):
    """Load a list of reduced density matrices with W = {0.5, 8}.

    Reads the consecutively numbered chunk files `<obj_name>-000000000.pkl.gz`,
    `<obj_name>-000000001.pkl.gz`, ... from
    `<data_dir>/L=<LL>/n=<nn>/periodic=<periodic>/num_EV=<num_EV>` and
    concatenates their contents. Reading stops at the first missing number.
    The directory is created if it does not exist.

    Parameters
    ----------
    obj_name : str
        A name you give to this object. Call it `rho_A`.
    L : int
        System size.
    n : int
        Number of consecutive spins sampled.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.

    Return
    ------
    obj : list
        A list of lists, where each reduced density matrix is paired with its disorder strength W.
        i.e. obj[i][0] is a 2D numpy.ndarray of the reduced density matrix, and obj[i][1] is the disorder strength used to generate it.
        Empty if no chunk file exists.
    """

    directory = os.path.join(data_dir, 'L={:02d}'.format(L), 'n={:02d}'.format(n), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV))
    os.makedirs(directory, exist_ok=True)

    data = []
    i = 0
    while True:
        path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(i))
        if not os.path.exists(path):
            break
        # NOTE: unpickling can execute arbitrary code; only load files you generated yourself.
        with gzip.open(path, 'rb') as handle:
            # extend() appends in place; `data = data + ...` copied the whole list for every file.
            data.extend(pickle.load(handle))
        i += 1

    return data


def load_rho_valid(obj_name, L, n, periodic, num_EV, data_dir=rho_valid_data_dir):
    """Load a list of reduced density matrices with random W != {0.5, 8}.

    Reads the consecutively numbered chunk files `<obj_name>-000000000.pkl.gz`,
    `<obj_name>-000000001.pkl.gz`, ... from
    `<data_dir>/L=<LL>/n=<nn>/periodic=<periodic>/num_EV=<num_EV>` and
    concatenates their contents. Reading stops at the first missing number.
    The directory is created if it does not exist.

    Parameters
    ----------
    obj_name : str
        A name you give to this object. Call it `rho_A`.
    L : int
        System size.
    n : int
        Number of consecutive spins sampled.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.

    Return
    ------
    obj : list
        A list of lists, where each reduced density matrix is paired with its disorder strength W.
        i.e. obj[i][0] is a 2D numpy.ndarray of the reduced density matrix, and obj[i][1] is the disorder strength used to generate it.
        Empty if no chunk file exists.
    """

    directory = os.path.join(data_dir, 'L={:02d}'.format(L), 'n={:02d}'.format(n), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV))
    os.makedirs(directory, exist_ok=True)

    data = []
    i = 0
    while True:
        path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(i))
        if not os.path.exists(path):
            break
        # NOTE: unpickling can execute arbitrary code; only load files you generated yourself.
        with gzip.open(path, 'rb') as handle:
            # extend() appends in place; `data = data + ...` copied the whole list for every file.
            data.extend(pickle.load(handle))
        i += 1

    return data


In [None]:
def save_H_model(model, file_name, L, periodic, directory=H_model_dir):
    """Save a model's weights and hyperparameters as a gzip-compressed pickle.

    The file is written to `<directory>/L=<LL>/periodic=<periodic>/<file_name>`
    and holds a dict with the keys "state_dict" and "hparams".
    Note that `model.cpu()` is called first, so the model is on the CPU afterwards.

    Parameters
    ----------
    model : object
        Model exposing `cpu()`, `state_dict()` and `hparams`.
    file_name : str
        Name of the file to write.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    directory : str, optional
        Root directory of the saved models.

    Return
    ------
    model_path : str
        Path of the written file.
    """

    model = model.cpu()

    target_dir = os.path.join(directory, 'L={:02d}'.format(L), 'periodic={}'.format(periodic))
    os.makedirs(target_dir, exist_ok=True)
    model_path = os.path.join(target_dir, file_name)

    payload = {"state_dict": model.state_dict(), "hparams": model.hparams}
    with gzip.open(model_path, 'wb', 4) as handle:  # Third argument is the gzip compression level.
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return model_path


def load_H_model(file_name, L, periodic, directory=H_model_dir):
    """Rebuild an `MBLHModel` from a file written by `save_H_model`.

    Reads `<directory>/L=<LL>/periodic=<periodic>/<file_name>`, constructs the
    model from the stored hyperparameters, restores its weights, calls
    `prepare_data()` and moves the model to `device`.
    The directory is created if missing; a missing file still raises when opened.

    Parameters
    ----------
    file_name : str
        Name of the file to read.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    directory : str, optional
        Root directory of the saved models.

    Return
    ------
    model : MBLHModel
        The restored model, on `device`.
    """

    source_dir = os.path.join(directory, 'L={:02d}'.format(L), 'periodic={}'.format(periodic))
    os.makedirs(source_dir, exist_ok=True)

    with gzip.open(os.path.join(source_dir, file_name), 'rb') as handle:
        saved = pickle.load(handle)

    model = MBLHModel(hparams=saved["hparams"])
    model.load_state_dict(saved["state_dict"])
    model.prepare_data()

    return model.to(device)


def H_model_exists(file_name, L, periodic, directory=H_model_dir):
    """Return whether `<directory>/L=<LL>/periodic=<periodic>/<file_name>` exists.

    The containing directory is created as a side effect if it is missing.
    """

    model_dir_for_params = os.path.join(directory, 'L={:02d}'.format(L), 'periodic={}'.format(periodic))
    os.makedirs(model_dir_for_params, exist_ok=True)

    return os.path.exists(os.path.join(model_dir_for_params, file_name))


In [None]:
def save_H_eval_valid(obj, model_version, L, periodic, data_dir=H_eval_valid_data_dir):
    """Save model predictions of random W != {0.5, 8}.

    Writes a gzip-compressed pickle to
    `<data_dir>/L=<LL>/periodic=<periodic>/H_model_v<model_version>_eval.pkl.gz`,
    overwriting any existing file.

    Parameters
    ----------
    obj : list
        A list of five numpy.ndarray [Ws, Ps, Ws_uniq, Ps_mean, Ps_std]
        Where `W` are disorder strength, and `P` the probability of being in the localized phase.
    model_version : int
        Version of the neural network model.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    data_dir : str, optional
        Directory where the data is saved.
    """

    target_dir = os.path.join(data_dir, 'L={:02d}'.format(L), 'periodic={}'.format(periodic))
    os.makedirs(target_dir, exist_ok=True)

    eval_path = os.path.join(target_dir, 'H_model_v{}_eval.pkl.gz'.format(model_version))
    with gzip.open(eval_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


def load_H_eval_valid(model_version, L, periodic, data_dir=H_eval_valid_data_dir):
    """Load model predictions of random W != {0.5, 8}.

    Reads `<data_dir>/L=<LL>/periodic=<periodic>/H_model_v<model_version>_eval.pkl.gz`.
    The directory is created as a side effect if it is missing.

    Parameters
    ----------
    model_version : int
        Version of the neural network model.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    data_dir : str, optional
        Directory where the data is saved.

    Return
    ------
    obj : list or None
        A list of five numpy.ndarray [Ws, Ps, Ws_uniq, Ps_mean, Ps_std]
        Where `W` are disorder strength, and `P` the probability of being in the localized phase.
        None if the file does not exist.
    """

    source_dir = os.path.join(data_dir, 'L={:02d}'.format(L), 'periodic={}'.format(periodic))
    os.makedirs(source_dir, exist_ok=True)

    eval_path = os.path.join(source_dir, 'H_model_v{}_eval.pkl.gz'.format(model_version))
    if not os.path.isfile(eval_path):
        return None

    with gzip.open(eval_path, 'rb') as handle:
        return pickle.load(handle)


## Build Hamiltonian

In [None]:
@njit
def get_h(L, W):
    """Draw `L` random field values uniformly from [-W, W).

    NOTE(review): this function is numba-compiled; seeding NumPy from regular
    Python presumably does not make these draws reproducible - confirm.
    """
    return np.random.uniform(-W, W, L)


In [None]:
def build_si_list(L):
    """Return the single-site operators of an `L`-site chain as sparse 2^L x 2^L matrices.

    For each site i, the operator is the Kronecker product
    kron(id, ..., id, s, id, ..., id) with the 2 x 2 matrix `s` at position i.
    The three lists are cached on disk via `load_cache` / `save_cache`, keyed by `L`.

    NOTE(review): the 2 x 2 matrices used here are the Pauli matrices, without
    the factor 1/2 of spin-1/2 operators S = sigma / 2 - confirm this is the
    intended normalization of J and W.

    Parameters
    ----------
    L : int
        System size.

    Return
    ------
    sx_list, sy_list, sz_list : list
        `s?_list[i]` is the x / y / z operator acting on site i.
    """

    cache_key = {'L': L}

    # ========================================
    # Start cached area: si_list.
    # ========================================

    sx_list = load_cache('sx_list', cache_key)
    sy_list = load_cache('sy_list', cache_key)
    sz_list = load_cache('sz_list', cache_key)

    cache_complete = not (sx_list is None or sy_list is None or sz_list is None)

    if not cache_complete:

        # Single-site operators.
        pauli_x = csr_matrix(np.array([[0.,  1. ], [1. ,  0.]]))
        pauli_y = csr_matrix(np.array([[0., -1.j], [1.j,  0.]]))
        pauli_z = csr_matrix(np.array([[1.,  0. ], [0. , -1.]]))
        eye2    = csr_matrix(np.eye(2))

        def _embed(op, site):
            # kron([id, ..., id, op, id, ..., id]) with `op` at position `site`.
            factors = [eye2] * L
            factors[site] = op
            full = factors[0]
            for factor in factors[1:]:
                full = kron(full, factor, 'csr')
            return full

        sx_list = [_embed(pauli_x, site) for site in range(L)]
        sy_list = [_embed(pauli_y, site) for site in range(L)]
        sz_list = [_embed(pauli_z, site) for site in range(L)]

        save_cache(sx_list, 'sx_list', cache_key)
        save_cache(sy_list, 'sy_list', cache_key)
        save_cache(sz_list, 'sz_list', cache_key)

    # ========================================
    # End cached area: si_list.
    # ========================================

    return sx_list, sy_list, sz_list


In [None]:
def build_H_ii(L, periodic):
    """Return the nearest-neighbour coupling terms of the chain, plus the single-site operators.

    H_aa = sum_i s^a_i s^a_{i+1} for a in {x, y, z}. With `periodic` the sum
    includes the bond between the last and the first site. The three coupling
    matrices are cached on disk via `load_cache` / `save_cache`, keyed by
    `L` and `periodic`.

    Parameters
    ----------
    L : int
        System size.
    periodic : bool
        Whether to use periodic boundary conditions.

    Return
    ------
    H_xx, H_yy, H_zz : sparse matrix
        The three coupling terms, each of shape (2^L, 2^L).
    sx_list, sy_list, sz_list : list
        The single-site operators from `build_si_list(L)`.
    """

    sx_list, sy_list, sz_list = build_si_list(L)

    # ========================================
    # Start cached area: H_ii.
    # ========================================

    cache_key = {'L': L, 'periodic': periodic}

    H_xx = load_cache('H_xx', cache_key)
    H_yy = load_cache('H_yy', cache_key)
    H_zz = load_cache('H_zz', cache_key)

    if H_xx is None or H_yy is None or H_zz is None:

        dim = 2**L
        H_xx = csr_matrix((dim, dim))
        H_yy = csr_matrix((dim, dim))
        H_zz = csr_matrix((dim, dim))

        # Open chain: L - 1 bonds. Periodic chain: one extra bond (L-1, 0).
        num_bonds = L if periodic else L - 1
        for left in range(num_bonds):
            right = (left + 1) % L
            H_xx = H_xx + sx_list[left].dot(sx_list[right])
            H_yy = H_yy + sy_list[left].dot(sy_list[right])
            H_zz = H_zz + sz_list[left].dot(sz_list[right])

        save_cache(H_xx, 'H_xx', cache_key)
        save_cache(H_yy, 'H_yy', cache_key)
        save_cache(H_zz, 'H_zz', cache_key)

    # ========================================
    # End cached area: H_ii.
    # ========================================

    return H_xx, H_yy, H_zz, sx_list, sy_list, sz_list


In [None]:
def build_H(L, W, J, periodic=False):
    """Build one disorder realization of H = J * (H_xx + H_yy + H_zz) - sum_i h_i s^z_i.

    Parameters
    ----------
    L : int
        System size.
    W : float
        Disorder strength; the fields h_i come from `get_h(L, W)`.
    J : float
        Coupling constant.
    periodic : bool, optional
        Whether to use periodic boundary conditions.

    Return
    ------
    H : sparse matrix
        The Hamiltonian, of shape (2^L, 2^L).
    """

    H_xx, H_yy, H_zz, _, _, sz_list = build_H_ii(L, periodic)

    # The field term is rebuilt on every call because it is random.
    field_term = csr_matrix((2**L, 2**L))
    fields = get_h(L, W)

    for site in range(L):
        field_term = field_term + fields[site] * sz_list[site]

    return J * (H_xx + H_yy + H_zz) - field_term

def build_Hs(L, W, J, periodic=False, num_Hs=1000):
    """Build `num_Hs` independent disorder realizations of the Hamiltonian.

    Each realization is H = J * (H_xx + H_yy + H_zz) - sum_i h_i s^z_i with a
    fresh set of fields from `get_h(L, W)`.

    Parameters
    ----------
    L : int
        System size.
    W : float
        Disorder strength.
    J : float
        Coupling constant.
    periodic : bool, optional
        Whether to use periodic boundary conditions.
    num_Hs : int, optional
        Number of realizations to generate.

    Return
    ------
    Hs : list
        `num_Hs` sparse matrices of shape (2^L, 2^L).
    """

    # The notebook does `import tqdm`, which binds the *module*; calling it
    # raises TypeError. Import the progress-bar class locally under its own name.
    from tqdm import tqdm as progress_bar

    H_xx, H_yy, H_zz, sx_list, sy_list, sz_list = build_H_ii(L, periodic)

    # The disorder-free part is the same for every realization.
    H_exchange = J * (H_xx + H_yy + H_zz)

    Hs = []
    for _ in progress_bar(range(num_Hs), leave=False, desc='build_Hs()'):

        # H_z is not cached due to randomness.
        H_z = csr_matrix((2**L, 2**L))
        h   = get_h(L, W)

        for site in range(L):
            H_z = H_z + h[site] * sz_list[site]

        Hs.append(H_exchange - H_z)

    return Hs


## Demo data loading

In [None]:
base_sample = 10000 # Samples per training W.
rand_sample = 50    # Number of validation samples sharing the same random W.

# Training: each sample is randomly assigned one of the two training disorder strengths.
Ws_train = np.random.randint(0,     2, size=(2 * base_sample,))
Ws_train = (Ws_train * 7.5) + 0.5 # i.e. Ws are 0.5 and 8.0.

# Validation: draw random Ws in [0.1, 5.9) and repeat each one `rand_sample` times.
Ws_valid = np.random.uniform(0.1, 5.9, size=(2 * base_sample // rand_sample,))
Ws_valid = (Ws_valid.reshape(-1, 1) * np.ones((1, rand_sample))).flatten()
print(Ws_train.shape)
print(Ws_valid.shape)

In [None]:
# Parameter dict describing the demo data set; passed to MBLHDataset as `MBL_params`.
MBL = {
    "obj_name": 'H',
    "L": 8,
    "periodic": True,
    "Ws_train": Ws_train,
    "Ws_valid": Ws_valid,
}
# Unpack into globals; later cells read `L` and `periodic`.
obj_name = MBL['obj_name']
L        = MBL['L']
periodic = MBL['periodic']

In [None]:
from MBL_H_dataset import MBLHDataset

# Demo training set built from the `MBL` parameters.
train_dataset = MBLHDataset(
    MBL_params=MBL,
    train=True,
    transform=transforms.ToTensor(),
    # transform=transforms.Compose([
    #     transforms.ToPILImage(),
    #     transforms.ToTensor()
    # ]),
)
# Demo validation set built from the same parameters (train=False).
valid_dataset = MBLHDataset(
    MBL_params=MBL,
    train=False,
    transform=transforms.ToTensor(),
    # transform=transforms.Compose([
    #     transforms.ToPILImage(),
    #     transforms.ToTensor()
    # ]),
)

print('Number of training samples  :', len(train_dataset))
print('Number of validation samples:', len(valid_dataset))

In [None]:
# Disorder strength assigned to the first training sample.
Ws_train[0]

In [None]:
# Two classes.
labels = ['Extended (Low W)', 'Localized (High W)']

# Fetch the sample once, so that image, W and label are guaranteed to belong together
# and the dataset is not indexed three times.
first_sample = train_dataset[0]
image, W, label = first_sample["image"], first_sample["W"], first_sample["label"]
print("W: {:.2f}\nLabel: {}".format(W, labels[label]))
print("Shape of the image:", image.size())
print("Smallest value in the image:", torch.min(image))
print("Largest value in the image:", torch.max(image))
# print(image)

In [None]:
# Shape of the first channel of the first training image.
train_dataset[0]["image"][0,:,:].shape

In [None]:
def visualize_data(dataset):
    """Show a 2 x 2 grid of randomly chosen samples, each annotated with its W.

    Samples are drawn with replacement, so the same sample can appear twice.
    Only the first channel of each image is shown, with a fixed colour range
    of [-10, 10].

    Parameters
    ----------
    dataset : object
        Indexable data set whose items are dicts with the keys "image" and "W".
    """

    num = 2
    sample_idx = np.random.randint(0, len(dataset), size=num * num)

    fig, axes = plt.subplots(num, num, figsize=(fig_w/dpi,fig_h/dpi*2), dpi=dpi, squeeze=False)

    for i, idx in enumerate(sample_idx):
        # Fetch the sample once so that the image and its W belong together.
        sample = dataset[idx]
        ax = axes[i%num,i//num]
        ax.imshow(sample["image"][0,:,:], vmin=-10, vmax=10)
        ax.annotate('W={:3.1f}'.format(sample["W"]), (0.5,0.5), xycoords='axes fraction', ha='center', color='w')

    # Hide ticks and axes on every panel.
    for ax in axes.flat:
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

    fig.tight_layout()

In [None]:
# Plot a few random samples from the demo training set.
print('Visualize training data:')
visualize_data(train_dataset)

In [None]:
# Plot a few random samples from the demo validation set.
print('Visualize validation data:')
visualize_data(valid_dataset)

In [None]:
# Drop the demo data sets (presumably to free memory before training).
del train_dataset
del valid_dataset

## Neural network

Since NNs with the same `n` have the same input size, we will evaluate them using the same NN structure. As a side effect, results for different `n` are not entirely comparable, but we will compare them anyway because reasons.  

Two classes `MBLModel` and `MBLDataset`, modified from a previous CNN facial recognition code (own work), are used. The model structure and hyperparameters are defined using a dict called `hparams`. Inside it, specifications of the training data are passed using a nested dict `hparams["MBL"]`. The models are stored in a directory structure that mirrors that of the training data (reduced density matrices $\rho_A$).  

Caveat: The validation data is not unseen data from the training distribution $W \in \{0.5, 8\}$, but rather samples at random $W$ that we will use to predict $W_c$.  

See the other notebook for data generation.  

In [None]:
from MBL_H_model import MBLHModel
# Version number used in the model file names and the TensorBoard log name below.
model_version = 1

# Two classes.
labels = ['Extended (Low W)', 'Localized (High W)']

In [None]:
# Default parameters that work.
L = 8
input_size = (1, 2**L, 2**L) # train_dataset[0]["image"].size()
output_size = 2 # [0, 1], two phases. == len(labels)

default_hparams = {
    # MBL Parameters:
    "MBL": None, # Insert later (see training_loop).
    # NN Parameters:
    "input_size" : input_size, # Reuse the tuple defined above instead of repeating the literal.
    "output_size": output_size,
    "weight_decay": 0,
    "batch_size" : 10,
    "entry_count": 4, # #CNN before inception unit. 
    "group_count": 0, # #Inception units.
    "group_size" : 3, # #CNN in each Inception unit.
    # "exit_count": 2,
    "pool_every": 4,
    # NOTE(review): the first conv layer declares 2 input channels while
    # `input_size` declares 1 - confirm against MBLHModel / MBLHDataset.
    "layers_cnn": [
        {
            "in_channels": 2,
            "out_channels": 8,
            "kernel_size": 3,
            "stride": 1,
            "use_max_pool": True,
        },
        {
            "in_channels": 8,
            "out_channels": 16,
            "kernel_size": 5,
            "stride": 2,
            "use_max_pool": True,
        },
        {
            "in_channels": 16,
            "out_channels": 32,
            "kernel_size": 5,
            "stride": 2,
            "use_max_pool": True,
        },
        {
            "in_channels": 32,
            "out_channels": 64,
            "kernel_size": 3,
            "stride": 1,
            "use_max_pool": True,
        },
    ],
    # RuntimeError: size mismatch, m1: [2 x 13254], m2: [53016 x 30]
    # 13254 = ((96-2)/2) ^2 * 6
    "layers_fc": [
        {
            # Must equal the flattened size of the last CNN output.
            # NOTE(review): the old formula comment "((2^n - 2) / 2)^2 * 6" does not give 256 - confirm how 256 was derived.
            "in_features": 256,
            "out_features": 128,
            "dropout": 0.5,
        },
        {
            "in_features": 128,
            "out_features": output_size,
        },
    ]
}

In [None]:
def get_MBL(L, p, base_sample):
    """Build the `MBL` parameter dict (random training / validation disorder strengths).

    Training Ws are drawn at random from {0.5, 8.0}. Validation Ws are drawn
    uniformly from [0.1, 5.9), each value repeated `rand_sample` (= 50) times.

    Parameters
    ----------
    L : int
        System size.
    p : bool
        Whether the Hamiltonian is periodic. Previously this argument was
        ignored and `periodic` was always True.
    base_sample : int
        Samples per training W; `2 * base_sample` training Ws are generated.

    Return
    ------
    MBL : dict
        Dict with the keys "obj_name", "L", "periodic", "Ws_train" and "Ws_valid".
    """

    rand_sample = 50  # Number of validation samples sharing the same random W.

    Ws_train = np.random.randint(0,     2, size=(2 * base_sample,))
    Ws_train = (Ws_train * 7.5) + 0.5 # i.e. Ws are 0.5 and 8.0.

    Ws_valid = np.random.uniform(0.1, 5.9, size=(2 * base_sample // rand_sample,))
    Ws_valid = (Ws_valid.reshape(-1, 1) * np.ones((1, rand_sample))).flatten()

    MBL = {
        "obj_name": 'H',
        "L": L,
        "periodic": p,
        "Ws_train": Ws_train,
        "Ws_valid": Ws_valid,
    }

    return MBL


In [None]:
def training_loop(default_hparams, MBL, epochs=60, filename='model_v{}.pkl.gz'.format(model_version), save=True):
    """Train an `MBLHModel` and optionally save it with `save_H_model`.

    Parameters
    ----------
    default_hparams : dict
        Hyperparameter template; it is deep-copied, so the caller's dict is not modified.
    MBL : dict
        Data-set parameters, stored under `hparams['MBL']`. Must contain "L" and "periodic".
    epochs : int, optional
        Maximum number of epochs. The trainer also uses `min_epochs=10`.
    filename : str, optional
        File name passed to `save_H_model`.
    save : bool, optional
        Whether to save the trained model. `save_H_model` moves the model to the CPU.

    Return
    ------
    model : MBLHModel
        The trained model.
    """

    hparams = copy.deepcopy(default_hparams)
    hparams['MBL'] = MBL
    # seed_everything(hparams["seed"])
    model = MBLHModel(hparams=hparams)

    L        = MBL['L']
    periodic = MBL['periodic']

    # `device` is a torch.device, and `device == 'cpu'` is False for it even on a
    # CPU-only machine, so GPUs were always requested. Compare the device type instead;
    # torch.device(...) also accepts a plain string such as "cpu".
    gpus = 0 if torch.device(device).type == 'cpu' else -1

    logger = TensorBoardLogger('lightning_logs', name='MBL_v{:d}'.format(model_version))
    scale_accum = 1

    trainer = pl.Trainer(
        gpus=gpus,
        logger=logger,
        max_epochs=epochs,
        min_epochs=10,
        profiler=True,
        # {5: 2, 10: 8} means no accumulation for epochs 1-4. accumulate 2 for epochs 5-10. accumulate 8 after that
        accumulate_grad_batches={
            1 : scale_accum * 1, 
            20: scale_accum * 2, 
            40: scale_accum * 4, 
            80: scale_accum * 8,
        },
        # accumulate_grad_batches=4,
        # weights_summary=None # [None,'top','full']
    )

    trainer.fit(model)

    if save:
        save_H_model(model, filename, L, periodic)
        # model.to(device)

    return model


## Demo training

In [None]:
# Build the data-set parameters for the demo run, using the global `L` and `periodic`.
MBL = get_MBL(L, periodic, base_sample=1000)
print(L, periodic)
print(MBL)
# Training is disabled here; a previously saved model is loaded further down instead.
# model = training_loop(default_hparams, MBL, epochs=10, save=True).to(device)

In [None]:
# save_H_model(model, 'model_v{}.pkl.gz'.format(model_version), L, periodic)
# model.to(device)

In [None]:
# Load the previously trained model for the current `L` and `periodic`.
# This raises if the model file does not exist yet.
model = load_H_model('model_v{}.pkl.gz'.format(model_version), L, periodic)

## Visualize model predictions

In [None]:
def visualize_predictions(model, mode='train'):
    """Plot a 5 x 5 grid of samples annotated with the true class, W and the model prediction.

    At least 25 randomly ordered samples are run through the model; 25 of them
    are then drawn with replacement (so duplicates are possible) and shown.
    The annotation box is lime when the prediction matches the label and red otherwise.

    Parameters
    ----------
    model : object
        Model exposing `model.dataset["train"]` / `model.dataset["val"]` and callable on an image batch.
    mode : str, optional
        'train' uses the training set; any other value (e.g. 'valid') uses the validation set.
    """

    labels = ['Extended (Low W)', 'Localized (High W)']

    # Sample model predictions.
    result_images  = []
    result_targets = []
    result_Ws      = []
    result_preds   = []
    result_probs   = []

    model.eval()
    # Normalize over the class dimension. For the 2D model output this is the
    # dimension the implicit (deprecated) default selected as well.
    softmax = torch.nn.Softmax(dim=1)
    split = "train" if mode == 'train' else "val"
    dataloader = DataLoader(model.dataset[split], batch_size=1, shuffle=True)#, pin_memory=True)

    # Inference only: do not build the autograd graph.
    with torch.no_grad():
        for batch in dataloader:

            images, targets, Ws = batch["image"], batch["label"], batch["W"]
            images  = images.to(device)
            outputs = model(images)
            images  = images.to('cpu')
            outputs = outputs.to('cpu')

            preds   = outputs.argmax(axis=1)
            probs   = softmax(outputs)

            result_images.extend(images.tolist())
            result_targets.extend(targets.tolist())
            result_Ws.extend(Ws.tolist())
            result_preds.extend(preds.tolist())
            result_probs.extend(probs.tolist())

            if len(result_images) >= 25:
                break # Because we only need 25 images.

    # Display images.
    sample_idx = np.random.randint(0, len(result_preds), size=5*5)

    fig, axes = plt.subplots(5, 5, figsize=(fig_w/dpi,fig_h/dpi*2), dpi=dpi, squeeze=False)

    for i, idx in enumerate(sample_idx):
        ax = axes[i%5,i//5]
        ax.imshow(np.array(result_images[idx])[0,:,:], vmin=-10, vmax=10)
        W      = result_Ws[idx]
        W_in   = result_targets[idx]
        W_pred = result_preds[idx]
        W_prob = result_probs[idx]
        annotation  = 'Input  : \n{}\nW={:.2f}\n\n'.format(labels[W_in], W)
        annotation += 'Predict: \n{}\n{:.0f}%'.format(labels[W_pred], W_prob[W_pred]*100)
        # Lime box: prediction matches the label. Red box: it does not.
        ec = 'lime' if W_in == W_pred else 'red'
        ax.annotate(annotation, (0.5,0.275), xycoords='axes fraction', ha='center', color='w', bbox=dict(facecolor='none', edgecolor=ec, boxstyle='round,pad=1', linewidth=2))

    # Hide ticks and axes on every panel.
    for ax in axes.flat:
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

    fig.tight_layout()


### Sample training data

In [None]:
# Predictions on samples from the training set.
visualize_predictions(model, 'train')

### Sample validation data

In [None]:
# Predictions on samples from the validation set (any mode other than 'train' selects it).
visualize_predictions(model, 'valid')

### Model performance

In [None]:
def evaluate_model_core(model, dataset):
    """Compute the summed cross-entropy loss and the accuracy of `model` on `dataset`.

    The model is switched to evaluation mode and every sample is pushed
    through it one at a time (batch size 1). Inputs are moved to the
    notebook-level `device` for the forward pass and the outputs are brought
    back to the CPU before the loss is computed.

    Parameters
    ----------
    model
        Callable network; must provide `.eval()` and map an image batch to
        per-class scores of shape (batch, n_classes).
    dataset
        Dataset whose collated batches are dicts with the keys "image" and
        "label".

    Returns
    -------
    loss : float
        Sum (not mean) of the per-sample cross-entropy losses.
    accuracy : float
        Fraction of correctly classified samples, or `nan` for an empty
        dataset.
    """
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
    loss = 0
    n_correct = 0

    # Pure inference: disabling autograd avoids building a graph per sample.
    with torch.no_grad():
        for batch in dataloader:
            images, targets = batch["image"], batch["label"]
            outputs = model(images.to(device)).to('cpu')
            preds = outputs.argmax(dim=1)
            loss += criterion(outputs, targets).item()
            n_correct += (preds == targets).sum().item()

    n_total = len(dataset)
    # Guard against an empty dataset instead of raising ZeroDivisionError.
    accuracy = n_correct / n_total if n_total else float('nan')
    return loss, accuracy

def evaluate_model(model):
    """Print the training and validation accuracy of `model` as percentages.

    The two splits are read from `model.dataset["train"]` and
    `model.dataset["val"]` and scored with `evaluate_model_core`.
    """
    report = (
        ("Training accuracy  : {:.4f}%", "train"),
        ("Validation accuracy: {:.4f}%", "val"),
    )
    for template, split in report:
        # Index 1 of the result is the accuracy; index 0 (the loss) is not reported.
        accuracy = evaluate_model_core(model, model.dataset[split])[1]
        print(template.format(accuracy * 100))


In [None]:
# Print the training and validation accuracy of the trained model.
evaluate_model(model)

Training accuracy  : 100.0000%  
Validation accuracy: 88.3000%  


## Estimate transition disorder strength

In [None]:
def sigmoid(x, x0, y0, b):
    """Logistic curve with midpoint `x0`, steepness `b` and vertical offset `y0`.

    Evaluates ``1 / (1 + exp(-b * (x - x0))) + y0``; `x` may be a scalar or
    a NumPy array.
    """
    exponent = -b * (x - x0)
    return 1 / (1 + np.exp(exponent)) + y0

# The logit is the inverse of the offset sigmoid: it maps y back to x.
def logit(y, x0, y0, b):
    """Return the `x` for which ``1 / (1 + exp(-b * (x - x0))) + y0`` equals `y`.

    The offset `y0` is removed first, then the log-odds are rescaled by `b`
    and shifted by `x0`.
    """
    p = y - y0
    odds = p / (1 - p)
    return np.log(odds) / b + x0


In [None]:
# Sanity check: at x = x0 with y0 = 0 the sigmoid evaluates to 1 / (1 + exp(0)) = 0.5.
sigmoid(0,0,0,1)

In [None]:
# Sanity check: with y0 = 0 the logit maps y = 0.5 back to x = x0 = 0.
logit(0.5,0,0,1)

In [None]:
# Two-parameter variant: the vertical offset y0 is dropped so that the curve
# is pinned between y = 0 and y = 1.
def sigmoid(x, x0, b):
    """Logistic curve ``1 / (1 + exp(-b * (x - x0)))``.

    `x0` is the midpoint (where the value is 0.5) and `b` the steepness;
    `x` may be a scalar or a NumPy array.
    """
    z = b * (x - x0)
    return 1 / (1 + np.exp(-z))

# The logit is the inverse of the two-parameter sigmoid.
def logit(y, x0, b):
    """Return the `x` for which ``1 / (1 + exp(-b * (x - x0)))`` equals `y`."""
    odds = y / (1 - y)
    return x0 + np.log(odds) / b


In [None]:
# model = model.to('cpu')

In [None]:
def calc_probs(model, dataset):
    """Run `model` over `dataset` and collect class probabilities per disorder strength W.

    Every sample is evaluated individually (batch size 1) on the
    notebook-level `device`; the softmax over the class axis turns the model
    outputs into probabilities. Samples are then sorted by W and column 1 of
    the probabilities (the second class) is aggregated per distinct W.

    Parameters
    ----------
    model
        Callable network; must provide `.eval()` and map an image batch to
        per-class scores of shape (batch, n_classes).
    dataset
        Dataset whose collated batches are dicts with the keys "image" and "W".

    Returns
    -------
    Ws : np.ndarray
        W of every sample, sorted in ascending order.
    Ps : np.ndarray
        Class probabilities of every sample, ordered like `Ws`.
    Ws_uniq : np.ndarray
        Distinct W values in ascending order.
    Ps_mean : np.ndarray
        Mean of `Ps[:, 1]` for each entry of `Ws_uniq`.
    Ps_std : np.ndarray
        Sample standard deviation (ddof=1) of `Ps[:, 1]` for each entry of
        `Ws_uniq`; `nan` where a W has only a single sample.
    """
    model.eval()
    # Explicit class axis; relying on the implicit choice is deprecated.
    softmax = torch.nn.Softmax(dim=1)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

    sample_Ws = []
    sample_probs = []
    # Pure inference: disabling autograd avoids building a graph per sample.
    with torch.no_grad():
        for batch in dataloader:
            images, batch_Ws = batch["image"], batch["W"]
            outputs = model(images.to(device)).to('cpu')
            # extend() keeps accumulation linear, unlike `list = list + new`.
            sample_Ws.extend(batch_Ws.tolist())
            sample_probs.extend(softmax(outputs).tolist())

    sample_Ws = np.array(sample_Ws)
    sample_probs = np.array(sample_probs)
    order = sample_Ws.argsort()
    Ws = sample_Ws[order]
    Ps = sample_probs[order]

    # Group the second-class probability by W. Ws is sorted, so the insertion
    # order of the dict is already the ascending order of the distinct values.
    grouped = OrderedDict()
    for W, P in zip(Ws, Ps[:, 1]):
        grouped.setdefault(W, []).append(P)

    Ws_uniq = np.array(list(grouped.keys()))
    Ps_mean = np.array([np.mean(P) for P in grouped.values()])
    Ps_std = np.array([np.std(P, ddof=1) for P in grouped.values()])

    return Ws, Ps, Ws_uniq, Ps_mean, Ps_std


In [None]:
def plot_crossing(Ws, Ps, Ws_uniq, Ps_mean, Ps_std):
    """Plot P(Localized) against W, fit a sigmoid and mark the crossing W_c.

    Draws the per-W mean with error bars, the raw per-sample points, the
    fitted two-parameter sigmoid and vertical lines at W_c and W_c +/- the
    fit uncertainty. The fit result is also printed.

    Relies on notebook-level names: `fig_w`, `fig_h`, `dpi` (figure size),
    `MBL['L']` (shown in the title), `curve_fit`, `sigmoid` and `logit`
    (the two-parameter versions).

    Parameters
    ----------
    Ws : array-like
        W of every sample.
    Ps : array-like
        P(Localized) of every sample, one value per entry of `Ws`.
    Ws_uniq : array-like
        Distinct W values.
    Ps_mean, Ps_std : array-like
        Mean and spread of P(Localized) for each entry of `Ws_uniq`.
    """
    fig, axes = plt.subplots(1, 1, figsize=(fig_w/dpi,fig_h/dpi), dpi=dpi, squeeze=False)
    ax = axes[0,0]

    # Averaged values with error bars; bars and caps are made semi-transparent.
    _, caps, bars = ax.errorbar(Ws_uniq, Ps_mean, Ps_std, ls=' ', marker='x',capsize=2, capthick=2, label='P(Localized) Mean')
    for artist in (*bars, *caps):
        artist.set_alpha(0.5)

    # Raw per-sample data.
    ax.plot(Ws, Ps, ls=' ', marker='x', label='P(Localized)', alpha=0.1)
    ax.set_title('Probability vs W (L={})'.format(MBL['L']))
    ax.set_xlabel('W')
    ax.set_ylabel('Probability')

    # Fit the sigmoid to all raw samples (original note: fitting only
    # `Ws_uniq` / `Ps_mean` reportedly gives identical results).
    # Add bounds or other initial values if the fit does not converge.
    popt, pcov = curve_fit(sigmoid, Ws, Ps, p0=[3, 2])
    x0, b = popt
    x = np.linspace(0, 10, 100)
    y = sigmoid(x, *popt)
    ax.plot(x, y, ls='--', label='Fit')
    print('Fitted sigmoid function 1 / (1 + Exp(-{:.4f} (x - {:.4f})))'.format(b, x0))

    # W_c is where the fitted curve crosses P = 0.5; its uncertainty is the
    # square root of the first diagonal element of the covariance matrix (x0).
    W_c = logit(0.5, *popt)
    perr = np.sqrt(np.diag(pcov))
    print('Transition W_C is found to be at W = {:.4f} ± {:.4f}'.format(W_c, perr[0]))
    ax.axvline(W_c, c='r',         ls='--', label='$W_c$')
    ax.axvline(W_c - perr[0], c='r', alpha=0.3, ls='--', label='$W_c-$')
    ax.axvline(W_c + perr[0], c='r', alpha=0.3, ls='--', label='$W_c+$')
    ax.axhline(0.5, c='lightgrey', ls='--', label='$P=0.5$')

    ax.legend(loc='best')


In [None]:
# data = calc_probs(model, model.dataset["val"])
# save_H_eval_valid(data, model_version, L, periodic)

In [None]:
# Load cached evaluation results; the five values are unpacked in the same order that calc_probs returns them
# (presumably written earlier by save_H_eval_valid — TODO confirm).
valid_Ws, valid_Ps, valid_Ws_uniq, valid_Ps_mean, valid_Ps_std = load_H_eval_valid(model_version, L, periodic)

In [None]:
# Only column 1 of valid_Ps is plotted as the raw per-sample P(Localized).
plot_crossing(valid_Ws, valid_Ps[:,1], valid_Ws_uniq, valid_Ps_mean, valid_Ps_std)

## Batch training
The for-loop below should iterate over the same parameter combinations as the loop used to generate the reduced density matrices.

In [None]:
# Drop the interactively trained model and its datasets before batch training
# (presumably to release memory — TODO confirm).
del model.dataset["train"]
del model.dataset["val"]
del model

In [None]:
# Batch-train one model per (L, num_EV, periodic) combination and save its
# validation-set probabilities. Combinations whose model file already exists are skipped.
n  = 6                   # !!! Important !!! Number of consecutive sites.
k  = 5                   # Number of eigenvalues near zero to save.
J  = 1                   # Always = 1
Ls = list(range(8,13,2)) # System sizes L: 8, 10, 12.
ps = [False, True]       # Periodic or not.
et = []                  # Execution time in seconds, one entry per combination.
num_EVs = [k]            # Number of eigenvalues near zero to save.
model_filename = 'model_v{}.pkl.gz'.format(model_version)

for L in Ls:
    for num_EV in num_EVs:
        for p in ps:
            start_time = time.time()
            print('{} | Training model for L={:02d} | n={:02d} | periodic={: <5} | num_EV={} ...'.format(dt(), L, n, str(p), num_EV), flush=True)

            if model_exists(model_filename, L, n, p, num_EV):
                print('Model exists. Training skipped.', flush=True)
            else:
                MBL = get_MBL(L, n, p, num_EV)
                try:
                    # Train, then evaluate on the validation split and store the result.
                    model = training_loop(default_hparams, MBL).to(device)
                    data = calc_probs(model, model.dataset["val"])
                    save_eval_valid(data, model_version, L, n, p, num_EV)
                except RuntimeError as err:
                    # NOTE(review): every RuntimeError (not only a too-small data set) is
                    # reported as "Insufficient data" — confirm this is intended.
                    print('RuntimeError: {0}'.format(err), flush=True)
                    print('Insufficient data. Training skipped.', flush=True)

            # Timing covers skipped and failed combinations as well.
            exec_time = time.time() - start_time
            et.append(exec_time)
            print('{} | Computed: L={:02d} | n={:02d} | periodic={: <5} | num_EV={}.'.format(dt(), L, n, str(p), num_EV), flush=True)
            print('{} | Execution took {: 8.2f}s or {: 6.2f}min.'.format(dt(), exec_time, exec_time/60), flush=True)
            print(' ', flush=True)

# if check_shutdown_signal():
#     break

In [None]:
# Code for "annealing".
# hparams["use_adam"] = 1
# model_adam = MBLModel(hparams=hparams)
# model_adam.prepare_data()
# model_adam.load_state_dict(model.state_dict())

In [None]:
# Ws, Ps, Ws_uniq, Ps_mean, Ps_std = load_eval_valid(model_version, L, n, p, num_EV)