In [None]:
# Set to True when running on Google Colab: later cells then use the 'drive/MyDrive/...' data paths
# and run the Colab-only setup commands.
in_colab = False

# Project 13*: Machine Learning of Many Body Localization (Exact diagonalization + Machine Learning)

Use exact diagonalization to obtain all eigenstates of the Heisenberg model with a
random field,

\begin{equation}
    H = J \sum_i \vec{S}_{i} \cdot \vec{S}_{i+1} - \sum_i h_i S^z_i
\end{equation}

where the values of the field $ h_i \in [-W, W] $ are chosen from a uniform random distribution with "disorder strength" $W$ (use moderate system sizes $L = [10, 12]$).

The exciting property of this model is that it is believed to undergo a phase transition from an extended phase (small $W$) to a localized phase (large $W$). 

We will use ML to detect this transition: pick a number of eigenstates near energy $E = 0$ (a few hundred to a few thousand eigenstates, taken from different disorder realizations) and obtain their reduced density matrices $\rho^A$, where $A$ is a region of $n$ consecutive spins.

Now use the density matrices for $W = 0.5 J$ and $W = 8.0 J$ to train a neural network (just interpret the entries of $\rho^A$ as an image with $2^n \times 2^n$ pixels). Then use this network and study the output of the neural network for different $W$.

How do the results depend on system size $L$ and block size $n$? At which $W_c$ do you expect the
transition to occur?

_Author: Tin Kei CHENG_  
_TUM, 2021_

## Imports

In [None]:
import os
import sys
import copy
import json
import gzip
import lzma
import pytz
import time
import pickle
import numpy as np
from numba import jit, njit # Set "nopython" mode for best performance, equivalent to @njit # cache=True, parallel=True
from datetime import datetime
from tqdm import tqdm

tz = pytz.timezone('Europe/Berlin')

import scipy
import scipy.linalg
import scipy.sparse.linalg

from scipy.sparse import csr_matrix, kron, identity
from scipy.sparse.linalg import eigsh
from scipy.linalg import svd
from scipy.optimize import curve_fit

from collections import OrderedDict

%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.ticker import MaxNLocator

# Figure geometry. The plotting cells pass figsize=(fig_w/dpi, fig_h/dpi), so fig_w and fig_h
# act as the figure width and height in pixels at the given dpi.
dpi = 100
fig_w = 1280
fig_h = 640

%matplotlib inline

In [None]:
# Storage locations used as default arguments by the save/load helpers below.
# On Colab everything lives under 'drive/MyDrive/Colab Data/CMMP' (presumably a mounted
# Google Drive -- confirm); otherwise the paths are relative to the working directory.
if in_colab:
    ED_data_dir = 'drive/MyDrive/Colab Data/CMMP/ED_data'
    rho_train_data_dir = 'drive/MyDrive/Colab Data/CMMP/rho_train_data'
    rho_valid_data_dir = 'drive/MyDrive/Colab Data/CMMP/rho_valid_data'
    EVW_train_data_dir = 'drive/MyDrive/Colab Data/CMMP/EVW_train_data'
    EVW_valid_data_dir = 'drive/MyDrive/Colab Data/CMMP/EVW_valid_data'
    model_dir  = 'drive/MyDrive/Colab Data/CMMP/models'
    signal_dir = 'drive/MyDrive/Colab Data/CMMP'  # directory that holds 'shutdown_signal.txt'
else:
    ED_data_dir = 'ED_data'
    rho_train_data_dir = 'rho_train_data'
    rho_valid_data_dir = 'rho_valid_data'
    EVW_train_data_dir = 'EVW_train_data'
    EVW_valid_data_dir = 'EVW_valid_data'
    model_dir  = 'models'
    signal_dir = '.'  # directory that holds 'shutdown_signal.txt'

In [None]:
# Colab only: print the CPU details of the machine running this notebook.
if in_colab:
    !cat /proc/cpuinfo

In [None]:
# Colab only: install the ipython-autotime package and load its `autotime` extension
# (presumably to report the run time of each cell -- confirm).
if in_colab:
    !pip install ipython-autotime
    %load_ext autotime

In [None]:
# if in_colab:
#     !pip install pytorch_lightning==0.7.6 torchsummary==1.5.1
#     !pip install torch==1.4.0+cu92 torchvision==0.5.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# pip install torch==1.4.0+cu92 torchsummary==1.5.1 torchvision==0.5.0+cu92 pytorch-lightning==0.7.6  -f https://download.pytorch.org/whl/torch_stable.html
# import torch
# from torch.utils.data import DataLoader
# from torchvision import transforms

# import pytorch_lightning as pl
# from pytorch_lightning import Trainer, seed_everything
# from pytorch_lightning.callbacks import EarlyStopping
# from pytorch_lightning.loggers import TensorBoardLogger
# # from torchsummary import summary
# # help(summary)

import warnings
# NOTE(review): the 'ignore' filter hides every warning for the rest of the session,
# including numerical and deprecation warnings raised by the solvers used below.
warnings.filterwarnings('ignore')

In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # device = "cpu"
# print(device)
# # python -c "import torch; print(torch.__version__)"

## Util functions

In [None]:
def dt():
    """Return the current time in the notebook timezone `tz`, formatted as 'YYYY-MM-DD HH:MM:SS'."""
    now = datetime.now(tz=tz)
    return now.strftime('%Y-%m-%d %H:%M:%S')

def dict_to_str(_dict):
    """Turn a flat parameter dict into a file-name-friendly string.

    Keys are sorted, the dict is serialized with JSON, and the result is reduced to
    alphanumerics: ':' becomes '=', every other special character becomes '_'.
    Example: {'periodic': False, 'L': 8} -> 'L=8_periodic=false'.
    """

    ordered = dict(sorted(_dict.items()))  # Sort keys.
    serialized = json.dumps(ordered)[1:-1]  # Serialize and drop the surrounding braces.
    compact = serialized.replace('\"', '').replace(' ', '')  # Drop quotes and spaces.

    # Map every remaining character: keep alphanumerics, ':' -> '=', anything else -> '_'.
    pieces = []
    for ch in compact:
        if ch.isalnum():
            pieces.append(ch)
        elif ch == ':':
            pieces.append('=')
        else:
            pieces.append('_')

    return ''.join(pieces)


def save_cache(obj, obj_name, obj_params, cache_dir='cache'):
    """Pickle `obj` to `<cache_dir>/<obj_name>/<params>.pkl.gz`.

    The file name is derived from `obj_params` via `dict_to_str`, so the same
    parameters always map to the same cache file. Keep `obj_params` flat and simple.

    Parameters
    ----------
    obj : object
        The object to cache.
    obj_name : str
        A unique name for this object; used as the sub-directory name.
    obj_params : dict
        All parameters needed to generate this object.
    cache_dir : str, optional
        Directory where the cache is located.
    """

    file_name = dict_to_str(obj_params) + '.pkl.gz'
    target_dir = os.path.join(cache_dir, obj_name)
    os.makedirs(target_dir, exist_ok=True)

    target_path = os.path.join(target_dir, file_name)
    with gzip.open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


def load_cache(obj_name, obj_params, cache_dir='cache'):
    """Return the cached object for (`obj_name`, `obj_params`), or None if it is not cached.

    Looks for `<cache_dir>/<obj_name>/<params>.pkl.gz`, where `<params>` comes from
    `dict_to_str(obj_params)`. The cache sub-directory is created if it is missing.

    Parameters
    ----------
    obj_name : str
        A unique name for this object; used as the sub-directory name.
    obj_params : dict
        All parameters needed to generate this object.
    cache_dir : str, optional
        Directory where the cache is located.
    """

    file_name = dict_to_str(obj_params) + '.pkl.gz'
    target_dir = os.path.join(cache_dir, obj_name)
    os.makedirs(target_dir, exist_ok=True)

    cache_path = os.path.join(target_dir, file_name)
    if not os.path.isfile(cache_path):
        return None

    with gzip.open(cache_path, 'rb') as handle:
        return pickle.load(handle)


def check_shutdown_signal(signal_dir=signal_dir):
    """Report whether a graceful shutdown was requested.

    A shutdown is requested by writing `1` on the first line of
    `<signal_dir>/shutdown_signal.txt`; a data-generation loop can poll this
    between iterations so that it stops only after completing one.

    Return
    ------
    shutdown : bool
        True if the shutdown signal was detected, False otherwise.
    """

    os.makedirs(os.path.join(signal_dir), exist_ok=True)
    signal_path = os.path.join(signal_dir, 'shutdown_signal.txt')

    if not os.path.isfile(signal_path):
        return False

    with open(signal_path) as f:
        raw_lines = f.readlines()

    if not raw_lines:
        return False

    # Only the first line matters; surrounding whitespace is ignored.
    return raw_lines[0].strip() == '1'


@njit
def is_sorted(arr):
    """Return True when `arr` is in non-decreasing order (every element <= its successor)."""
    head = arr[:-1]
    tail = arr[1:]
    return np.all(head <= tail)


def save_ED(obj, obj_name, L, W, periodic, data_dir=ED_data_dir):
    """Pickle one batch of exact-diagonalization results, filed by its parameters.

    The file goes to `<data_dir>/<obj_name>/L=../W=../periodic=../<obj_name>-<index>.pkl.gz`,
    where `<index>` is the first 9-digit counter that is not already taken, so
    repeated calls add new files instead of overwriting earlier ones.

    Parameters
    ----------
    obj : object
        Picklable results to store (the notebook passes lists of eigenvalue or eigenvector arrays).
    obj_name : str
        Name of the stored quantity; used for both the sub-directory and the file prefix.
    L : int
        System size.
    W : float
        Disorder strength.
    periodic : bool
        Whether the Hamiltonian is periodic.
    data_dir : str, optional
        Directory where the data is saved.
    """

    directory = os.path.join(
        data_dir,
        obj_name,
        'L={:02d}'.format(L),
        'W={:.2f}'.format(W),
        'periodic={}'.format(periodic),
    )
    os.makedirs(directory, exist_ok=True)

    def chunk_path(index):
        return os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(index))

    # Find the first unused file index.
    index = 0
    while os.path.exists(chunk_path(index)):
        index += 1

    with gzip.open(chunk_path(index), 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


def load_ED(obj_name, L, W, periodic, data_dir=ED_data_dir):
    """Placeholder for loading exact-diagonalization results written by `save_ED`.

    This function is not implemented and always raises. The previous body after the
    `raise` was unreachable and could not have worked: it referenced an undefined
    `obj_params` and formatted the float `W` with the integer format `'{:.2d}'`.

    Parameters
    ----------
    obj_name : str
        Name of the stored quantity.
    L : int
        System size.
    W : float
        Disorder strength.
    periodic : bool
        Whether the Hamiltonian is periodic.
    data_dir : str, optional
        Directory where the data is saved.

    Raises
    ------
    NotImplementedError
        Always.
    """

    raise NotImplementedError('Function not implemented.')


In [None]:
def save_EVW_train(obj, obj_name, L, periodic, num_EV, data_dir=EVW_train_data_dir):
    """Pickle one batch of training samples into the training data directory.

    The file goes to `<data_dir>/L=../periodic=../num_EV=../<obj_name>-<index>.pkl.gz`,
    where `<index>` is the first 9-digit counter that is not already taken.

    Parameters
    ----------
    obj : list
        Samples to store. The notebook passes a list of [E, V, W] entries
        (eigenvalue, eigenvector, disorder strength).
    obj_name : str
        File-name prefix for this data.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.
    """

    parts = ['L={:02d}'.format(L), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV)]
    directory = os.path.join(data_dir, *parts)
    os.makedirs(directory, exist_ok=True)

    # Advance the suffix until an unused file name is found.
    suffix = 0
    while True:
        file_path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(suffix))
        if not os.path.exists(file_path):
            break
        suffix += 1

    with gzip.open(file_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


def save_EVW_valid(obj, obj_name, L, periodic, num_EV, data_dir=EVW_valid_data_dir):
    """Pickle one batch of validation samples into the validation data directory.

    Same layout as the training data:
    `<data_dir>/L=../periodic=../num_EV=../<obj_name>-<index>.pkl.gz`, where `<index>`
    is the first 9-digit counter that is not already taken.

    Parameters
    ----------
    obj : list
        Samples to store, each paired with the disorder strength W used to generate it.
    obj_name : str
        File-name prefix for this data.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.
    """

    parts = ['L={:02d}'.format(L), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV)]
    directory = os.path.join(data_dir, *parts)
    os.makedirs(directory, exist_ok=True)

    # Advance the suffix until an unused file name is found.
    suffix = 0
    while True:
        file_path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(suffix))
        if not os.path.exists(file_path):
            break
        suffix += 1

    with gzip.open(file_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
def load_EVW_train(obj_name, L, periodic, num_EV, data_dir=EVW_train_data_dir):
    """Load and concatenate every training batch saved for the given parameters.

    Reads `<data_dir>/L=../periodic=../num_EV=../<obj_name>-<index>.pkl.gz` for
    index 0, 1, 2, ... and stops at the first missing index.

    Parameters
    ----------
    obj_name : str
        File-name prefix that was used when saving.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.

    Return
    ------
    data : list
        All samples from all batch files, in file order. Empty if no file exists.
    """

    directory = os.path.join(data_dir, 'L={:02d}'.format(L), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV))
    # Kept from the original: the directory is created even when only reading.
    os.makedirs(directory, exist_ok=True)

    data = []
    i = 0
    while True:
        file_path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(i))
        if not os.path.exists(file_path):
            break
        # NOTE(review): pickle can execute arbitrary code; only load files generated by this notebook.
        with gzip.open(file_path, 'rb') as handle:
            # extend() appends in place; `data = data + ...` re-copied the whole list for every file.
            data.extend(pickle.load(handle))
        i += 1

    return data


def load_EVW_valid(obj_name, L, periodic, num_EV, data_dir=EVW_valid_data_dir):
    """Load and concatenate every validation batch saved for the given parameters.

    Reads `<data_dir>/L=../periodic=../num_EV=../<obj_name>-<index>.pkl.gz` for
    index 0, 1, 2, ... and stops at the first missing index.

    Parameters
    ----------
    obj_name : str
        File-name prefix that was used when saving.
    L : int
        System size.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_EV : int
        Number of eigenvalues around zero being sampled.
    data_dir : str, optional
        Directory where the data is saved.

    Return
    ------
    data : list
        All samples from all batch files, in file order. Empty if no file exists.
    """

    directory = os.path.join(data_dir, 'L={:02d}'.format(L), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV))
    # Kept from the original: the directory is created even when only reading.
    os.makedirs(directory, exist_ok=True)

    data = []
    i = 0
    while True:
        file_path = os.path.join(directory, obj_name + '-{:09d}.pkl.gz'.format(i))
        if not os.path.exists(file_path):
            break
        # NOTE(review): pickle can execute arbitrary code; only load files generated by this notebook.
        with gzip.open(file_path, 'rb') as handle:
            # extend() appends in place; `data = data + ...` re-copied the whole list for every file.
            data.extend(pickle.load(handle))
        i += 1

    return data


In [None]:
def save_model(model, file_name, L, n, periodic, num_EV, directory=model_dir):
    """Pickle a model's `state_dict()` and `hparams` to a gzip file and return its path.

    The model is moved to the CPU first (`model.cpu()`). The file is written to
    `<directory>/L=../n=../periodic=../num_EV=../<file_name>` with gzip compression level 4.
    Presumably `model` is a PyTorch / PyTorch Lightning module -- confirm against the caller.
    """

    cpu_model = model.cpu()
    payload = dict(state_dict=cpu_model.state_dict(), hparams=cpu_model.hparams)

    sub_dirs = ['L={:02d}'.format(L), 'n={:02d}'.format(n), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV)]
    target_dir = os.path.join(directory, *sub_dirs)
    os.makedirs(target_dir, exist_ok=True)

    model_path = os.path.join(target_dir, file_name)
    with gzip.open(model_path, 'wb', compresslevel=4) as handle:
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return model_path


def load_model(file_name, L, n, periodic, num_EV, directory=model_dir):
    """Return the `state_dict` entry of a model file written by `save_model`.

    The parameter sub-directory is created if it does not exist; opening a missing
    file still raises.
    """

    sub_dirs = ['L={:02d}'.format(L), 'n={:02d}'.format(n), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV)]
    target_dir = os.path.join(directory, *sub_dirs)
    os.makedirs(target_dir, exist_ok=True)

    with gzip.open(os.path.join(target_dir, file_name), 'rb') as fp:
        stored = pickle.load(fp)
    return stored["state_dict"]


def model_exists(file_name, L, n, periodic, num_EV, directory=model_dir):
    """Return True if a model file with this name exists for the given parameters.

    The parameter sub-directory is created as a side effect if it does not exist.
    """

    sub_dirs = ['L={:02d}'.format(L), 'n={:02d}'.format(n), 'periodic={}'.format(periodic), 'num_EV={}'.format(num_EV)]
    target_dir = os.path.join(directory, *sub_dirs)
    os.makedirs(target_dir, exist_ok=True)

    return os.path.exists(os.path.join(target_dir, file_name))


## Build Hamiltonian and Exact Diagonalization

### Functions

In [None]:
@njit
def get_h(L, W):
    """Draw `L` random field values, one per site, from `np.random.uniform(-W, W)`."""
    return np.random.uniform(-W, W, L)


In [None]:
def build_si_list(L):
    """Build (or load from cache) the single-site Pauli operators embedded in the full chain.

    For each site i, the operator is kron(id, ..., id, s, id, ..., id) with the
    2x2 matrix `s` at position i, giving a sparse 2^L x 2^L matrix.

    Parameters
    ----------
    L : int
        System size.

    Return
    ------
    sx_list, sy_list, sz_list : list of scipy.sparse matrices
        `sx_list[i]` acts with sigma_x on site i and with the identity elsewhere;
        likewise for y and z.
    """

    obj_params = {'L': L}

    # ========================================
    # Start cached area: si_list.
    # ========================================

    sx_list = load_cache('sx_list', obj_params)
    sy_list = load_cache('sy_list', obj_params)
    sz_list = load_cache('sz_list', obj_params)

    cache_complete = not (sx_list is None or sy_list is None or sz_list is None)

    if not cache_complete:

        # Get single site operators.
        sx = csr_matrix(np.array([[0.,  1. ], [1. ,  0.]]))
        sy = csr_matrix(np.array([[0., -1.j], [1.j,  0.]]))
        sz = csr_matrix(np.array([[1.,  0. ], [0. , -1.]]))
        eye2 = csr_matrix(np.eye(2))

        def embed(local_op, site):
            # kron([id, ..., id, local_op, id, ..., id]) with local_op at `site`.
            factors = [eye2] * L
            factors[site] = local_op
            full_op = factors[0]
            for factor in factors[1:]:
                full_op = kron(full_op, factor, 'csr')
            return full_op

        sx_list = [embed(sx, i_site) for i_site in range(L)]
        sy_list = [embed(sy, i_site) for i_site in range(L)]
        sz_list = [embed(sz, i_site) for i_site in range(L)]

        save_cache(sx_list, 'sx_list', obj_params)
        save_cache(sy_list, 'sy_list', obj_params)
        save_cache(sz_list, 'sz_list', obj_params)

    # ========================================
    # End cached area: si_list.
    # ========================================

    return sx_list, sy_list, sz_list


In [None]:
def build_H_ii(L, periodic):
    """Build (or load from cache) the nearest-neighbour coupling terms of the Hamiltonian.

    H_aa = sum_i s^a_i s^a_{i+1} for a in {x, y, z}. With `periodic=True` the sum
    includes the bond that wraps from the last site back to the first.

    Parameters
    ----------
    L : int
        System size.
    periodic : bool
        Whether to include the wrap-around bond.

    Return
    ------
    H_xx, H_yy, H_zz : scipy.sparse matrices
        The three coupling terms, each 2^L x 2^L.
    sx_list, sy_list, sz_list : list of scipy.sparse matrices
        The single-site operators from `build_si_list`, returned for reuse by the caller.
    """

    sx_list, sy_list, sz_list = build_si_list(L)

    obj_params = {'L': L, 'periodic': periodic}

    # ========================================
    # Start cached area: H_ii.
    # ========================================

    H_xx = load_cache('H_xx', obj_params)
    H_yy = load_cache('H_yy', obj_params)
    H_zz = load_cache('H_zz', obj_params)

    if H_xx is None or H_yy is None or H_zz is None:

        dim = 2**L
        H_xx = csr_matrix((dim, dim))
        H_yy = csr_matrix((dim, dim))
        H_zz = csr_matrix((dim, dim))

        # Open chain: L - 1 bonds. Periodic chain: L bonds, the last one wrapping around.
        num_bonds = L if periodic else L - 1
        for i in range(num_bonds):
            j = (i + 1) % L
            H_xx = H_xx + sx_list[i] @ sx_list[j]
            H_yy = H_yy + sy_list[i] @ sy_list[j]
            H_zz = H_zz + sz_list[i] @ sz_list[j]

        save_cache(H_xx, 'H_xx', obj_params)
        save_cache(H_yy, 'H_yy', obj_params)
        save_cache(H_zz, 'H_zz', obj_params)

    # ========================================
    # End cached area: H_ii.
    # ========================================

    return H_xx, H_yy, H_zz, sx_list, sy_list, sz_list


In [None]:
def build_H(L, W, J, periodic=False):
    """Build one random-field Hamiltonian H = J * (H_xx + H_yy + H_zz) - sum_i h_i s^z_i.

    Parameters
    ----------
    L : int
        System size.
    W : float
        Disorder strength; the fields h_i are drawn by `get_h(L, W)`.
    J : float
        Coupling strength.
    periodic : bool, optional
        Whether the chain is periodic.

    Return
    ------
    H : scipy.sparse matrix
        The 2^L x 2^L Hamiltonian for one disorder realization.
    """

    H_xx, H_yy, H_zz, _, _, sz_list = build_H_ii(L, periodic)

    # The field term is random, so it is rebuilt on every call rather than cached.
    h = get_h(L, W)
    H_z = csr_matrix((2**L, 2**L))
    for h_i, sz_i in zip(h, sz_list):
        H_z = H_z + h_i * sz_i

    return J * (H_xx + H_yy + H_zz) - H_z

def build_Hs(L, W, J, periodic=False, num_Hs=1000):
    """Build `num_Hs` independent disorder realizations of the random-field Hamiltonian.

    Each Hamiltonian is H = J * (H_xx + H_yy + H_zz) - sum_i h_i s^z_i with a fresh
    draw of the fields h_i from `get_h(L, W)`.

    Parameters
    ----------
    L : int
        System size.
    W : float
        Disorder strength.
    J : float
        Coupling strength.
    periodic : bool, optional
        Whether the chain is periodic.
    num_Hs : int, optional
        Number of Hamiltonians to generate.

    Return
    ------
    Hs : list of scipy.sparse matrices
        One 2^L x 2^L Hamiltonian per disorder realization.
    """

    H_xx, H_yy, H_zz, _, _, sz_list = build_H_ii(L, periodic)

    # The coupling part does not depend on the disorder, so build it once.
    H_coupling = J * (H_xx + H_yy + H_zz)

    Hs = []
    for _ in tqdm(range(num_Hs), leave=False, desc='build_Hs()'):

        # H_z is not cached due to randomness.
        H_z = csr_matrix((2**L, 2**L))
        h = get_h(L, W)

        # Separate index name: the original reused `i` for both the sample and the site loop.
        for site in range(L):
            H_z = H_z + h[site] * sz_list[site]

        Hs.append(H_coupling - H_z)

    return Hs


In [None]:
@njit
def ED(H):
    """Fully diagonalize a dense Hermitian matrix with `np.linalg.eigh`.

    Exponentially expensive in L, only works for small enough `L` <~ 20.

    Parameters
    ----------
    H : numpy.ndarray
        Dense Hamiltonian to diagonalize (callers in this notebook pass `H.toarray()`).

    Return
    ------
    E : 1D numpy.ndarray
        All eigenvalues; asserted to be in ascending order.
    V : 2D numpy.ndarray
        Eigenvectors. The column V[:, i] is the normalized eigenvector for E[i].
    """

    eigenvalues, eigenvectors = np.linalg.eigh(H)

    assert is_sorted(eigenvalues), 'Eigenvalues not sorted!'

    return eigenvalues, eigenvectors


def ED_sparse(H, k):
    """Find the `k` eigenpairs of a sparse Hamiltonian nearest to E = 0.

    Uses `eigsh` with `sigma=0` and `which='LM'`, then reorders the result by
    increasing |E|.

    Parameters
    ----------
    H : scipy.sparse matrix
        Hamiltonian to diagonalize.
    k : int
        Number of eigenvalues around E = 0 to obtain.

    Return
    ------
    E : 1D numpy.ndarray
        Eigenvalues, ordered by increasing absolute value.
    V : 2D numpy.ndarray
        Eigenvectors. The column V[:, i] is the eigenvector for E[i].
    """

    eigenvalues, eigenvectors = eigsh(H, k=k, sigma=0, which='LM', return_eigenvectors=True)

    # Reorder so that the eigenvalue closest to zero comes first.
    order = np.argsort(np.abs(eigenvalues))
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    assert is_sorted(np.abs(eigenvalues)), 'Eigenvalues not sorted!'

    return eigenvalues, eigenvectors


def EDs(Hs):
    """Fully diagonalize each Hamiltonian in a list with the dense solver `ED`.

    Exponentially expensive in L, only works for small enough `L` <~ 20.
    The sparse solver is not used here because all eigenpairs are needed.

    Parameters
    ----------
    Hs : list of scipy.sparse.csr_matrix
        A list of Hamiltonians to diagonalize.

    Return
    ------
    Es : list of 1D numpy.ndarray
        Eigenvalues of each Hamiltonian, sorted in ascending order.
    Vs : list of 2D numpy.ndarray
        Eigenvectors of each Hamiltonian; column V[:, i] belongs to E[i].
    """

    # Each sparse Hamiltonian is converted to a dense array before diagonalization.
    eigenpairs = [ED(H.toarray()) for H in Hs]

    Es = [E for E, _ in eigenpairs]
    Vs = [V for _, V in eigenpairs]

    return Es, Vs


def EDs_sparse(Hs, k):
    """Find the `k` eigenpairs nearest to E = 0 for each Hamiltonian in a list.

    Delegates to `ED_sparse` for every Hamiltonian instead of repeating its
    solver call and sorting code.

    Parameters
    ----------
    Hs : list of scipy.sparse.csr_matrix
        A list of Hamiltonians to diagonalize.
    k : int
        Number of eigenvalues around E = 0 to obtain.

    Return
    ------
    Es : list of 1D numpy.ndarray
        For each Hamiltonian, its `k` eigenvalues ordered by increasing absolute value.
    Vs : list of 2D numpy.ndarray
        For each Hamiltonian, the matching eigenvectors; column V[:, i] belongs to E[i].
    """

    Es = []
    Vs = []
    for H in tqdm(Hs, leave=False, desc='EDs_sparse()'):
        E, V = ED_sparse(H, k)
        Es.append(E)
        Vs.append(V)

    return Es, Vs



In [None]:
# @njit
def select_N_eigenvalues(E, V, n, where='zeroest'):
    """
    Select n eigenpairs closest to the lowest, to zero, or to the highest eigenvalue.

    Parameters
    ----------
    E : 1D numpy.ndarray
        Sorted eigenvalues in ascending order.
    V : 2D numpy.ndarray
        Corresponding eigenvectors.
        The column V[:, i] is the normalized eigenvector corresponding to the eigenvalue E[i].
    n : int
        Number of eigenvalues to select. If n is at least len(E), all eigenpairs are returned.
    where : str
        Where to select the eigenvalues. where = {'lowest', 'zeroest', 'highest'}.
        Any other value returns E and V unchanged.

    Return
    ------
    E : 1D numpy.ndarray
        Selected eigenvalues. For 'zeroest' they are ordered by increasing absolute value;
        for 'lowest' and 'highest' they keep the input order.
    V : 2D numpy.ndarray
        Matching eigenvectors, as columns.
    """

    if where == 'lowest':
        E = E[:n]
        V = V[:, :n]
    elif where == 'highest':
        if n == 0:
            # E[-0:] is E[0:], i.e. everything; return an empty selection instead.
            E = E[:0]
            V = V[:, :0]
        else:
            E = E[-n:]
            V = V[:, -n:]
    elif where == 'zeroest':
        abs_E = np.abs(E)
        if n < len(E):
            # Partial sort: faster than a full argsort when only n entries are needed.
            # Source: https://stackoverflow.com/questions/16817948/i-have-need-the-n-minimum-index-values-in-a-numpy-array
            closest_indices = np.argpartition(abs_E, n)[:n]
        else:
            # argpartition requires kth < len(E); when everything is requested, take all indices.
            closest_indices = np.arange(len(E))
        # Order the selected entries by increasing |E|.
        sorted_indices = closest_indices[np.argsort(abs_E[closest_indices])]
        E = E[sorted_indices]
        V = V[:, sorted_indices]

    return E, V


### Computations

In [None]:
# Test solving a single Hamiltonian.
L = 8
W = 0.5
J = 1
periodic = False
num_Hs = 10

# Pass the `periodic` variable (not a literal) so this test uses the same boundary
# condition as the multi-Hamiltonian test that reuses these parameters.
H = build_H(L, W, J, periodic=periodic)
E, V = ED(H.toarray())
print('2^L: {}'.format(2**L))
print('#eigenvalues: {}'.format(len(E)))
print('E.shape: {}'.format(E.shape))
print('V.shape: {}'.format(V.shape))

In [None]:
%%timeit
# Solve using numpy's dense solver
# Timed together: full dense diagonalization plus selecting the 20 eigenvalues nearest zero.
E, V = ED(H.toarray())
E0, V0 = select_N_eigenvalues(E, V, 20)

In [None]:
%%timeit
# Solve using scipy's sparse solver instead.
# Timed for comparison with the dense solver: the same 20 eigenvalues nearest zero.
E1, V1 = ED_sparse(H, 20)

In [None]:
# Compute the 20 eigenpairs nearest zero with both solvers so they can be compared below.
# Solve using numpy's dense solver
E, V = ED(H.toarray())
E0, V0 = select_N_eigenvalues(E, V, 20)
# Solve using scipy's sparse solver instead.
E1, V1 = ED_sparse(H, 20)

In [None]:
# Check eigenvalues are sorted correctly, and eigenvectors are selected along the correct axis.
# Every column V[:, i] should have unit norm; a different norm would mean the wrong axis was sliced.
V0_norm = [np.linalg.norm(V0[:, i]) for i in range(len(E0))]
V1_norm = [np.linalg.norm(V1[:, i]) for i in range(len(E0))]

for E0i, E1i, V0i, V1i in zip(E0, E1, V0_norm, V1_norm):
    print('Numpy: {:+.12f} | Numpy norm: {:.16f}'.format(E0i, V0i))
    print('Scipy: {:+.12f} | Scipy norm: {:.16f}'.format(E1i, V1i))

# Compare all selected eigenpairs, not only the last one left over from the loop.
# Eigenvalues are sorted before comparing so that near-ties in |E| cannot cause a spurious failure.
assert np.allclose(np.sort(E0), np.sort(E1)), 'Eigenvalues evaluated using Numpy and Scipy should be identical.'
# Eigenvectors are only defined up to a phase, so their norms are compared rather than their entries.
assert np.allclose(V0_norm, V1_norm), 'Eigenvectors evaluated using Numpy and Scipy should be identical.'

In [None]:
# Test solving multiple Hamiltonians.
# Reuses L, W, J, periodic and num_Hs from the single-Hamiltonian test cell.
Hs = build_Hs(L, W, J, periodic, num_Hs)
Es, Vs = EDs(Hs)  # Dense solver: all eigenpairs of every Hamiltonian.
E0s, V0s = EDs_sparse(Hs, 20)  # Sparse solver: the 20 eigenpairs nearest zero of every Hamiltonian.

In [None]:
def batch_generate_ED_data(L, W, J=1, periodic=False, num_Hs=1000, num_EV=20, save_data=False, npsp='sp'):
    """Generate `num_Hs` disorder realizations and keep the `num_EV` eigenpairs nearest E = 0 of each.

    Parameters
    ----------
    L : int
        System size.
    W : float
        Disorder strength.
    J : float, optional
        Coupling strength.
    periodic : bool, optional
        Whether the Hamiltonian is periodic.
    num_Hs : int, optional
        Number of Hamiltonians to generate.
    num_EV : int, optional
        Number of eigenvalues around zero to keep per Hamiltonian.
    save_data : bool, optional
        If True, write the results with `save_ED` under the names 'E0s' and 'V0s'.
    npsp : str, optional
        'sp' uses the sparse solver (`EDs_sparse`). Any other value uses the dense
        solver (`EDs`) followed by `select_N_eigenvalues`.

    Return
    ------
    E0s : list of 1D numpy.ndarray
        Selected eigenvalues of each Hamiltonian.
    V0s : list of 2D numpy.ndarray
        Matching eigenvectors of each Hamiltonian, as columns.
    """

    Hs = build_Hs(L, W, J, periodic, num_Hs)

    if npsp == 'sp':
        E0s, V0s = EDs_sparse(Hs, num_EV)
    else:
        # Dense path: diagonalize fully, then keep only the eigenpairs nearest zero.
        Es, Vs = EDs(Hs)
        E0s = []
        V0s = []
        for E, V in zip(Es, Vs):
            E0, V0 = select_N_eigenvalues(E, V, num_EV)
            E0s.append(E0)
            V0s.append(V0)

    if save_data:
        save_ED(E0s, 'E0s', L, W, periodic)
        save_ED(V0s, 'V0s', L, W, periodic)

    return E0s, V0s


In [None]:
# Test execution time.
# This cell times the dense path (npsp='np'): one Hamiltonian per system size, nothing saved.
J  = 1                      # Always = 1
Ws = [8]                    # Disorder strength W.
Ls = list(range(8,12))      # System size L.
ns = [1]*len(Ls)            # Number of samples for each L.
ps = [False]                # Periodic or not.
et = []                     # Execution time.
num_EV = 20                 # Number of eigenvalues near zero to save.

# One entry is appended to `et` per (L, W, periodic) combination. With a single W and a
# single boundary condition, `et` has one entry per L, which the plot cell relies on.
for L, num_Hs in zip(Ls, ns):
    for W in Ws:
        for p in ps:
            start_time = time.time()
            batch_generate_ED_data(L, W, J, p, num_Hs, num_EV, save_data=False, npsp='np')
            exec_time = time.time() - start_time
            et.append(exec_time)
            print('Computed: L={:02d} | W={:.2f} | periodic={: <5}. Execution took {: 8.2f}s or {: 6.2f}min'.format(L, W, str(p), exec_time, exec_time/60))


In [None]:
# Plot the execution times measured in the previous cell (`Ls`, `et`):
# linear scale on the left, base-10 logarithm on the right.
fig, axes = plt.subplots(1, 2, figsize=(fig_w/dpi,fig_h/dpi), dpi=dpi, squeeze=False)

base = 10
axes[0,0].plot(Ls, et)
axes[0,1].plot(np.array(Ls), np.log(et) / np.log(base))  # Change of base: log_base(et).

axes[0,0].set_title('Execution time vs System size')
axes[0,1].set_title('Log(Execution time) vs System size')
axes[0,0].set_ylabel('Execution time')
axes[0,1].set_ylabel('Log(Execution time)')

# `axes` is 2D because of squeeze=False, hence the nested loop.
for axe in axes:
    for ax in axe:
        ax.set_xlabel('System size L')
        # ax.legend(loc='best')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # L is an integer: integer ticks only.

In [None]:
# Test execution time.
# This cell times the sparse path (npsp='sp'): one Hamiltonian per system size, nothing saved.
J  = 1                      # Always = 1
Ws = [8]                    # Disorder strength W.
Ls = list(range(8,16))      # System size L.
ns = [1]*len(Ls)            # Number of samples for each L.
ps = [False]                # Periodic or not.
et = []                     # Execution time.
num_EV = 20                 # Number of eigenvalues near zero to save.

# One entry is appended to `et` per (L, W, periodic) combination. With a single W and a
# single boundary condition, `et` has one entry per L, which the plot cell relies on.
for L, num_Hs in zip(Ls, ns):
    for W in Ws:
        for p in ps:
            start_time = time.time()
            batch_generate_ED_data(L, W, J, p, num_Hs, num_EV, save_data=False, npsp='sp')
            exec_time = time.time() - start_time
            et.append(exec_time)
            print('Computed: L={:02d} | W={:.2f} | periodic={: <5}. Execution took {: 8.2f}s or {: 6.2f}min'.format(L, W, str(p), exec_time, exec_time/60))


In [None]:
# Plot the execution times measured in the previous cell (`Ls`, `et`):
# linear scale on the left, base-10 logarithm on the right.
fig, axes = plt.subplots(1, 2, figsize=(fig_w/dpi,fig_h/dpi), dpi=dpi, squeeze=False)

base = 10
axes[0,0].plot(Ls, et)
axes[0,1].plot(np.array(Ls), np.log(et) / np.log(base))  # Change of base: log_base(et).

axes[0,0].set_title('Execution time vs System size')
axes[0,1].set_title('Log(Execution time) vs System size')
axes[0,0].set_ylabel('Execution time')
axes[0,1].set_ylabel('Log(Execution time)')

# `axes` is 2D because of squeeze=False, hence the nested loop.
for axe in axes:
    for ax in axe:
        ax.set_xlabel('System size L')
        # ax.legend(loc='best')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # L is an integer: integer ticks only.

In [None]:
# Sample parameters.
J  = 1                                 # Always = 1
# Ws holds the pair (0.5, 8) repeated ten times, followed by 1.0, 1.5, ..., 9.5.
Ws = [0.5, 8] * 10 + [i/2 for i in range(1*2, 10*2)] # Disorder strength W.
# Ls and ns are paired element-wise: larger systems get fewer samples.
Ls = [   8,   9,  10,  11, 12, 13, 14] # System size L.
ns = [1000, 500, 250, 100, 50, 20, 10] # Number of samples for each L.
ps = [False, True]                     # Periodic or not.

In [None]:
# Test file size. It's not feasible to store all eigenvectors.
J  = 1             # Always = 1
# Ws holds the pair (0.5, 8) repeated ten times, followed by 1.0, 1.5, ..., 9.5.
Ws = [0.5, 8] * 10 + [i/2 for i in range(1*2, 10*2)] # Disorder strength W.
Ls = [   8]        # System size L.
ns = [1000]        # Number of samples for each L.
ps = [False, True] # Periodic or not.

# The data generation call is commented out, so this loop only counts the
# (L, W, periodic) combinations in `fs`; the timing values are not used.
fs = 0
for L, num_Hs in zip(Ls, ns):
    for W in Ws:
        for p in ps:
            start_time = time.time()
            # batch_generate_ED_data(L, W, J, p, num_Hs)
            fs += 1
            exec_time = time.time() - start_time
            # print('Computed: L={:02d} | W={:.2f} | periodic={: <5}. Execution took {: 8.2f}s or {: 6.2f}min'.format(L, W, str(p), exec_time, exec_time/60))
# The per-file sizes below are hard-coded figures, not measured by this cell.
print('Eigenvectors `Vs` dominates the file size. Assume 1000 samples are generated for each W, for around 20 Ws:')
print('For L=8, each Es file is 2MB, Hs is 23MB, Vs is 1000MB.')
print('Estimate file size of each run is thus {}MB'.format(fs*(2+23+1000)))

In [None]:
print('The full eigenvectors `Vs` dominates the file size.')
# Prints a size of 4**i MB for L = 8 + i, i.e. a factor of 4 per additional site.
for i in range(5):
    print('For L = {:2d}, Vs is {:3d} MB.'.format(8+i, 4**i))

## Batch generate data and visualize eigenvectors for different W

In [None]:
def batch_gen_EVW_data_core(L, Ws, J=1, periodic=False, num_Hs=1000, num_EV=5):
    """Generate [E, V, W] samples for every disorder strength in `Ws`.

    For each W, `num_Hs` Hamiltonians are built and the `num_EV` eigenpairs
    nearest E = 0 of each are collected.

    Parameters
    ----------
    L : int
        System size.
    Ws : list of float
        A list of disorder strength W to realize.
    J : float
        Coupling strength.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_Hs : int
        Number of Hamiltonians to generate, per disorder strength W.
    num_EV : int
        Number of eigenvalues around zero to use as samples.

    Return
    ------
    EVWs : list of list
        A list where each element is [E, V, W] = [eigenvalue, eigenvector, disorder strength].
    """

    samples = []

    for W in Ws:

        hamiltonians = build_Hs(L, W, J, periodic, num_Hs)
        E0s, V0s = EDs_sparse(hamiltonians, num_EV)

        for E0, V0 in zip(E0s, V0s):
            # One sample per eigenpair: eigenvalue, its eigenvector column, and the W that produced it.
            samples.extend([E0[idx], V0[:, idx], W] for idx in range(len(E0)))

    return samples


In [None]:
def batch_gen_EVW_data_main(L, Ws, J=1, periodic=False, num_Hs=1000, num_EV=5, save_data=True):
    """Generate [E, V, W] samples and optionally store them as training data.

    The samples come from `batch_gen_EVW_data_core`. This variant is meant for
    the two training disorder strengths W = {0.5, 8}; the result is used to
    train a classifier neural network. When `save_data` is true the samples
    are written with `save_EVW_train`.

    Parameters
    ----------
    L : int
        System size.
    Ws : list of float
        A list of disorder strength W to realize.
    J : float
        Coupling strength. Always set to 1.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_Hs : int
        Number of Hamiltonians to generate, per disorder strength W.
    num_EV : int
        Number of eigenvalues around zero to use as samples.
    save_data : bool
        If True, report the number of samples and save them to file.

    Returns
    -------
    EVWs : list of list
        A list where each element is [E, V, W] = [eigenvalue, eigenvector, disorder strength].
    """
    samples = batch_gen_EVW_data_core(L, Ws, J, periodic, num_Hs, num_EV)

    # Nothing to persist: hand the in-memory samples straight back.
    if not save_data:
        return samples

    print('Writing {: 5d} `EVW` of system size L = {:02d}.'.format(len(samples), L), flush=True)
    save_EVW_train(samples, 'EVWs', L, periodic, num_EV)
    return samples


In [None]:
def batch_gen_EVW_data_rand(L, Ws, J=1, periodic=False, num_Hs=1000, num_EV=5, save_data=True):
    """Generate [E, V, W] samples and optionally store them via `save_EVW_valid`.

    Identical to `batch_gen_EVW_data_main` except for the save helper that is
    called. Presumably intended for disorder strengths other than the two
    training values (e.g. randomly drawn W) - confirm against the caller.

    Parameters
    ----------
    L : int
        System size.
    Ws : list of float
        A list of disorder strength W to realize.
    J : float
        Coupling strength. Always set to 1.
    periodic : bool
        Whether the Hamiltonian is periodic.
    num_Hs : int
        Number of Hamiltonians to generate, per disorder strength W.
    num_EV : int
        Number of eigenvalues around zero to use as samples.
    save_data : bool
        If True, report the number of samples and save them to file.

    Returns
    -------
    EVWs : list of list
        A list where each element is [E, V, W] = [eigenvalue, eigenvector, disorder strength].
    """
    generated = batch_gen_EVW_data_core(L, Ws, J, periodic, num_Hs, num_EV)

    if save_data:
        n_generated = len(generated)
        print('Writing {: 5d} `EVW` of system size L = {:02d}.'.format(n_generated, L), flush=True)
        save_EVW_valid(generated, 'EVWs', L, periodic, num_EV)

    return generated


In [None]:
base_samples = 512  # = 2**9

In [None]:
# Test drawing images of reduced density matrix.
J  = 1                      # Always = 1
Ws = [0.5, 8]               # Disorder strength W.
Ls = [10]                   # System size L.
Hs = [base_samples]         # Number of samples per L per W.
ps = [False]                # Periodic or not.
et = []                     # Execution time.
num_EV = 1                  # Number of eigenvalues near zero to save.
EVWs_main = []

for L, num_Hs in zip(Ls, Hs):
    for p in ps:
        start_time = time.time()
        EVWsi = batch_gen_EVW_data_main(L, Ws, J, p, num_Hs, num_EV, save_data=False)
        EVWs_main.extend(EVWsi)
        exec_time = time.time() - start_time
        et.append(exec_time)
        print('Computed: L={:02d} | periodic={: <5}.'.format(L, str(p)))
        print('Execution took {: 8.2f}s or {: 6.2f}min.'.format(exec_time, exec_time/60))


In [None]:
# Sanity-check the sample container: sample count, fields per sample, vector length.
print(len(EVWs_main))  # = base_samples * len(Ws) * num_EV
first_EVW = EVWs_main[0]
print(len(first_EVW))     # = 3
print(len(first_EVW[1]))  # = 2**L

In [None]:
# Split the generated eigenvectors by the disorder strength they came from.
Wext_Vs = []  # Eigenvectors for W = 0.5 (small W, extended phase).
Wloc_Vs = []  # Eigenvectors for W = 8 (large W, localized phase).

for E, V, W in EVWs_main:

    if W == 0.5:
        Wext_Vs.append(V)
    elif W == 8:
        Wloc_Vs.append(V)

Wext_Vs = np.array(Wext_Vs)
Wloc_Vs = np.array(Wloc_Vs)
print(Wext_Vs.shape)
print(Wloc_Vs.shape)
# Reshape due to `num_EV`. Each row holds one eigenvector of length 2**10,
# which matches the system size L = 10 used to generate `EVWs_main`.
Wext_Vs = Wext_Vs.reshape(-1, 2**10)
# Reshape `Wloc_Vs` itself; reshaping `Wext_Vs` here would overwrite the
# localized samples with the extended ones and make both plot panels identical.
Wloc_Vs = Wloc_Vs.reshape(-1, 2**10)
print(Wext_Vs.shape)
print(Wloc_Vs.shape)

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(fig_w/dpi, fig_h/dpi), dpi=dpi, squeeze=False)

# Left panel: W = 0.5 samples; right panel: W = 8 samples. Both panels use the
# same colour range so the magnitudes can be compared directly.
for ax, Vs_shown, W_shown in zip(axes[0], (Wext_Vs, Wloc_Vs), (0.5, 8.0)):
    ax.imshow(np.abs(Vs_shown), vmin=0, vmax=1e-1)
    ax.annotate('W={:3.1f}'.format(W_shown), (0.5, 0.5), xycoords='axes fraction', ha='center', color='w')

    # Hide the tick labels but keep the axis titles.
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.set_xlabel('Eigenvector coefficients')
    ax.set_ylabel('Eigenvector samples')

fig.tight_layout()
print("Doesn't seem to have any discernable patterns.")

## Batch generate data (execution part)

In [None]:
# Batch generate eigenvectors.
#
# Every batch writes one training set (W in `Ws_main`) and one random-W set
# (W in `Ws_rand`) for each system size, `num_EV` and boundary condition.
# `dt()` is presumably the timestamp helper defined earlier in the notebook;
# it must stay callable, so it is never rebound inside this cell.

k = 1
batches = 100
base_sample = 100000 // k // batches # Divide by k (num_EV)
rand_sample = 50            # Samples per random W.
Ws_main = [0.5, 8]          # Disorder strength W.
# 2 * base_sample // rand_sample random W values, so that a batch realizes as
# many Hamiltonians for the random set as for the two training W values.
# NOTE(review): the random W values are drawn once and reused for every batch,
# and no random seed is set here - confirm this is intended.
Ws_rand = np.random.uniform(0.1, high=5.9, size=(2 * base_sample // rand_sample,))
J  = 1                      # Always = 1
Ls = [10]                   # System sizes L.
ps = [False, True]          # Periodic or not.
et = []                     # Execution time.
Hs_main = [base_sample]*len(Ls) # Number of samples per L per W.
Hs_rand = [rand_sample]*len(Ls) # Number of samples per L per W.
num_EVs = [k]               # Number of eigenvalues near zero to save.

for i in range(batches):
    print('{} | Processing batch {:03d} of {:d}:'.format(dt(), i+1, batches), flush=True)
    print(' ', flush=True)
    for L, num_Hs_m, num_Hs_r in zip(Ls, Hs_main, Hs_rand):
        for num_EV in num_EVs:
            for p in ps:
                start_time = time.time()
                print('{} | Generating training data for L={:02d}...'.format(dt(), L), flush=True)
                batch_gen_EVW_data_main(L, Ws_main, J, p, num_Hs_m, num_EV, save_data=True)
                print('{} | Generating random data for L={:02d}...'.format(dt(), L), flush=True)
                batch_gen_EVW_data_rand(L, Ws_rand, J, p, num_Hs_r, num_EV, save_data=True)
                exec_time = time.time() - start_time
                et.append(exec_time)
                print('{} | Computed: L={:02d} | num_EV={} | periodic={: <5}.'.format(dt(), L, num_EV, str(p)), flush=True)
                print('{} | Execution took {: 8.2f}s or {: 6.2f}min.'.format(dt(), exec_time, exec_time/60), flush=True)
                print(' ', flush=True)

    # Checked once per completed batch so a long run can be stopped between batches.
    if check_shutdown_signal():
        break

## Neural network
See the other notebook for training and prediction.