In [31]:
# Import

import numpy as np
import itertools
import ast
import math
import scipy
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
np.random.seed(0)

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import datasets
from sklearn import svm
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import Normalize

np.random.seed(0)

sigI = np.array([[1.0, 0.0j], [0.0j, 1.0]])
sigX = np.array([[0.0j, 1.0], [1.0, 0.0j]])
sigY = np.array([[0.0j, -1.0j], [1.0j, 0.0j]])
sigZ = np.array([[1.0, 0.0j], [0.0j, -1.0]])

N = 50

def kron(ls):
    A = ls[0]
    for X in ls[1:]:
        A = np.kron(A, X)
    return A

def generate_all_zero_state():
    return [np.array([[1.0, 0.0j], [0.0j, 0.0]]) for i in range(N)]

def generate_all_one_state():
    return [np.array([[0.0, 0.0j], [0.0j, 1.0]]) for i in range(N)]

def generate_half_half_state():
    return [np.array([[0.0, 0.0j], [0.0j, 1.0]]) if i < N/2 else np.array([[1.0, 0.0j], [0.0j, 0.0]]) for i in range(N)]

def generate_neel_state():
    return [np.array([[0.0, 0.0j], [0.0j, 1.0]]) if i % 2 == 0 else np.array([[1.0, 0.0j], [0.0j, 0.0]]) for i in range(N)]

def generate_all_plus_state():
    return [np.array([[0.5, 0.5], [0.5, 0.5+0.0j]]) for i in range(N)]

def generate_random_product_state():
    list_rhoi = []
    for i in range(N):
        v = np.random.normal(size=3)
        v /= np.linalg.norm(v)
        rhoi = sigI / 2.0 + (v[0] * sigX / 2.0) + (v[1] * sigY / 2.0) + (v[2] * sigZ / 2.0)
        list_rhoi.append(rhoi)
    return list_rhoi

def twobytwo_to_Pauli(list_rhoi):
    list_rhoi_new = []
    for rhoi in list_rhoi:
        list_rhoi_new.append(np.trace(np.matmul(sigX, rhoi)).real)
        list_rhoi_new.append(np.trace(np.matmul(sigY, rhoi)).real)
        list_rhoi_new.append(np.trace(np.matmul(sigZ, rhoi)).real)
    return list_rhoi_new

def get_RDM_in_Pauli(list_rhoi, k):
    feat_vec = []
    for i in range(N-k+1):
        for list_P in itertools.product([-1, 0, 1, 2], repeat=k):
            val = 1.0
            for c, P in enumerate(list_P):
                if P == -1: continue
                val *= list_rhoi[(3*(i+c))+P]
            assert(np.abs(val.imag) < 1e-7)
            feat_vec.append(val.real)
    return feat_vec

# Train a sparsity-enforcing ML model
def train_sparse_ML(all_states, all_values, test_size = 0.25, random_seed = 0):
    list_of_score = []
    list_of_clf = []
    list_of_bestk = []

    for pos in range(0, len(all_values[0])):
        print("Pos:", pos)

        def twobytwo_to_Pauli(list_rhoi):
            list_rhoi_new = []
            for rhoi in list_rhoi:
                list_rhoi_new.append(np.trace(np.matmul(sigX, rhoi)).real)
                list_rhoi_new.append(np.trace(np.matmul(sigY, rhoi)).real)
                list_rhoi_new.append(np.trace(np.matmul(sigZ, rhoi)).real)
            return list_rhoi_new

        def get_RDM_in_Pauli(list_rhoi, k):
            feat_vec = []
            for i in range(N-k+1):
                for list_P in itertools.product([-1, 0, 1, 2], repeat=k):
                    val = 1.0
                    for c, P in enumerate(list_P):
                        if P == -1: continue
                        val *= list_rhoi[(3*(i+c))+P]
                    assert(np.abs(val.imag) < 1e-7)
                    feat_vec.append(val.real)
            return feat_vec

        best_cv_score = 999.0
        best_clf = None
        best_k = None

        _, test_idx, _, _ = train_test_split(range(len(all_states)), range(len(all_states)), test_size=test_size, random_state=random_seed)

        for k in [1, 2, 3, 4]:
            print("Validate k =", k)
            X, y_true, y_noisy = [], [], []

            for data in zip(all_states, all_values):
                X.append(get_RDM_in_Pauli(data[0], k))
                y_true.append(data[1][pos])
                y_noisy.append((2 * np.random.binomial(500, (data[1][pos]+1)/2, 1)[0] / 500) - 1)

            X = np.array(X)
            y_true = np.array(y_true)
            y_noisy = np.array(y_noisy)

            X_train, X_test, y_train, y_test = train_test_split(X, y_noisy, test_size=test_size, random_state=random_seed)

            ML_method = lambda Cx : linear_model.Lasso(alpha=Cx)
            # ML_method = lambda Cx: linear_model.Ridge(alpha=Cx)

            for alpha in [2**(-15), 2**(-14), 2**(-13), 2**(-12), 2**(-11), 2**(-10), 2**(-9), 2**(-8), 2**(-7), 2**(-6), 2**(-5), 2**(-4), 2**(-3)]:
                score = -np.mean(cross_val_score(ML_method(alpha), X_train, y_train, cv=2, scoring="neg_root_mean_squared_error"))
                print(score)
                if best_cv_score > score:
                    clf = ML_method(alpha).fit(X_train, y_train)

                    best_cv_score = score
                    best_clf = clf
                    best_k = k

                    y_pred = clf.predict(X_test)
                    test_score = np.linalg.norm(y_pred - y_true[test_idx]) / (len(y_pred) ** 0.5)

        print("Scores:", best_cv_score, test_score)
        list_of_score.append(test_score)
        list_of_clf.append(best_clf)
        list_of_bestk.append(best_k)
        
    return list_of_score, list_of_clf, list_of_bestk

# Train a sparsity-enforcing ML model
def train_sparse_ML_transformed(all_X_list, all_values, test_size = 0.25, random_seed = 0):
    list_of_score = []
    list_of_clf = []
    list_of_bestk = []

    for pos in range(0, len(all_values[0])):
#         print("Pos:", pos)

        best_cv_score = 999.0
        best_clf = None
        best_k = None

        _, test_idx, _, _ = train_test_split(range(len(all_values)), range(len(all_values)), test_size=test_size, random_state=random_seed)
        
        for k in [1, 2]:
#             print("Validate k =", k)

            X = all_X_list[k-1]
            
            y_true, y_noisy = [], []
            for data in all_values:
                y_true.append(data[pos])
                y_noisy.append((2 * np.random.binomial(500, (data[pos]+1)/2, 1)[0] / 500) - 1)
            y_true = np.array(y_true)
            y_noisy = np.array(y_noisy)

            X_train, X_test, y_train, y_test = train_test_split(X, y_noisy, test_size=test_size, random_state=random_seed)

            ML_method = lambda Cx : linear_model.Lasso(alpha=Cx)
            # ML_method = lambda Cx: linear_model.Ridge(alpha=Cx)

            for alpha in [2**(-15), 2**(-14), 2**(-13), 2**(-12), 2**(-11), 2**(-10), 2**(-9), 2**(-8), 2**(-7), 2**(-6), 2**(-5), 2**(-4), 2**(-3)]:
                score = -np.mean(cross_val_score(ML_method(alpha), X_train, y_train, cv=2, scoring="neg_root_mean_squared_error"))
#                 print(score)
                if best_cv_score > score:
                    clf = ML_method(alpha).fit(X_train, y_train)

                    best_cv_score = score
                    best_clf = clf
                    best_k = k

                    y_pred = clf.predict(X_test)
                    test_score = np.linalg.norm(y_pred - y_true[test_idx]) / (len(y_pred) ** 0.5)

        print("Scores:", best_cv_score, test_score)
        list_of_score.append(test_score)
        list_of_clf.append(best_clf)
        list_of_bestk.append(best_k)
        
    return list_of_score, list_of_clf, list_of_bestk

def transform_states(all_states):
    all_X_list = []
    
    for k in [1, 2]:
        X = []
        for data in all_states:
            X.append(get_RDM_in_Pauli(data, k))
        all_X_list.append(np.array(X))
    return all_X_list

# XY model with homogeneous field

In [32]:
N = 50
sample_size = 100
all_data_training_set_scaling = []
seed = 0
test_size = 0.5
num_holdout = 1

# XY model with homogeneous field

all_states = []
all_values = []

with open("50spins-oneZ-allt-homogeneous/states.txt") as f:
    for line in f:
        all_states.append(ast.literal_eval(line))

with open("50spins-oneZ-allt-homogeneous/values.txt") as f:
    for line in f:
        all_values.append([ast.literal_eval(line)[6]])

# Train / holdout split
np.random.seed(seed)
sample_idx = np.random.choice(len(all_states), (num_holdout+1)*sample_size, replace=False) # Randomize sampled states
train_states, train_values = np.array(all_states)[
    sample_idx[:sample_size]], np.array(all_values)[sample_idx[:sample_size]]

# I setup the framework for multiple holdout but right now still should be the same as using 
# only one holdout in my current code below
holdout_states_ens, holdout_values_ens = [],[]
for i in range(1, num_holdout+1):
    holdout_states, holdout_values = np.array(all_states)[
        sample_idx[i*sample_size:(i+1)*sample_size]], np.array(all_values)[sample_idx[i*sample_size:(i+1)*sample_size]]
    holdout_states_ens.append(holdout_states)
    holdout_values_ens.append(holdout_values)
h = holdout_states_ens[0]

train_X_list = transform_states(train_states)
holdout_X_list_ens = [transform_states(holdout_states_ens[i]) for i in range(num_holdout)]

In [33]:
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.simplefilter("ignore", category=ConvergenceWarning)

# Obtain classifiers
print("Training clf_train...")
list_of_score_train, list_of_clf_train, list_of_bestk_train = train_sparse_ML_transformed(
    train_X_list, train_values, test_size=test_size, random_seed=seed)
clf_train = list_of_clf_train[0]
k_train = list_of_bestk_train[0]

print("Training clf_holdout...")
clf_holdout_ens = []
score_holdout_ens = []
k_holdout_ens = []
for i in range(num_holdout):
    list_of_score_holdout, list_of_clf_holdout, list_of_bestk_holdout = train_sparse_ML_transformed(
        holdout_X_list_ens[i], holdout_values_ens[i], test_size=test_size, random_seed=seed)
    clf_holdout_ens.append(list_of_clf_holdout[0])
    score_holdout_ens.append(list_of_score_holdout[0])
    k_holdout_ens.append(list_of_bestk_holdout[0])

Training clf_train...
Scores: 0.15589832750540034 0.15131445874505367
Training clf_holdout...
Scores: 0.14870408289542514 0.13186634459285798


In [34]:
import statistics
# Adaptive optimization

def predict_holdout(x):
    # Get holdout prediction
    predictions = []
    for i in range(num_holdout):
        # Get best classifier with best k
        clf_holdout = clf_holdout_ens[i]
        k_holdout = k_holdout_ens[i]
        X_holdout = np.array([get_RDM_in_Pauli(x, k_holdout)])
        predictions.append(clf_holdout.predict(X_holdout))
    return predictions[0][0]

def predict_train(x):
    # Get train prediction
    X_train = np.array([get_RDM_in_Pauli(x, k_train)])
    return clf_train.predict(X_train)[0]

def get_product_state(y):
    # Compute product state from input spherical coordinates
    x = []
    for i in range(N):
        phi = y[i]
        theta = y[N+i]
        x += [np.sin(phi)*np.cos(theta),np.sin(phi)*np.sin(theta),np.cos(phi)] # spherical coordinates
    return x

def objective(y):
    # Sanity check: optimizes absolute difference between train and holdout predictions
    x = get_product_state(y)
    y_train = predict_train(x)
    y_holdout = predict_holdout(x)
    return -abs(y_train - y_holdout)

def objective_train(y):
    # Maximizes train predictions
    x = get_product_state(y)
    y_train = predict_train(x)
    return -y_train

def objective_holdout(y):
    x = get_product_state(y)
    y_holdout = predict_holdout(x)
    return -y_holdout

In [44]:
# Sanity check
def objective_state(x):
    y_train = predict_train(x)
    y_holdout = predict_holdout(x)
    return abs(y_train - y_holdout)

# max_err = 0
# max_idx = 0
# avg_err = 0
# print("Print: max error, index of maximum error, average error")
# np.random.seed(seed)
# for i in range(10000):
#     y0 = np.concatenate((np.random.rand(N)*np.pi,np.random.rand(N)*2*np.pi))
#     if i % 500 == 0:
#         print(f"step {i}: {max_err}, {max_idx}, {avg_err}")
#     err = -objective(y0)
#     if err > max_err:
#         max_err = err
#         max_idx = i
#     avg_err += err / 10000
# print(max_err)

# Sometimes I get something like this, i.e. classifier always outputs the same number
for i in range(10000):
    if i % 500 == 0:
        print(predict_holdout(all_states[i]))

-0.02989498178038802
-0.003952952740798337
-0.003984906875219496
-0.012859176363692045
-0.014430906301153894
-0.042361020431807984
-0.035377841828585624
0.0034870980550325732
-0.008008942019106981
-0.0017956570916227675
0.0003991383187626693
0.010380826212937417
-0.013539911701509161
-0.040601530581937134
0.01369567055487688
0.007523707802490575
-0.007727939983817527
0.0020305517304285946
-0.0331312661791566
-0.03786184743731322


In [53]:
from scipy.optimize import minimize, fmin_powell

# Nelder-mead
np.random.seed(seed)
bnds = tuple([(0,np.pi)]*N+[(0,2*np.pi)]*N)
y0 = np.concatenate((np.random.rand(N)*np.pi,np.random.rand(N)*2*np.pi))
res = minimize(objective_train, y0, method='Powell', bounds=bnds, options={'disp': True})
# res = minimize(objective_train, y0, method='Nelder-Mead', bounds=bnds, options={'disp': True})
# res = minimize(objective_train, y0, method='L-BFGS-B', bounds=bnds, options={'disp': True})
# res = minimize(objective_train, y0, method='trust-constr', bounds=bnds, options={'disp': True})
print(res)

Optimization terminated successfully.
         Current function value: -0.853297
         Iterations: 5
         Function evaluations: 7346
 message: Optimization terminated successfully.
 success: True
  status: 0
     fun: -0.8532967224410052
       x: [ 1.527e+00  1.793e+00 ...  4.720e+00  1.571e+00]
     nit: 5
   direc: [[ 1.000e+00  0.000e+00 ...  0.000e+00  0.000e+00]
           [ 0.000e+00  1.000e+00 ...  0.000e+00  0.000e+00]
           ...
           [ 0.000e+00  0.000e+00 ...  1.000e+00  0.000e+00]
           [ 9.658e-08 -2.862e-07 ... -8.315e-17 -1.507e-18]]
    nfev: 7346


In [54]:
# Print the error, i.e. if large, then we have overfit
state = get_product_state(res.x)
y_train = predict_train(state)
y_holdout = predict_holdout(state)
print(abs(y_train-y_holdout))
print(res.nfev)

0.8701776855146985
7346


Repeat this for many initializations

In [55]:
import warnings
from sklearn.exceptions import ConvergenceWarning
import statistics
from scipy.optimize import minimize
warnings.simplefilter("ignore", category=ConvergenceWarning)

N = 50
test_size = 0.5
num_holdout = 3
sample_size_list = [20, 60, 100, 150, 200, 300, 500]

# XY model with homogeneous field

all_states = []
all_values = []

with open("50spins-oneZ-allt-homogeneous/states.txt") as f:
    for line in f:
        all_states.append(ast.literal_eval(line))

with open("50spins-oneZ-allt-homogeneous/values.txt") as f:
    for line in f:
        all_values.append([ast.literal_eval(line)[6]])
        
table = np.zeros((10,len(sample_size_list)))

for s, sample_size in enumerate(sample_size_list):
    for seed in range(10):
        print("--------------------------------------")
        print(f"ITERATION: sample size {sample_size}, seed {seed}")
        # Train / holdout split
        np.random.seed(seed)
        sample_idx = np.random.choice(len(all_states), (num_holdout+1)*sample_size, replace=False) # Randomize sampled states
        train_states, train_values = np.array(all_states)[
            sample_idx[:sample_size]], np.array(all_values)[sample_idx[:sample_size]]
        
        holdout_states_ens, holdout_values_ens = [],[]
        for i in range(1, num_holdout+1):
            holdout_states, holdout_values = np.array(all_states)[
                sample_idx[i*sample_size:(i+1)*sample_size]], np.array(all_values)[sample_idx[i*sample_size:(i+1)*sample_size]]
            holdout_states_ens.append(holdout_states)
            holdout_values_ens.append(holdout_values)

        # Transform states
        train_X_list = transform_states(train_states)
        holdout_X_list_ens = [transform_states(holdout_states_ens[i]) for i in range(num_holdout)]
        
        # Obtain classifiers
        print("Training clf_train...")
        list_of_score_train, list_of_clf_train, list_of_bestk_train = train_sparse_ML_transformed(
            train_X_list, train_values, test_size=test_size, random_seed=seed)
        clf_train = list_of_clf_train[0]
        k_train = list_of_bestk_train[0]

        print("Training clf_holdout...")
        clf_holdout_ens = []
        score_holdout_ens = []
        k_holdout_ens = []
        for i in range(num_holdout):
            list_of_score_holdout, list_of_clf_holdout, list_of_bestk_holdout = train_sparse_ML_transformed(
                holdout_X_list_ens[i], holdout_values_ens[i], test_size=test_size, random_seed=seed)
            clf_holdout_ens.append(list_of_clf_holdout[0])
            score_holdout_ens.append(list_of_score_holdout[0])
            k_holdout_ens.append(list_of_bestk_holdout[0])
        
        def predict_holdout(x):
            # Get holdout prediction
            predictions = []
            for i in range(num_holdout):
                # Get best classifier with best k
                clf_holdout = clf_holdout_ens[i]
                k_holdout = k_holdout_ens[i]
                X_holdout = np.array([get_RDM_in_Pauli(x, k_holdout)])
                predictions.append(clf_holdout.predict(X_holdout))
            return statistics.median(predictions)[0]

        def predict_train(x):
            # Get train prediction
            X_train = np.array([get_RDM_in_Pauli(x, k_train)])
            return clf_train.predict(X_train)[0]

        def get_product_state(y):
            # Compute product state from input spherical coordinates
            x = []
            for i in range(N):
                phi = y[i]
                theta = y[N+i]
                x += [np.sin(phi)*np.cos(theta),np.sin(phi)*np.sin(theta),np.cos(phi)] # spherical coordinates
            return x

        def objective(y):
            # Sanity check: optimizes absolute difference between train and holdout predictions
            x = get_product_state(y)
            y_train = predict_train(x)
            y_holdout = predict_holdout(x)
            return -abs(y_train - y_holdout)

        def objective_train(y):
            # Maximizes train predictions
            x = get_product_state(y)
            y_train = predict_train(x)
            return -y_train

        def objective_holdout(y):
            x = get_product_state(y)
            y_holdout = predict_holdout(x)
            return -y_holdout
        
        # Adaptive optimization
        np.random.seed(seed)
        bnds = tuple([(0,np.pi)]*N+[(0,2*np.pi)]*N) # spherical coords
        y0 = np.concatenate((np.random.rand(N)*np.pi,np.random.rand(N)*2*np.pi)) # initial guess
        res = minimize(objective_train, y0, method='Powell', bounds=bnds, options={'disp': False})
        num_eval = res.nfev
        num_iter = res.nit
        # Adaptive error
        state = get_product_state(res.x)
        y_train = predict_train(state)
        y_holdout = predict_holdout(state)
        adapt_err = abs(y_train-y_holdout)
        # Nonadaptive error
        nonadapt_err = 0
        np.random.seed(seed)
        # slightly sus, i.e. maybe should do based on num_eval?
        for i in range(num_eval):
            y0 = np.concatenate((np.random.rand(N)*np.pi,np.random.rand(N)*2*np.pi))
            err = -objective(y0)
            if err > nonadapt_err:
                nonadapt_err = err
        print("Number of iterations: ", num_iter)
        print("Nonadaptive error: ", nonadapt_err)
        print("Adaptive error: ", adapt_err)
        table[seed,s] = adapt_err - nonadapt_err

--------------------------------------
ITERATION: sample size 20, seed 0
Training clf_train...
Scores: 0.16462395921915518 0.12911599216449912
Training clf_holdout...
Scores: 0.16864085852093336 0.1638682023767973
Scores: 0.0863293194447738 0.20104740214809017
Scores: 0.14473888498874282 0.16870663097824282
Number of iterations:  1
Nonadaptive error:  0.0532
Adaptive error:  0.0532
--------------------------------------
ITERATION: sample size 20, seed 1
Training clf_train...
Scores: 0.2072709887596873 0.21446674440225758
Training clf_holdout...
Scores: 0.08619344518090447 0.08878962089002698
Scores: 0.08500319447232102 0.17236315656378495
Scores: 0.1449320146221621 0.16573287256100375
Number of iterations:  1
Nonadaptive error:  0.23273078130859914
Adaptive error:  0.1252
--------------------------------------
ITERATION: sample size 20, seed 2
Training clf_train...
Scores: 0.1809684234136905 0.18331344719082018
Training clf_holdout...
Scores: 0.1403567738076975 0.08566088281898569
Scor

Training clf_train...
Scores: 0.15589832750540034 0.15131445874505367
Training clf_holdout...
Scores: 0.14870408289542514 0.13186634459285798
Scores: 0.13734130296602953 0.1646625965324173
Scores: 0.14178673063317782 0.15393353032467216
Number of iterations:  5
Nonadaptive error:  0.4951881640908899
Adaptive error:  0.8431367224410052
--------------------------------------
ITERATION: sample size 100, seed 1
Training clf_train...
Scores: 0.1375474375001225 0.13718042207371414
Training clf_holdout...
Scores: 0.13555574058699071 0.15245610607401194
Scores: 0.11632517897192995 0.1364837021705868
Scores: 0.14261656148971202 0.12830740133625182
Number of iterations:  3
Nonadaptive error:  0.34852333110356876
Adaptive error:  0.5767345374679959
--------------------------------------
ITERATION: sample size 100, seed 2
Training clf_train...
Scores: 0.1478818171210039 0.1286202090263973
Training clf_holdout...
Scores: 0.13537471906774196 0.1569042127146887
Scores: 0.14380480813076657 0.151184327

Training clf_train...
Scores: 0.14665970304927345 0.1546261114204475
Training clf_holdout...
Scores: 0.15323832080019648 0.09777384937748847
Scores: 0.13279389904077338 0.12914604689016335
Scores: 0.12834693236556408 0.13222067402501517
Number of iterations:  1
Nonadaptive error:  0.3243987031740513
Adaptive error:  0.3649345299141259
--------------------------------------
ITERATION: sample size 200, seed 1
Training clf_train...
Scores: 0.1401101410576877 0.13347575744401455
Training clf_holdout...
Scores: 0.1389489629413892 0.10536219673773928
Scores: 0.1401394768764822 0.11485893801223553
Scores: 0.15897617657141874 0.14833513874832796
Number of iterations:  5
Nonadaptive error:  0.4439648329103598
Adaptive error:  0.9499980662467538
--------------------------------------
ITERATION: sample size 200, seed 2
Training clf_train...
Scores: 0.13230909520948106 0.13321333624582754
Training clf_holdout...
Scores: 0.1378529745718604 0.13011832505519666
Scores: 0.12474773422861843 0.112497046

Training clf_train...
Scores: 0.11236239948795357 0.08648922108937367
Training clf_holdout...
Scores: 0.12652912246652248 0.09225199727075499
Scores: 0.1027126122869448 0.08243883008596879
Scores: 0.11207402966128648 0.08525050599132933
Number of iterations:  4
Nonadaptive error:  0.1947105416411315
Adaptive error:  0.060734067549909176
--------------------------------------
ITERATION: sample size 500, seed 1
Training clf_train...
Scores: 0.10767408707152004 0.08155067491043927
Training clf_holdout...
Scores: 0.12128490113247478 0.09377931211753657
Scores: 0.11107310651392623 0.09742864601564438
Scores: 0.11837350240558399 0.08908663613463784
Number of iterations:  19
Nonadaptive error:  0.2563370082699524
Adaptive error:  0.38627837123453346
--------------------------------------
ITERATION: sample size 500, seed 2
Training clf_train...
Scores: 0.11878784385839575 0.08809306499705702
Training clf_holdout...
Scores: 0.11792707550524371 0.09357178709330609
Scores: 0.1196443896975045 0.09

In [56]:
df_xy = pd.DataFrame(data=table, columns=[20, 60, 100, 150, 200, 300, 500])
df_xy

Unnamed: 0,20,60,100,150,200,300,500
0,0.0,0.2148908,0.347949,0.470945,0.040536,-0.106924,-0.133976
1,-0.107531,-0.01182229,0.228211,0.000278,0.506033,-0.142239,0.129941
2,-0.143801,-0.04209989,-0.013528,0.053678,-0.089497,0.01544,-0.001957
3,0.0,0.0,-0.050607,0.113708,-0.174694,0.017304,-0.017593
4,-0.031628,-0.003073195,0.264285,0.067247,-0.36015,0.044943,-0.031897
5,0.006097,0.3520529,-0.139356,0.023468,0.005132,0.02522,0.234201
6,2e-06,0.2233623,0.032379,0.088815,-0.034389,-0.039525,-0.060063
7,-0.1862,-0.1382878,0.244494,0.246064,0.012358,-0.159537,0.002096
8,-0.208983,3.238954e-09,0.081871,0.244426,0.077264,0.066682,-0.041078
9,0.120924,-0.04518474,-0.058407,0.737773,-0.133465,0.022939,0.026361


In [57]:
df_xy.mean(axis=0)

20    -0.055112
60     0.054984
100    0.093729
150    0.204640
200   -0.015087
300   -0.025570
500    0.010604
dtype: float64

# Ising with homogeneous field

In [None]:
warnings.simplefilter("ignore", category=ConvergenceWarning)

N = 50
test_size = 0.5
num_holdout = 3

# Ising model with homogeneous field

all_states = []
all_values = []

with open("50spins-oneZ-allt-homogeneous-Ising/states.txt") as f:
    for line in f:
        all_states.append(ast.literal_eval(line))

with open("50spins-oneZ-allt-homogeneous-Ising/values.txt") as f:
    for line in f:
        all_values.append([ast.literal_eval(line)[6]])
        
table = np.zeros((10,6))

for s, sample_size in enumerate([20, 60, 100, 150, 200, 500]):
    for seed in range(10):
        print("--------------------------------------")
        print(f"ITERATION: sample size {sample_size}, seed {seed}")
        # Train / holdout split
        np.random.seed(seed)
        sample_idx = np.random.choice(len(all_states), (num_holdout+1)*sample_size, replace=False) # Randomize sampled states
        train_states, train_values = np.array(all_states)[
            sample_idx[:sample_size]], np.array(all_values)[sample_idx[:sample_size]]
        
        holdout_states_ens, holdout_values_ens = [],[]
        for i in range(1, num_holdout+1):
            holdout_states, holdout_values = np.array(all_states)[
                sample_idx[i*sample_size:(i+1)*sample_size]], np.array(all_values)[sample_idx[i*sample_size:(i+1)*sample_size]]
            holdout_states_ens.append(holdout_states)
            holdout_values_ens.append(holdout_values)

        # Transform states
        train_X_list = transform_states(train_states)
        holdout_X_list_ens = [transform_states(holdout_states_ens[i]) for i in range(num_holdout)]
        
        # Obtain classifiers
        print("Training clf_train...")
        list_of_score_train, list_of_clf_train, list_of_bestk_train = train_sparse_ML_transformed(
            train_X_list, train_values, test_size=test_size, random_seed=seed)
        clf_train = list_of_clf_train[0]
        k_train = list_of_bestk_train[0]

        print("Training clf_holdout...")
        clf_holdout_ens = []
        score_holdout_ens = []
        k_holdout_ens = []
        for i in range(num_holdout):
            list_of_score_holdout, list_of_clf_holdout, list_of_bestk_holdout = train_sparse_ML_transformed(
                holdout_X_list_ens[i], holdout_values_ens[i], test_size=test_size, random_seed=seed)
            clf_holdout_ens.append(list_of_clf_holdout[0])
            score_holdout_ens.append(list_of_score_holdout[0])
            k_holdout_ens.append(list_of_bestk_holdout[0])
        
        def predict_holdout(x):
            # Get holdout prediction
            predictions = []
            for i in range(num_holdout):
                # Get best classifier with best k
                clf_holdout = clf_holdout_ens[i]
                k_holdout = k_holdout_ens[i]
                X_holdout = np.array([get_RDM_in_Pauli(x, k_holdout)])
                predictions.append(clf_holdout.predict(X_holdout))
            return statistics.median(predictions)[0]

        def predict_train(x):
            # Get train prediction
            X_train = np.array([get_RDM_in_Pauli(x, k_train)])
            return clf_train.predict(X_train)[0]

        def get_product_state(y):
            # Compute product state from input spherical coordinates
            x = []
            for i in range(N):
                phi = y[i]
                theta = y[N+i]
                x += [np.sin(phi)*np.cos(theta),np.sin(phi)*np.sin(theta),np.cos(phi)] # spherical coordinates
            return x

        def objective(y):
            # Sanity check: optimizes absolute difference between train and holdout predictions
            x = get_product_state(y)
            y_train = predict_train(x)
            y_holdout = predict_holdout(x)
            return -abs(y_train - y_holdout)

        def objective_train(y):
            # Maximizes train predictions
            x = get_product_state(y)
            y_train = predict_train(x)
            return -y_train

        def objective_holdout(y):
            x = get_product_state(y)
            y_holdout = predict_holdout(x)
            return -y_holdout
        
        # Adaptive optimization
        np.random.seed(seed)
        bnds = tuple([(0,np.pi)]*N+[(0,2*np.pi)]*N) # spherical coords
        y0 = np.concatenate((np.random.rand(N)*np.pi,np.random.rand(N)*2*np.pi)) # initial guess
        res = minimize(objective_train, y0, method='L-BFGS-B', bounds=bnds, options={'disp': False})
        num_eval = res.nfev
        num_iter = res.nit
        # Adaptive error
        state = get_product_state(res.x)
        y_train = predict_train(state)
        y_holdout = predict_holdout(state)
        adapt_err = abs(y_train-y_holdout)
        # Nonadaptive error
        nonadapt_err = 0
        np.random.seed(seed)
        # slightly sus, i.e. maybe should do based on num_eval?
        for i in range(num_iter):
            y0 = np.concatenate((np.random.rand(N)*np.pi,np.random.rand(N)*2*np.pi))
            err = -objective(y0)
            if err > nonadapt_err:
                nonadapt_err = err
        print("Number of iterations: ", num_iter)
        print("Nonadaptive error: ", nonadapt_err)
        print("Adaptive error: ", adapt_err)
        table[seed,s] = adapt_err - nonadapt_err

--------------------------------------
ITERATION: sample size 20, seed 0
Training clf_train...
Scores: 0.09148684550355596 0.10356823968380986
Training clf_holdout...
Scores: 0.09000554335199401 0.12394863867265815
Scores: 0.15886182714772337 0.24736171195576845
Scores: 0.1036874941148683 0.12195346956958858
Number of iterations:  21
Nonadaptive error:  0.1322223671244211
Adaptive error:  0.27261770293731835
--------------------------------------
ITERATION: sample size 20, seed 1
Training clf_train...
Scores: 0.09337342602775593 0.15645991895709516
Training clf_holdout...
Scores: 0.13984585663163449 0.09709296278668628
Scores: 0.11949184631430475 0.14147948750380598
Scores: 0.0403799567443455 0.10935942101906534
Number of iterations:  44
Nonadaptive error:  0.25233303465688395
Adaptive error:  0.3395264744682905
--------------------------------------
ITERATION: sample size 20, seed 2
Training clf_train...
Scores: 0.059231291468772915 0.10853695515858641
Training clf_holdout...
Scores: 

In [None]:
df_ising_homog = pd.DataFrame(data=table, columns=[20, 60, 100, 150, 200, 500])
df_ising_homog