# Predicting ground states for 2D Heisenberg models

In [1]:
# Basic functionalities
import numpy as np
import random
import copy
import ast
import datetime as dt
from timeit import default_timer as timer
from os import path

# Neural tangent kernel
import jax
from neural_tangents import stax

# Traditional ML methods and techniques
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import datasets
from sklearn import svm
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor

In [20]:
# Load data

length = 7 # length = 4, 5, 6, 7, 8, 9 for orig; only 4, 5, 6, 7 for new
width = 5

shadow_size = 50 # up to 1000

Xfull = [] # Shape = (number of data) x (number of params)
Ytrain = [] # Shape = (number of data) x (number of pairs), estimated 2-point correlation functions
Yfull = [] # Shape = (number of data) x (number of pairs), exact 2-point correlation functions

def get_path_prefix(data='orig'):
    prefix = './heisenberg_data/heisenberg_{}x{}'.format(length, width)
    if data == 'new':
        prefix = './new_data/data_{}x{}/simulation_{}x{}'.format(length, width, length, width)
    return prefix
    
data_name = 'new'
prefix = get_path_prefix(data=data_name)

for idx in range(1, 301):
    if path.exists('{}_id{}_XX.txt'.format(prefix, idx)) == False:
        continue
    with open('{}_id{}_samples.txt'.format(prefix, idx), 'r') as f:
        single_data = []
        classical_shadow_big = [[int(c) for i, c in enumerate(line.split("\t"))] for line in f]
        classical_shadow = classical_shadow_big[0:shadow_size]
        for i in range(length * 5):
            for j in range(length * 5):
                if i == j:
                    single_data.append(1.0)
                    continue
                corr = 0
                cnt = 0
                for shadow in classical_shadow:
                    if shadow[i] // 2 == shadow[j] // 2:
                        corr += 3 if shadow[i] % 2 == shadow[j] % 2 else -3
                    cnt += 1
                single_data.append(corr / cnt)
        Ytrain.append(single_data)
    with open('{}_id{}_XX.txt'.format(prefix, idx), 'r') as f:
        single_data = []
        for line in f:
            for i, c in enumerate(line.split("\t")):
                v = float(c)
                single_data.append(v)
        Yfull.append(single_data)
    with open('{}_id{}_couplings.txt'.format(prefix, idx), 'r') as f:
        single_data = []
        for line in f:
            for i, c in enumerate(line.split("\t")):
                v = float(c)
                single_data.append(v)
        Xfull.append(single_data)

In [21]:
# Print information

Xfull = np.array(Xfull)
print("number of data (N) * number of params (m) =", Xfull.shape)
Ytrain = np.array(Ytrain)
Yfull = np.array(Yfull)
print("number of data (N) * number of pairs =", Yfull.shape)

# print(Xfull[0])
# print(Yfull[0].reshape((length * width, length * width)))

number of data (N) * number of params (m) = (300, 58)
number of data (N) * number of pairs = (300, 1225)


In [202]:
# Normalize Xfull

xmin = np.amin(Xfull)
xmax = np.amax(Xfull)

# normalize so that all entries are between -1 and 1 using min-max feature scaling
Xfull_norm = np.array(list(map(lambda row : list(map(lambda x : -1 + 2*(x - xmin)/(xmax - xmin), row)), Xfull)))

print(Xfull_norm[0])

[-0.02014906  0.2717357  -0.06096961 -0.23706368  0.31690494  0.41663855
 -0.66754314 -0.50492558 -0.59081781 -0.0386828   0.07612815 -0.98220066
 -0.89010411  0.58685137  0.07249359 -0.81369117 -0.5424053   0.0807436
 -0.90632894  0.39767257  0.57647512  0.59998379  0.56864021  0.31006697
  0.3619183  -0.94362459 -0.84345385 -0.47786594 -0.78827037 -0.37955961
  0.54432053]


In [203]:
# Categorizing pairs of qubits by distance

# grid of qubits
grid = np.array(range(1, length * width + 1)).reshape((length, width))

# generate all edges in grid in same order as Xfull
all_edges = []
for i in range(0, length):
    for j in range(1, width + 1):
        if i != length - 1:
            all_edges.append((width * i + j, width * (i + 1) + j))
        if j != width:
            all_edges.append((width * i + j, width * i + j + 1))
print(all_edges)
            
def calc_distance(q1, q2):
    # Given two qubits q1, q2 (1-indexed integers) in length x width grid
    # Output l1 distance between q1 and q2 in grid

    pos1 = np.array(np.where(grid == q1)).T[0]
    pos2 = np.array(np.where(grid == q2)).T[0]

    return np.abs(pos1[0] - pos2[0]) + np.abs(pos1[1] - pos2[1])

def get_nearby_qubit_pairs(d):
    # Given distance d > 0
    # Output all pairs of qubits that are within distance d of each other
    
    if d == 1:
        return all_edges
    
    qubit_pairs = []
    for q1 in range(1, length * width + 1):
        for q2 in range(1, length * width + 1):
            dist = calc_distance(q1, q2)
            pair = tuple(sorted((q1, q2)))
            if dist == d and pair not in qubit_pairs:
                qubit_pairs.append(pair)
    
    return qubit_pairs
            

[(1, 6), (1, 2), (2, 7), (2, 3), (3, 8), (3, 4), (4, 9), (4, 5), (5, 10), (6, 11), (6, 7), (7, 12), (7, 8), (8, 13), (8, 9), (9, 14), (9, 10), (10, 15), (11, 16), (11, 12), (12, 17), (12, 13), (13, 18), (13, 14), (14, 19), (14, 15), (15, 20), (16, 17), (17, 18), (18, 19), (19, 20)]


In [81]:
# Finding local patches of a given radius

def get_local_region_qubits(q, delta1):
    # Given a qubit q (1-indexed integer) in length x width grid and radius delta1
    # delta1 = -1 if all qubits are in local region
    # Output list of qubits (1-indexed integers) within a radius of delta1 of q
    
    if delta1 == 0:
        return [q]
    elif delta1 == -1:
        return list(range(1, length * width + 1))
    
    local_qubits = []
    for q2 in range(1, length * width + 1):
        dist = calc_distance(q, q2)
        
        if dist <= delta1:
            local_qubits.append(q2)
    
    return local_qubits

def get_local_region_edges(q1, q2, delta1):
    # Given two qubits q1, q2 (1-indexed integers) in length x width grid and radius delta1
    # delta1 = -1 if all qubits are in local region
    # Output list of tuples of qubits (1-indexed integers) corresponding to edges in local region of radius delta1

    if delta1 == 0:
        return [(q1, q2)]
    elif delta1 == -1:
        return all_edges

    local_qubits = list(set(get_local_region_qubits(q1, delta1) + get_local_region_qubits(q2, delta1)))
    
    local_edges = []
    for edge in all_edges:
        (q1, q2) = edge
        if q1 in local_qubits and q2 in local_qubits:
            local_edges.append(edge)

    return local_edges

def get_local_region_params(q1, q2, delta1, data, i):
    # Given two qubits q1, q2 (1-indexed integers) in length x width grid, radius delta1, and input data (i.e., Xfull)
    # delta1 = -1 if all qubits are considered nearby
    # Output data but only for parameters corresponding to edges within radius delta1
    
    edges = get_local_region_edges(q1, q2, delta1)
    
    indices = [all_edges.index(edge) for edge in edges]
    
    return np.array([data[i][j] for j in sorted(indices)])
    

In [90]:
print('edges: ' + str(get_local_region_edges(1,2,1)))
print('params: ' + str(get_local_region_params(1,2,1, Xfull_norm, 0)))

edges: [(1, 6), (1, 2), (2, 7), (2, 3), (6, 7)]
params: [ 0.18167065  0.53366212  0.13244408 -0.07991235  0.29777386]


In [216]:
# Feature mapping

def get_feature_vectors(delta1, R, data, omega, gamma=1.0, q1=0, q2=1):
    # Given radius delta1 and hyperparameter R (number of nonlinear features per local region), input data, and fixed randomness omega
    # delta1 = -1 if all qubits are considered nearby
    # Output concatenated feature vectors
    
    # to store all concatenated feature vectors
    all_feature_vectors = []
    
    for i in range(len(data)):
        feature_vector_concat = []
        # iterate over all possible local regions
        n = len(all_edges)
        for k in range(n):
            (q1, q2) = all_edges[k]
            data_local = get_local_region_params(q1, q2, delta1, data, i)
            m_local = len(data_local)

            # do nonlinear feature map on each vector in data_local
            feature_vector = []

            for j in range(R):
                omega_j = omega[k][j]
                val = np.exp(np.dot(omega_j, data_local) * gamma / (m_local ** 0.5) * 1j)
                feature_vector.append(np.real(val))
                feature_vector.append(np.imag(val))

            # concatenate feature vectors together
            feature_vector_concat += feature_vector
            
        all_feature_vectors.append(feature_vector_concat)
        
    # note all_feature_vectors is of size number of data (N) x (2 * R * number of local regions)
    return np.array(all_feature_vectors)
        

In [221]:
# Training and testing algorithm

# set size of local region
delta1 = 0

# set max number of feature entries
max_R = 1000

# set of pairs of qubits we care about predicting correlation function for
d = 1
qubits = get_nearby_qubit_pairs(d)

# set test size
test_size = 0.4

train_idx, test_idx, _, _ = train_test_split(range(len(Xfull)), range(len(Xfull)), test_size=test_size, random_state=0)

# generate omega to pass into feature mapping
omega = []
for (q1, q2) in all_edges: # TODO: change this as well when changing feature mapping
    m_local = len(get_local_region_edges(q1, q2, delta1))
    omega_sub = []
    for j in range(max_R):
        omega_sub.append(np.random.normal(0, 1, m_local))
    omega.append(omega_sub)

#with open('./results/{}_data_shadow_size={}/results_{}x{}_{}_data_lasso_R=0-320_C=-13--9_gamma=0.4-1.0_delta1={}_local_feature_qubits{}.txt'.format(data_name, shadow_size, length, width, data_name, delta1, d), 'w') as f:
for (q1, q2) in qubits[8:9]:
    print('(q1, q2) =', (q1, q2))
    #print('(q1, q2) =', (q1, q2), file=f)

    def train_and_predict():
        # consider the pair (q1, q2)
        global q1, q2

        # training data (estimated from measurement data)
        y = np.array([Ytrain[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
        X_train, X_test, y_train, y_test = train_test_split(Xfull_norm, y, test_size=test_size, random_state=0)

        # testing data (exact expectation values)
        y_clean = np.array([Yfull[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
        _, _, _, y_test_clean = train_test_split(Xfull_norm, y_clean, test_size=test_size, random_state=0)

        # use cross validation to find the best hyperparameters
        best_cv_score, test_score = 999.0, 999.0
        ML_method = lambda Cx : linear_model.Lasso(alpha=Cx, max_iter=30000)
        # ML_method = lambda Cx: KernelRidge(kernel='linear', alpha=Cx)

        for R in [5, 10, 20, 40, 80]:
            print(R)
            for gamma in [0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 1.0]:
                # feature mapping
                Xfeature_train = get_feature_vectors(delta1, R, X_train, omega, gamma, q1, q2)
                Xfeature_test = get_feature_vectors(delta1, R, X_test, omega, gamma, q1, q2)

                for C in [2**(-12), 2**(-11), 2**(-10), 2**(-9), 2**(-8), 2**(-7)]:
                    score = -np.mean(cross_val_score(ML_method(C), Xfeature_train, y_train, cv=5, scoring="neg_root_mean_squared_error"))
                    if best_cv_score > score:
                        clf = ML_method(C).fit(Xfeature_train, y_train.ravel())
                        test_score = np.linalg.norm(clf.predict(Xfeature_test).ravel() - y_test_clean.ravel()) / (len(y_test) ** 0.5)
                        best_cv_score = score
                        #print(clf.coef_)
                        coef = clf.coef_.reshape((len(all_edges), 2 * R))
                        
                        print(list(zip(all_edges, np.linalg.norm(coef, axis=1))))
                        print(R, gamma, C, score, test_score)
                        #print(R, gamma, C, score, test_score, file=f)

        return best_cv_score, test_score

    #print(train_and_predict(), file=f)
    print(train_and_predict())

(q1, q2) = (5, 10)
5
[((1, 6), 0.053595340814637724), ((1, 2), 0.04021351626476746), ((2, 7), 0.027281502178224573), ((2, 3), 0.03504675567317304), ((3, 8), 0.07688562508428554), ((3, 4), 0.13421304959663102), ((4, 9), 0.9640757267263806), ((4, 5), 0.21396885738837754), ((5, 10), 0.7527127392111249), ((6, 11), 0.002470349890239215), ((6, 7), 0.09935371714121025), ((7, 12), 0.12484990488569246), ((7, 8), 0.02335888727720967), ((8, 13), 0.013712033765945372), ((8, 9), 0.12175700084048097), ((9, 14), 0.10077795163695377), ((9, 10), 0.278929969514324), ((10, 15), 0.7088487963730341), ((11, 16), 0.053107136859272146), ((11, 12), 0.1743569465120368), ((12, 17), 0.040002687523060014), ((12, 13), 0.01805395918249298), ((13, 18), 0.09778756993565206), ((13, 14), 0.05054445042407667), ((14, 19), 0.0036353624842403224), ((14, 15), 0.005943804882595105), ((15, 20), 0.1653777605336501), ((16, 17), 0.009519410131453751), ((17, 18), 0.17011973842283118), ((18, 19), 0.0), ((19, 20), 0.1350573945388574

In [206]:
# Training and testing algorithm

# set size of local region
delta1 = 1

# set max number of feature entries
max_R = 1000

# set of pairs of qubits we care about predicting correlation function for
d = 1
qubits = get_nearby_qubit_pairs(d)

# set test size
test_size = 0.4

train_idx, test_idx, _, _ = train_test_split(range(len(Xfull)), range(len(Xfull)), test_size=test_size, random_state=0)

# generate omega to pass into feature mapping
omega = []
for (q1, q2) in all_edges: # TODO: change this as well when changing feature mapping
    m_local = len(get_local_region_edges(q1, q2, delta1))
    omega_sub = []
    for j in range(max_R):
        omega_sub.append(np.random.normal(0, 1, m_local))
    omega.append(omega_sub)

#with open('./results/{}_data_shadow_size={}/results_{}x{}_{}_data_lasso_R=0-320_C=-13--9_gamma=0.4-1.0_delta1={}_local_feature_qubits{}.txt'.format(data_name, shadow_size, length, width, data_name, delta1, d), 'w') as f:
for (q1, q2) in qubits[7:8]:
    print('(q1, q2) =', (q1, q2))
    #print('(q1, q2) =', (q1, q2), file=f)

    def train_and_predict():
        # consider the pair (q1, q2)
        global q1, q2

        # training data (estimated from measurement data)
        y = np.array([Ytrain[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
        X_train, X_test, y_train, y_test = train_test_split(Xfull_norm, y, test_size=test_size, random_state=0)

        # testing data (exact expectation values)
        y_clean = np.array([Yfull[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
        _, _, _, y_test_clean = train_test_split(Xfull_norm, y_clean, test_size=test_size, random_state=0)

        # use cross validation to find the best hyperparameters
        best_cv_score, test_score = 999.0, 999.0
        ML_method = lambda Cx : linear_model.Lasso(alpha=Cx, max_iter=100000)
        # ML_method = lambda Cx: KernelRidge(kernel='linear', alpha=Cx)

        for R in [0, 5, 10, 20, 40, 80, 160, 320]:
            for gamma in [0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 1.0]:
                # feature mapping
                Xfeature_train = get_feature_vectors(delta1, R, X_train, omega, gamma, q1, q2)
                Xfeature_test = get_feature_vectors(delta1, R, X_test, omega, gamma, q1, q2)

                for C in [2**(-13), 2**(-12), 2**(-11), 2**(-10), 2**(-9)]:
                    score = -np.mean(cross_val_score(ML_method(C), Xfeature_train, y_train, cv=5, scoring="neg_root_mean_squared_error"))
                    if best_cv_score > score:
                        clf = ML_method(C).fit(Xfeature_train, y_train.ravel())
                        test_score = np.linalg.norm(clf.predict(Xfeature_test).ravel() - y_test_clean.ravel()) / (len(y_test) ** 0.5)
                        best_cv_score = score
                        print(R, gamma, C, score, test_score)
                        #print(R, gamma, C, score, test_score, file=f)

        return best_cv_score, test_score

    #print(train_and_predict(), file=f)
    print(train_and_predict())

(q1, q2) = (4, 5)
0 0.4 0.0001220703125 0.249689886220962 0.24969465470979296
5 0.4 0.0001220703125 0.14271310672603807 0.14047354251508498
5 0.4 0.000244140625 0.1314114257197012 0.12540542819407136
5 0.4 0.00048828125 0.12123482352749315 0.12209969250668187
5 0.4 0.0009765625 0.11314618346030228 0.12102611279360974
5 0.5 0.001953125 0.11292876186530872 0.11997474710373347
10 0.4 0.0009765625 0.11177700164581035 0.11878038952934031
10 0.4 0.001953125 0.10952901389655374 0.11398372710787537
20 0.4 0.001953125 0.1065944023340896 0.11180554432302076


KeyboardInterrupt: 

In [191]:
# Training and testing algorithm

# set size of local region
delta1 = 2

# set max number of feature entries
max_R = 1000

# set of pairs of qubits we care about predicting correlation function for
d = 1
qubits = get_nearby_qubit_pairs(d)

# set test size
test_size = 0.4

train_idx, test_idx, _, _ = train_test_split(range(len(Xfull)), range(len(Xfull)), test_size=test_size, random_state=0)

# generate omega to pass into feature mapping
omega = []
for (q1, q2) in all_edges: # TODO: change this as well when changing feature mapping
    m_local = len(get_local_region_edges(q1, q2, delta1))
    omega_sub = []
    for j in range(max_R):
        omega_sub.append(np.random.normal(0, 1, m_local))
    omega.append(omega_sub)

with open('./results/{}_data_shadow_size={}/results_{}x{}_{}_data_lasso_R=0-320_C=-13--9_gamma=0.4-1.0_delta1={}_local_feature_qubits{}.txt'.format(data_name, shadow_size, length, width, data_name, delta1, d), 'w') as f:
    for (q1, q2) in qubits[7:8]:
        print('(q1, q2) =', (q1, q2))
        print('(q1, q2) =', (q1, q2), file=f)

        def train_and_predict():
            # consider the pair (q1, q2)
            global q1, q2

            # training data (estimated from measurement data)
            y = np.array([Ytrain[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
            X_train, X_test, y_train, y_test = train_test_split(Xfull_norm, y, test_size=test_size, random_state=0)

            # testing data (exact expectation values)
            y_clean = np.array([Yfull[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
            _, _, _, y_test_clean = train_test_split(Xfull_norm, y_clean, test_size=test_size, random_state=0)

            # use cross validation to find the best hyperparameters
            best_cv_score, test_score = 999.0, 999.0
            ML_method = lambda Cx : linear_model.Lasso(alpha=Cx, max_iter=100000)
            # ML_method = lambda Cx: KernelRidge(kernel='linear', alpha=Cx)
            
            for R in [0, 5, 10, 20, 40, 80, 160, 320]:
                for gamma in [0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 1.0]:
                    # feature mapping
                    Xfeature_train = get_feature_vectors(delta1, R, X_train, omega, gamma, q1, q2)
                    Xfeature_test = get_feature_vectors(delta1, R, X_test, omega, gamma, q1, q2)

                    for C in [2**(-13), 2**(-12), 2**(-11), 2**(-10), 2**(-9)]:
                        score = -np.mean(cross_val_score(ML_method(C), Xfeature_train, y_train, cv=5, scoring="neg_root_mean_squared_error"))
                        if best_cv_score > score:
                            clf = ML_method(C).fit(Xfeature_train, y_train.ravel())
                            test_score = np.linalg.norm(clf.predict(Xfeature_test).ravel() - y_test_clean.ravel()) / (len(y_test) ** 0.5)
                            best_cv_score = score
                            print(R, gamma, C, score, test_score)
                            print(R, gamma, C, score, test_score, file=f)

            return best_cv_score, test_score

        print(train_and_predict(), file=f)

(q1, q2) = (4, 5)
0 0.4 0.0001220703125 0.25199203412263527 0.25619912147907437
5 0.4 0.0009765625 0.24720200952473928 0.15459052582642505
5 0.4 0.001953125 0.22952820276951585 0.14151391504496996
5 0.6 0.001953125 0.2276538246085466 0.15845341398706472
10 0.75 0.0009765625 0.22740041212569445 0.1424694908043115
10 1.0 0.0009765625 0.22301038506632692 0.15385457775371217


In [192]:
# Training and testing algorithm

# set size of local region
delta1 = -1

# set max number of feature entries
max_R = 1000

# set of pairs of qubits we care about predicting correlation function for
d = 1
qubits = get_nearby_qubit_pairs(d)

# set test size
test_size = 0.4

train_idx, test_idx, _, _ = train_test_split(range(len(Xfull)), range(len(Xfull)), test_size=test_size, random_state=0)

# generate omega to pass into feature mapping
omega = []
for (q1, q2) in all_edges: # TODO: change this as well when changing feature mapping
    m_local = len(get_local_region_edges(q1, q2, delta1))
    omega_sub = []
    for j in range(max_R):
        omega_sub.append(np.random.normal(0, 1, m_local))
    omega.append(omega_sub)

with open('./results/{}_data_shadow_size={}/results_{}x{}_{}_data_lasso_R=0-320_C=-13--9_gamma=0.4-1.0_delta1={}_local_feature_qubits{}.txt'.format(data_name, shadow_size, length, width, data_name, delta1, d), 'w') as f:
    for (q1, q2) in qubits[7:8]:
        print('(q1, q2) =', (q1, q2))
        print('(q1, q2) =', (q1, q2), file=f)

        def train_and_predict():
            # consider the pair (q1, q2)
            global q1, q2

            # training data (estimated from measurement data)
            y = np.array([Ytrain[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
            X_train, X_test, y_train, y_test = train_test_split(Xfull_norm, y, test_size=test_size, random_state=0)

            # testing data (exact expectation values)
            y_clean = np.array([Yfull[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
            _, _, _, y_test_clean = train_test_split(Xfull_norm, y_clean, test_size=test_size, random_state=0)

            # use cross validation to find the best hyperparameters
            best_cv_score, test_score = 999.0, 999.0
            ML_method = lambda Cx : linear_model.Lasso(alpha=Cx, max_iter=10000)
            # ML_method = lambda Cx: KernelRidge(kernel='linear', alpha=Cx)
            
            for R in [0, 5, 10, 20, 40, 80, 160, 320]:
                #for gamma in [0.0625, 0.125, 0.25, 0.5, 1.0, 2.0]:
                #for gamma in [0.125, 0.25, 0.3, 0.4, 0.5, 0.6, 0.75, 1.0]:
                for gamma in [0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 1.0]:
                    # feature mapping
                    Xfeature_train = get_feature_vectors(delta1, R, X_train, omega, gamma, q1, q2)
                    Xfeature_test = get_feature_vectors(delta1, R, X_test, omega, gamma, q1, q2)

                    for C in [2**(-13), 2**(-12), 2**(-11), 2**(-10), 2**(-9)]:
                        score = -np.mean(cross_val_score(ML_method(C), Xfeature_train, y_train, cv=5, scoring="neg_root_mean_squared_error"))
                        if best_cv_score > score:
                            clf = ML_method(C).fit(Xfeature_train, y_train.ravel())
                            test_score = np.linalg.norm(clf.predict(Xfeature_test).ravel() - y_test_clean.ravel()) / (len(y_test) ** 0.5)
                            best_cv_score = score
                            print(R, gamma, C, score, test_score)
                            print(R, gamma, C, score, test_score, file=f)

            return best_cv_score, test_score

        print(train_and_predict(), file=f)

(q1, q2) = (4, 5)
0 0.4 0.0001220703125 0.25199203412263527 0.25619912147907437
5 0.4 0.001953125 0.24501113107857425 0.15218067682718503
10 0.4 0.001953125 0.24017175517430428 0.13937621462981103
320 0.75 0.001953125 0.23977756736623013 0.15168143810555285
320 1.0 0.0001220703125 0.2289171505966186 0.1635257364368335


In [148]:
# Training and testing algorithm
# Configured for outputting coefficients of linear model

# set size of local region
delta1 = 0

# set max number of feature entries
max_R = 1000

# set of pairs of qubits we care about predicting correlation function for
d = 1
qubits = get_nearby_qubit_pairs(d)

# set test size
test_size = 0.4

train_idx, test_idx, _, _ = train_test_split(range(len(Xfull)), range(len(Xfull)), test_size=test_size, random_state=0)

# generate omega to pass into feature mapping
omega = []
for (q1, q2) in all_edges: # TODO: change this as well when changing feature mapping
    m_local = len(get_local_region_edges(q1, q2, delta1))
    omega_sub = []
    for j in range(max_R):
        omega_sub.append(np.random.normal(0, 1, m_local))
    omega.append(omega_sub)

with open('./results/{}_data_clf_coefficients/coefficient_{}x{}_delta1={}_local_feature_qubits{}.txt'.format(data_name, length, width, delta1, d), 'w') as f:
    for (q1, q2) in qubits:
        print('(q1, q2) =', (q1, q2))
        print('(q1, q2) =', (q1, q2), file=f)

        def train_and_predict():
            # consider the pair (q1, q2)
            global q1, q2

            # training data (estimated from measurement data)
            y = np.array([Ytrain[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
            X_train, X_test, y_train, y_test = train_test_split(Xfull_norm, y, test_size=test_size, random_state=0)

            # testing data (exact expectation values)
            y_clean = np.array([Yfull[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
            _, _, _, y_test_clean = train_test_split(Xfull_norm, y_clean, test_size=test_size, random_state=0)

            # use cross validation to find the best hyperparameters
            best_cv_score, test_score = 999.0, 999.0
            best_coef = 0
            ML_method = lambda Cx : linear_model.Lasso(alpha=Cx, max_iter=30000)
            # ML_method = lambda Cx: KernelRidge(kernel='linear', alpha=Cx)

            for R in [0, 5, 10, 20, 40, 80, 160, 320]:
                for gamma in [0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 1.0]:
                    # feature mapping
                    Xfeature_train = get_feature_vectors(delta1, R, X_train, omega, gamma, q1, q2)
                    Xfeature_test = get_feature_vectors(delta1, R, X_test, omega, gamma, q1, q2)

                    for C in [2**(-13), 2**(-12), 2**(-11), 2**(-10), 2**(-9)]:
                        score = -np.mean(cross_val_score(ML_method(C), Xfeature_train, y_train, cv=5, scoring="neg_root_mean_squared_error"))
                        if best_cv_score > score:
                            clf = ML_method(C).fit(Xfeature_train, y_train.ravel())
                            test_score = np.linalg.norm(clf.predict(Xfeature_test).ravel() - y_test_clean.ravel()) / (len(y_test) ** 0.5)
                            best_cv_score = score
                            best_coef = clf.coef_
    #                             print(clf.coef_)
                            print(R, gamma, C, score, test_score)

            return best_cv_score, test_score, best_coef

        print(train_and_predict()[2], file=f)
        print(train_and_predict())

(q1, q2) = (1, 6)
0 0.4 0.0001220703125 0.22778087377579329 0.20181111768635496
5 0.4 0.0001220703125 0.20254744526977436 0.14195624644832938
5 0.4 0.000244140625 0.16831936209519216 0.12606384178245242
5 0.4 0.00048828125 0.14759287159289125 0.10979723957682785
5 0.4 0.0009765625 0.13853449163996046 0.09425642869705556
5 0.4 0.001953125 0.11982115476677713 0.08770085998869379


KeyboardInterrupt: 

In [200]:
#
# Dirichlet kernel
#

kernel_dir = np.zeros((len(Xfull), Xfull.shape[1]*5))
for i, x1 in enumerate(Xfull):
    cnt = 0
    for k in range(len(x1)):
        for k1 in range(-2, 3):
            kernel_dir[i, cnt] += np.cos(np.pi * k1 * x1[k])
            cnt += 1
print("constructed Dirichlet kernel")
            
#
# Neural tangent kernel
#
    
init_fn, apply_fn, kernel_fn = stax.serial(
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(1)
)
kernel_NN2 = kernel_fn(Xfull, Xfull, 'ntk')

init_fn, apply_fn, kernel_fn = stax.serial(
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(1)
)
kernel_NN3 = kernel_fn(Xfull, Xfull, 'ntk')
                
init_fn, apply_fn, kernel_fn = stax.serial(
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(1)
)
kernel_NN4 = kernel_fn(Xfull, Xfull, 'ntk')

init_fn, apply_fn, kernel_fn = stax.serial(
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(32), stax.Relu(),
    stax.Dense(1)
)
kernel_NN5 = kernel_fn(Xfull, Xfull, 'ntk')

list_kernel_NN = [kernel_NN2, kernel_NN3, kernel_NN4, kernel_NN5]

for r in range(len(list_kernel_NN)):
    kernel = list_kernel_NN[r].copy()
    for i in range(len(list_kernel_NN[r])):
        for j in range(len(list_kernel_NN[r])):
            # list_kernel_NN[r][i][j] /= (list_kernel_NN[r][i][i] * list_kernel_NN[r][j][j]) ** 0.5
            list_kernel_NN[r][i].at[j].divide((kernel[i][i] * kernel[j][j]) ** 0.5)
print("constructed neural tangent kernel")
            
#
# RBF kernel is defined in Sklearn
#
print("RBF kernel (will be constructed in sklearn)")


constructed Dirichlet kernel
constructed neural tangent kernel
RBF kernel (will be constructed in sklearn)


In [201]:
# Training and testing algorithm (Old method)

# set of pairs of qubits we care about predicting correlation function for
d = 1
qubits = get_nearby_qubit_pairs(d)

test_size = 0.4

train_idx, test_idx, _, _ = train_test_split(range(len(Xfull)), range(len(Xfull)), test_size=test_size, random_state=0)

#with open('./results/orig_algorithm/orig_algorithm_test_size={}/orig_algorithm_{}_data_k/results_{}x{}_all_qubits.txt'.format(test_size, data_name, length, width), 'w') as f:
for (q1, q2) in qubits[7:8]:
    # each k corresponds to the correlation function in a pair of qubits
    print("(q1, q2) = ({}, {})".format(q1, q2))
    #print("k =", k, file=f)

    def train_and_predict(kernel, opt="linear"): # opt="linear" or "rbf"

        # instance-wise normalization
        for i in range(len(kernel)):
            if type(kernel) == np.ndarray:
                kernel[i] /= np.linalg.norm(kernel[i])
            else:
                kernel.at[i].divide(np.linalg.norm(kernel[i]))

        # consider the k-th pair
        global q1, q2

        # training data (estimated from measurement data)
        y = np.array([Ytrain[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
        X_train, X_test, y_train, y_test = train_test_split(kernel, y, test_size=test_size, random_state=0)

        # testing data (exact expectation values)
        y_clean = np.array([Yfull[i].reshape((length * width, length * width))[q1 - 1][q2 - 1] for i in range(len(Xfull))])
        _, _, _, y_test_clean = train_test_split(kernel, y_clean, test_size=test_size, random_state=0)

        # use cross validation to find the best method + hyper-param
        best_cv_score, test_score = 999.0, 999.0
        for ML_method in [(lambda Cx: svm.SVR(kernel=opt, C=Cx)), (lambda Cx: KernelRidge(kernel=opt, alpha=1/(2*Cx)))]:
            for C in [0.0125, 0.025, 0.05, 0.125, 0.25, 0.5, 1.0, 2.0]:
                score = -np.mean(cross_val_score(ML_method(C), X_train, y_train, cv=5, scoring="neg_root_mean_squared_error"))
                if best_cv_score > score:
                    clf = ML_method(C).fit(X_train, y_train.ravel())
                    test_score = np.linalg.norm(clf.predict(X_test).ravel() - y_test_clean.ravel()) / (len(y_test) ** 0.5)
                    best_cv_score = score

        return best_cv_score, test_score

    # Dirichlet
    #print("Dirich. kernel", train_and_predict(kernel_dir), file=f)
    print("Dirich. kernel", train_and_predict(kernel_dir))
    # RBF
    #print("Gaussi. kernel", train_and_predict(Xfull, opt="rbf"), file=f)
    print("Gaussi. kernel", train_and_predict(Xfull, opt="rbf"))
    # Neural tangent
    for kernel_NN in list_kernel_NN:
        #print("Neur. T kernel", train_and_predict(kernel_NN), file=f)
        print("Neur. T kernel", train_and_predict(kernel_NN))

(q1, q2) = (4, 5)
Dirich. kernel (0.2524725471445278, 0.24720161876210964)
Gaussi. kernel (0.18580648811215386, 0.19334888028240627)
Neur. T kernel (0.13558781374889525, 0.1640358656018234)
Neur. T kernel (0.1397908436522027, 0.16704989523840308)
Neur. T kernel (0.16193101833276144, 0.18731811705888335)
Neur. T kernel (0.19950551325783114, 0.21130932478677994)
