# Simulation template 

In this notebook we run the machine learning analysis of topological phase transitions occurring in both nearest-neighbour SSH models (ssh1) and second-neighbour models (ssh2), as described in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from Fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector-space eigenmodes, computed from ALL real-space components.

## Defining parameters

In [1]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

CPU times: user 744 ms, sys: 265 ms, total: 1.01 s
Wall time: 578 ms


In [2]:
%%time
### Dataset and simulation parameters
# Template defaults. The "# Parameters" cell that follows reassigns these same
# names for a concrete run, so edit the values here only for interactive use.
# The commented SSH2 lines are the drop-in alternatives for the ssh2 dataset.
csv_path = SSH1_PERIODIC_100_6561_CSV
#csv_path = SSH2_PERIODIC_100_6561_CSV
simulation_dir = SSH1_PERIODIC_1ST_SCENARIO_100_6561_SIMULATION_DIR
#simulation_dir = SSH2_PERIODIC_ENGINEERED_100_6561_SIMULATION_DIR
allowed_windings = [0, 1]
#allowed_windings = [-1, 0, 1, 2]

model_name = "DecisionTreeClassifier"
model_kw = dict(criterion="entropy")
val_split = 0.9
features_to_use = None
shuffle_features = False
random_state = 763

### Fourier engineered features
# Indices 0..99, i.e. one entry per column feat0..feat99 of the raw dataframe.
fourier_features = list(range(100))
mode = "dft"  # also forwarded to run_simulation further down
real = True
normalize = False
fillna = False

### Running a simulation
n_experiments = 5
start_n = 0
fit_params = None
pred_params = None
shuffle_rows = True
random_features = False

# WARNING: keep this False unless the dataset is small -- storing every
# experiment's results in lists will flood memory.
store_in_lists = False

# Switches that control what gets saved.
save_eigenvector = True
save_hamiltonian = True
save_accuracy = True
save_models = True

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 8.11 µs


In [3]:
# Parameters
# Run-specific values: each name below replaces the default of the same name
# assigned in the previous cell.

# --- data, model and output locations ---
csv_path = "/home/rio/ssh_csvs/ssh1/periodic_100_6561.csv"
simulation_dir = "/home/rio/ssh_simulations/ssh1/periodic_1st_scenario_100_6561"
model_name = "DecisionTreeClassifier"
model_kw = dict(criterion="entropy")
allowed_windings = [0, 1]

# --- splitting / feature selection ---
val_split = 0.9
features_to_use = None
shuffle_features = False
random_state = 467

# --- Fourier engineered features: indices 0 through 50 inclusive ---
fourier_features = list(range(51))
mode = "dft"
real = True
normalize = False
fillna = False

# --- experiment loop ---
n_experiments = 100
start_n = 0
fit_params = None
pred_params = None
shuffle_rows = True
random_features = False
store_in_lists = False

# --- what to persist ---
save_eigenvector = True
save_hamiltonian = True
save_accuracy = True
save_models = True


In [4]:
# Echo the Fourier feature indices that are actually in effect for this run
# (i.e. after the "# Parameters" cell has reassigned fourier_features).
print("These are the fourier features")
fourier_features

These are the fourier features


[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50]

In [5]:
%%time
# Start a Simulation instance configured for a decision tree.
# NOTE(review): `model` is instantiated here but is not handed to Simulation,
# which receives `model_name` and `model_kw` instead -- confirm whether this
# object is still needed anywhere.
model = DecisionTreeClassifier(criterion="entropy")

# All arguments are passed positionally.
simulation = Simulation(
    csv_path,
    model_name,
    model_kw,
    allowed_windings,
    simulation_dir,
    val_split,
    features_to_use,
    shuffle_features,
    random_state,
)

# Print the dataframe summary, then display its first rows as the cell output.
print("Info on all data: \n")
simulation.dataframe.info()
simulation.dataframe.head()

Info on all data: 

<class 'pandas.core.frame.DataFrame'>
Int64Index: 656100 entries, 0 to 656099
Columns: 108 entries, id to feat99
dtypes: float64(103), int32(3), object(2)
memory usage: 538.1+ MB
CPU times: user 4.97 s, sys: 267 ms, total: 5.23 s
Wall time: 5.23 s


Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,feat0,feat1,...,feat90,feat91,feat92,feat93,feat94,feat95,feat96,feat97,feat98,feat99
0,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.1,0.1,...,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1
1,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141421,0.141142,...,0.114412,0.108967,0.123928,0.119406,0.13149,0.127962,0.136978,0.1345,0.140306,0.138916
2,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141421,0.140306,...,0.043702,0.0265,0.075777,0.060214,0.103092,0.090145,0.123928,0.114412,0.136978,0.13149
3,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,-0.1,0.1,...,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1
4,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.000509,-0.008372,...,-0.082713,-0.089753,-0.067684,-0.075347,-0.051587,-0.059753,-0.034677,-0.043217,-0.01722,-0.026


#### Generating Fourier engineered features

In [6]:
%%time
# Build the Fourier-engineered features from the settings chosen in the
# parameter cells; the result is read back from
# simulation.fourier_engineered_dataframe below.
simulation.engineer_fourier_features(fourier_features,mode,real,normalize,fillna)
# Print the column/dtype summary of the engineered dataframe.
simulation.fourier_engineered_dataframe.info() 
# Display 15 randomly drawn rows. No random_state is passed, so the rows shown
# differ between runs.
simulation.fourier_engineered_dataframe.sample(15)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 656100 entries, 0 to 656099
Data columns (total 59 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   id          656100 non-null  int32  
 1   path        656100 non-null  object 
 2   t1          656100 non-null  float64
 3   t2          656100 non-null  float64
 4   winding     656100 non-null  float64
 5   phase       656100 non-null  int32  
 6   pred_phase  656100 non-null  int32  
 7   type_of     656100 non-null  object 
 8   dft_feat0   656100 non-null  float64
 9   dft_feat1   656100 non-null  float64
 10  dft_feat2   656100 non-null  float64
 11  dft_feat3   656100 non-null  float64
 12  dft_feat4   656100 non-null  float64
 13  dft_feat5   656100 non-null  float64
 14  dft_feat6   656100 non-null  float64
 15  dft_feat7   656100 non-null  float64
 16  dft_feat8   656100 non-null  float64
 17  dft_feat9   656100 non-null  float64
 18  dft_feat10  656100 non-null  float64
 19  df

Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,dft_feat0,dft_feat1,...,dft_feat41,dft_feat42,dft_feat43,dft_feat44,dft_feat45,dft_feat46,dft_feat47,dft_feat48,dft_feat49,dft_feat50
469427,4694,H_0.85_1.85_1.00495.dat,0.85,1.85,1.004946,1,666,train,6.245005e-17,4.024447e-16,...,8.190231e-15,-5.608383e-07,1.301544e-14,-5.771375,-1.900532e-14,2.588154e-06,-2.68079e-15,-3.452127e-06,-2.107027e-15,4e-06
601202,6012,H_1.7_-1.1_0.00105128.dat,1.7,-1.1,0.001051,0,666,train,10.0,-9.492516e-16,...,2.002041e-15,9.939958e-15,7.887867e-16,-6.44723e-15,7.3857e-16,3.397457e-15,7.186017e-15,5.637201e-15,-9.492396e-15,7.521761e-16
593580,5935,H_1.65_-0.9_0.000944438.dat,1.65,-0.9,0.000944,0,666,train,-2.775558e-17,5.845772e-06,...,-8.867757e-07,-1.533267e-15,-2.119552e-06,5.394179e-15,4.980283e-15,3.044913e-15,4.023234e-06,1.640196e-15,-2.12669e-07,1.04715e-15
124953,1249,H_-1.25_-0.3_-0.000844261.dat,-1.25,-0.3,-0.000844,0,666,train,-5.5511150000000004e-17,6.205892e-16,...,1.131202e-15,6.007705e-15,1.145062e-15,3.408148e-15,-1.321336e-05,7.576561e-15,-4.383038e-16,-4.726734e-15,-7.854553e-16,1.112428e-16
358332,3583,H_0.2_-1.05_1.00225.dat,0.2,-1.05,1.002248,1,666,train,2e-06,1.739454e-15,...,-1.757944e-16,3.814237e-06,-2.289734e-15,-8.367544e-07,-1.253711e-14,2.761967,5.769829e-15,1.715976e-06,-5.327448e-16,6e-06
563004,5630,H_1.45_0.05_-9.55277e-05.dat,1.45,0.05,-9.6e-05,0,666,train,1.446759e-15,0.1320857,...,-5.755989e-06,1.90651e-15,2.034322e-06,-1.583551e-15,3.902113e-06,5.96702e-16,-3.079103e-06,5.603366e-15,-0.2368325,1.80409e-17
254126,2541,H_-0.45_-0.5_1.02497.dat,-0.45,-0.5,1.024967,1,666,test,0.0,6.807533e-16,...,9.314244e-16,1.590834e-15,-4.38945e-16,3.874097e-15,2.474566e-06,6.114609e-15,-1.174728e-15,-3.459294e-15,3.1500600000000003e-17,-2.34246e-18
211093,2110,H_-0.7_-1.8_1.00438.dat,-0.7,-1.8,1.004376,1,666,train,0.0,1.496002e-16,...,-9.452273e-16,2.386653e-06,6.300858e-16,1.770021e-06,5.037781e-15,-1.806121e-06,5.910263e-15,4.576047e-06,6.453476e-15,-1.056988e-15
109799,1097,H_-1.35_0.2_0.000345206.dat,-1.35,0.2,0.000345,0,666,train,1.387779e-17,9.982373e-08,...,-2.818078e-08,3.823502e-16,6.817854e-07,-2.540961e-15,1.902113e-06,8.053333e-15,1.750815e-06,3.973359e-15,-6.541241e-06,-6.489294e-16
335269,3352,H_0.05_-0.45_1.00241.dat,0.05,-0.45,1.002409,1,666,train,0.0,2.472238e-06,...,4.990909e-06,7.368209e-16,6.340516e-06,-2.239277e-15,-4.987589e-08,1.124314e-15,-1.621619e-06,-2.839265e-15,2.200953e-06,-2.363842e-16


In [7]:
#simulation.dataframe.iloc[331456,:]

In [8]:
#for ix, obj in enumerate(simulation.dataframe.iloc[331456,:].values):
#    print(ix,obj)

In [9]:
#simulation.fourier_engineered_dataframe.dct_feat0

In [10]:
#simulation.features

In [11]:
#simulation.fourier_matrix

In [12]:
#1/simulation.n_features*simulation.dataframe.loc[:,simulation.features].sum(axis=1)

In [13]:
#df_2 = simulation.fourier_engineered_dataframe[simulation.fourier_engineered_dataframe.phase==2]
#df_2.describe()

In [14]:
#df_1 = simulation.fourier_engineered_dataframe[simulation.fourier_engineered_dataframe.phase==1]
#df_1.describe()

In [15]:
#df_0 = simulation.fourier_engineered_dataframe[simulation.fourier_engineered_dataframe.phase==0]
#df_0.describe()

In [16]:
#plt.figure(figsize=(10,10))
#plt.scatter(df_1.fourier_feat0,df_1.fourier_feat2)
#plt.scatter(df_0.fourier_feat0,df_0.fourier_feat2,alpha=0.4)

In [17]:
#plt.scatter(df_0.fourier_feat0,df_0.fourier_feat2,alpha=0.4)

In [18]:
#plt.scatter(df_2.fourier_feat0,df_2.fourier_feat2)

In [19]:
#df_1.hist(figsize=(15,15))

In [20]:
#df_0.hist(figsize=(15,15))

#### Checking initialization

In [21]:
%%time
# Read the dataset dimensions off the simulation object and echo them.
n_features, n_hamiltonians, n_ts = (
    simulation.n_features,
    simulation.n_hamiltonians,
    simulation.n_ts,
)

print("n_features: ", n_features)
print("n_hamiltonians: ", n_hamiltonians)
print("n_ts: ", n_ts)

n_features:  100
n_hamiltonians:  6561
n_ts:  2
CPU times: user 266 µs, sys: 32 µs, total: 298 µs
Wall time: 193 µs


In [22]:
%%time
# Row-level counts per split, taken from the `type_of` column of the dataframe.
n_total = len(simulation.dataframe)
n_train = int((simulation.dataframe.type_of == "train").sum())
n_val = int((simulation.dataframe.type_of == "val").sum())
n_test = int((simulation.dataframe.type_of == "test").sum())

# Hamiltonian-level counts per split, taken from the id lists on the simulation.
n_train_hamiltonians = len(simulation.train_ids)
n_val_hamiltonians = len(simulation.val_ids)
n_test_hamiltonians = len(simulation.test_ids)
n_total_hamiltonians = sum(
    (n_train_hamiltonians, n_val_hamiltonians, n_test_hamiltonians)
)

# The values printed after "%" are fractions of the total row count (0..1),
# not percentages.
print("% train: ", n_train / n_total)
print("% val: ", n_val / n_total)
print("% test: ", n_test / n_total)
print("% train + val + test: ", (n_train + n_val + n_test) / n_total)
print("\n")
print("number of train hamiltonians: ", n_train_hamiltonians)
print("number of val hamiltonians: ", n_val_hamiltonians)
print("number of test hamiltonians: ", n_test_hamiltonians)
print("total number of hamiltonians: ", n_total_hamiltonians)
print("\n")
print("train ids: ", simulation.train_ids)
print("val ids: ", simulation.val_ids)
print("test ids: ", simulation.test_ids)

% train:  0.846822130772748
% val:  0.0
% test:  0.15317786922725193
% train + val + test:  1.0


number of train hamiltonians:  5556
number of val hamiltonians:  0
number of test hamiltonians:  1005
total number of hamiltonians:  6561


train ids:  [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201

## Running a simulation

In [23]:
%%time
# Run the experiments with the settings from the parameter cells.
# All arguments are passed positionally.
simulation.run_simulation(
    n_experiments,
    start_n,
    fit_params,
    mode,
    shuffle_rows,
    pred_params,
    random_features,
    store_in_lists,
    save_eigenvector,
    save_hamiltonian,
    save_accuracy,
    save_models,
)

# Show the first 10 rows of the Fourier-engineered dataframe as the cell output.
simulation.fourier_engineered_dataframe.head(10)

running experiments:   0%|          | 0/100 [00:00<?, ?it/s]

running experiments:   1%|          | 1/100 [00:21<34:40, 21.01s/it]

running experiments:   2%|▏         | 2/100 [00:40<33:47, 20.68s/it]

running experiments:   3%|▎         | 3/100 [01:01<33:19, 20.61s/it]

running experiments:   4%|▍         | 4/100 [01:21<32:58, 20.60s/it]

running experiments:   5%|▌         | 5/100 [01:42<32:42, 20.66s/it]

running experiments:   6%|▌         | 6/100 [02:03<32:28, 20.73s/it]

running experiments:   7%|▋         | 7/100 [02:24<32:02, 20.67s/it]

running experiments:   8%|▊         | 8/100 [02:43<31:16, 20.40s/it]

running experiments:   9%|▉         | 9/100 [03:04<31:02, 20.47s/it]

running experiments:  10%|█         | 10/100 [03:24<30:30, 20.34s/it]

running experiments:  11%|█         | 11/100 [03:46<30:38, 20.66s/it]

running experiments:  12%|█▏        | 12/100 [04:07<30:42, 20.94s/it]

running experiments:  13%|█▎        | 13/100 [04:28<30:25, 20.98s/it]

running experiments:  14%|█▍        | 14/100 [04:49<29:53, 20.85s/it]

running experiments:  15%|█▌        | 15/100 [05:08<29:02, 20.51s/it]

running experiments:  16%|█▌        | 16/100 [05:29<28:50, 20.60s/it]

running experiments:  17%|█▋        | 17/100 [05:49<28:07, 20.33s/it]

running experiments:  18%|█▊        | 18/100 [06:09<27:37, 20.21s/it]

running experiments:  19%|█▉        | 19/100 [06:29<27:11, 20.14s/it]

running experiments:  20%|██        | 20/100 [06:50<27:11, 20.40s/it]

running experiments:  21%|██        | 21/100 [07:11<27:00, 20.51s/it]

running experiments:  22%|██▏       | 22/100 [07:33<27:14, 20.95s/it]

running experiments:  23%|██▎       | 23/100 [07:53<26:45, 20.85s/it]

running experiments:  24%|██▍       | 24/100 [08:14<26:20, 20.80s/it]

running experiments:  25%|██▌       | 25/100 [08:35<26:14, 20.99s/it]

running experiments:  26%|██▌       | 26/100 [08:56<25:41, 20.83s/it]

running experiments:  27%|██▋       | 27/100 [09:16<25:07, 20.65s/it]

running experiments:  28%|██▊       | 28/100 [09:36<24:38, 20.53s/it]

running experiments:  29%|██▉       | 29/100 [09:57<24:11, 20.45s/it]

running experiments:  30%|███       | 30/100 [10:17<23:55, 20.51s/it]

running experiments:  31%|███       | 31/100 [10:36<22:58, 19.98s/it]

running experiments:  32%|███▏      | 32/100 [10:57<22:51, 20.16s/it]

running experiments:  33%|███▎      | 33/100 [11:16<22:18, 19.98s/it]

running experiments:  34%|███▍      | 34/100 [11:37<22:26, 20.40s/it]

running experiments:  35%|███▌      | 35/100 [11:58<22:03, 20.36s/it]

running experiments:  36%|███▌      | 36/100 [12:19<21:51, 20.49s/it]

running experiments:  37%|███▋      | 37/100 [12:39<21:35, 20.57s/it]

running experiments:  38%|███▊      | 38/100 [12:59<20:51, 20.18s/it]

running experiments:  39%|███▉      | 39/100 [13:18<20:20, 20.01s/it]

running experiments:  40%|████      | 40/100 [13:39<20:09, 20.16s/it]

running experiments:  41%|████      | 41/100 [14:00<20:11, 20.53s/it]

running experiments:  42%|████▏     | 42/100 [14:20<19:35, 20.26s/it]

running experiments:  43%|████▎     | 43/100 [14:40<19:10, 20.18s/it]

running experiments:  44%|████▍     | 44/100 [15:01<19:00, 20.37s/it]

running experiments:  45%|████▌     | 45/100 [15:19<18:13, 19.88s/it]

running experiments:  46%|████▌     | 46/100 [15:39<17:57, 19.95s/it]

running experiments:  47%|████▋     | 47/100 [16:00<17:42, 20.04s/it]

running experiments:  48%|████▊     | 48/100 [16:19<17:16, 19.93s/it]

running experiments:  49%|████▉     | 49/100 [16:41<17:29, 20.58s/it]

running experiments:  50%|█████     | 50/100 [17:02<17:14, 20.68s/it]

running experiments:  51%|█████     | 51/100 [17:23<16:47, 20.55s/it]

running experiments:  52%|█████▏    | 52/100 [17:42<16:08, 20.17s/it]

running experiments:  53%|█████▎    | 53/100 [18:03<15:54, 20.32s/it]

running experiments:  54%|█████▍    | 54/100 [18:24<15:45, 20.55s/it]

running experiments:  55%|█████▌    | 55/100 [18:43<15:09, 20.21s/it]

running experiments:  56%|█████▌    | 56/100 [19:03<14:41, 20.04s/it]

running experiments:  57%|█████▋    | 57/100 [19:22<14:10, 19.78s/it]

running experiments:  58%|█████▊    | 58/100 [19:42<13:56, 19.92s/it]

running experiments:  59%|█████▉    | 59/100 [20:02<13:39, 20.00s/it]

running experiments:  60%|██████    | 60/100 [20:22<13:22, 20.05s/it]

running experiments:  61%|██████    | 61/100 [20:43<13:05, 20.15s/it]

running experiments:  62%|██████▏   | 62/100 [21:02<12:33, 19.83s/it]

running experiments:  63%|██████▎   | 63/100 [21:23<12:26, 20.18s/it]

running experiments:  64%|██████▍   | 64/100 [21:43<12:01, 20.04s/it]

running experiments:  65%|██████▌   | 65/100 [22:03<11:45, 20.15s/it]

running experiments:  66%|██████▌   | 66/100 [22:24<11:28, 20.25s/it]

running experiments:  67%|██████▋   | 67/100 [22:43<11:03, 20.11s/it]

running experiments:  68%|██████▊   | 68/100 [23:03<10:44, 20.14s/it]

running experiments:  69%|██████▉   | 69/100 [23:23<10:18, 19.94s/it]

running experiments:  70%|███████   | 70/100 [23:43<09:56, 19.89s/it]

running experiments:  71%|███████   | 71/100 [24:03<09:39, 19.98s/it]

running experiments:  72%|███████▏  | 72/100 [24:24<09:26, 20.25s/it]

running experiments:  73%|███████▎  | 73/100 [24:45<09:10, 20.38s/it]

running experiments:  74%|███████▍  | 74/100 [25:06<08:54, 20.57s/it]

running experiments:  75%|███████▌  | 75/100 [25:25<08:28, 20.35s/it]

running experiments:  76%|███████▌  | 76/100 [25:46<08:10, 20.45s/it]

running experiments:  77%|███████▋  | 77/100 [26:07<07:56, 20.73s/it]

running experiments:  78%|███████▊  | 78/100 [26:30<07:50, 21.40s/it]

running experiments:  79%|███████▉  | 79/100 [26:50<07:20, 20.95s/it]

running experiments:  80%|████████  | 80/100 [27:11<06:57, 20.86s/it]

running experiments:  81%|████████  | 81/100 [27:32<06:38, 20.95s/it]

running experiments:  82%|████████▏ | 82/100 [27:52<06:08, 20.50s/it]

running experiments:  83%|████████▎ | 83/100 [28:12<05:47, 20.46s/it]

running experiments:  84%|████████▍ | 84/100 [28:32<05:25, 20.37s/it]

running experiments:  85%|████████▌ | 85/100 [28:53<05:06, 20.46s/it]

running experiments:  86%|████████▌ | 86/100 [29:13<04:44, 20.35s/it]

running experiments:  87%|████████▋ | 87/100 [29:33<04:22, 20.21s/it]

running experiments:  88%|████████▊ | 88/100 [29:53<04:03, 20.29s/it]

running experiments:  89%|████████▉ | 89/100 [30:13<03:42, 20.26s/it]

running experiments:  90%|█████████ | 90/100 [30:32<03:18, 19.81s/it]

running experiments:  91%|█████████ | 91/100 [30:51<02:56, 19.61s/it]

running experiments:  92%|█████████▏| 92/100 [31:12<02:40, 20.02s/it]

running experiments:  93%|█████████▎| 93/100 [31:31<02:17, 19.67s/it]

running experiments:  94%|█████████▍| 94/100 [31:51<01:58, 19.70s/it]

running experiments:  95%|█████████▌| 95/100 [32:11<01:39, 19.84s/it]

running experiments:  96%|█████████▌| 96/100 [32:31<01:19, 19.98s/it]

running experiments:  97%|█████████▋| 97/100 [32:52<01:00, 20.16s/it]

running experiments:  98%|█████████▊| 98/100 [33:12<00:40, 20.17s/it]

running experiments:  99%|█████████▉| 99/100 [33:33<00:20, 20.27s/it]

running experiments: 100%|██████████| 100/100 [33:52<00:00, 20.04s/it]

running experiments: 100%|██████████| 100/100 [33:52<00:00, 20.33s/it]

CPU times: user 33min 51s, sys: 1.82 s, total: 33min 52s
Wall time: 33min 52s





Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,dft_feat0,dft_feat1,...,dft_feat41,dft_feat42,dft_feat43,dft_feat44,dft_feat45,dft_feat46,dft_feat47,dft_feat48,dft_feat49,dft_feat50
0,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,10.0,-9.492516e-16,...,2.002041e-15,9.939958e-15,7.887867e-16,-6.44723e-15,7.3857e-16,3.397457e-15,7.186017e-15,5.637201e-15,-9.492396e-15,7.521761e-16
1,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,-9.714451e-16,7.043181,...,4.945312e-06,-5.540991e-15,3.345759e-07,-8.56525e-15,3.902113e-06,1.254253e-15,5.221327e-08,6.23934e-15,0.02787262,-9.648589e-16
2,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,5.5511150000000004e-17,1.976325e-15,...,-9.867786e-15,-3.855264e-06,-7.103689e-16,-4.164889e-07,8.14303e-15,-3.67376e-06,1.073251e-14,0.1110751,1.113201e-14,4e-06
3,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.0,-2.137289e-16,...,2.348986e-15,4.902321e-15,-1.625948e-15,1.781114e-14,1.571237e-15,4.049713e-15,8.559918e-15,1.311733e-14,1.734725e-14,-10.0
4,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,-1.63064e-16,-0.4177641,...,1.895428e-06,-1.857144e-15,-2.370346e-06,2.028406e-15,-1.557537e-06,-8.950521e-16,1.032379e-06,-6.575592e-15,0.4432186,-1.612599e-17
5,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,-9.228729e-16,5.164147e-15,...,7.647972e-15,-1.790758e-06,4.424319e-16,1.248733e-06,-4.673949e-15,3.954806e-06,-1.132259e-14,0.8801667,-1.550455e-14,1.2e-05
6,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,-1.249001e-16,-2.975733e-06,...,-3.095953e-06,-2.765887e-15,-1.203567e-06,7.257281e-15,3.871288e-06,-6.388426e-15,-0.2451267,6.87663e-15,6.438558e-07,8.467814e-16
7,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,4.1633360000000003e-17,-1.379742e-06,...,-1.857449e-06,-4.119832e-15,2.085785e-06,-8.943926e-15,-6.554235e-07,5.42842e-15,-1.302947,2.26024e-15,5.568957e-07,-2.569077e-16
8,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,-4e-06,1.020239e-15,...,2.341752e-15,1.323027e-06,5.145735e-15,-4.076888e-06,5.715952e-15,0.4513358,-1.012454e-14,5.715801e-07,-8.811769e-15,-6.973923e-16
9,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,4e-06,-2.138117e-15,...,-6.579038e-16,-1.179568e-06,7.964563e-16,2.259589e-06,9.927922e-15,-1.699157,-1.853805e-15,-5.682221e-06,2.425086e-15,-3.094253e-16


In [24]:
# Show the first 10 rows of the raw dataframe (feat0..feat99 columns).
# In the stored output pred_phase still holds the value 666 in these rows.
simulation.dataframe.head(10)

Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,feat0,feat1,...,feat90,feat91,feat92,feat93,feat94,feat95,feat96,feat97,feat98,feat99
0,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.1,0.1,...,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1
1,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141421,0.141142,...,0.114412,0.108967,0.123928,0.119406,0.13149,0.127962,0.136978,0.1345,0.140306,0.138916
2,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141421,0.140306,...,0.043702,0.0265,0.075777,0.060214,0.103092,0.090145,0.123928,0.114412,0.136978,0.13149
3,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,-0.1,0.1,...,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1
4,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.000509,-0.008372,...,-0.082713,-0.089753,-0.067684,-0.075347,-0.051587,-0.059753,-0.034677,-0.043217,-0.01722,-0.026
5,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.001204,-0.01653,...,-0.134123,-0.138686,-0.118757,-0.127445,-0.095929,-0.108196,-0.067073,-0.082149,-0.034003,-0.05094
6,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,-0.141421,-0.13898,...,0.043375,0.06783,-0.009222,0.017385,-0.060524,-0.035502,-0.103326,-0.083403,-0.131616,-0.119589
7,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,-0.000826,0.025688,...,0.134753,0.124324,0.141088,0.140407,0.127608,0.136771,0.096206,0.113925,0.051292,0.075079
8,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141417,0.136684,...,-0.115093,-0.131915,-0.061266,-0.09104,0.007717,-0.027643,0.074791,0.042592,0.123363,0.102291
9,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.001302,0.03643,...,0.082068,0.050848,0.127402,0.108132,0.141218,0.138667,0.120099,0.134896,0.069268,0.097755


#### Viewing a random sample

In [25]:
%%time
# Display a 10% random sample (drawn without replacement) of the
# Fourier-engineered dataframe. Passing the notebook's `random_state` pins the
# draw, so the same rows are shown on every Restart & Run All.
simulation.fourier_engineered_dataframe.sample(
    frac=0.1, replace=False, random_state=random_state
)

CPU times: user 35 ms, sys: 5 µs, total: 35 ms
Wall time: 34.8 ms


Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,dft_feat0,dft_feat1,...,dft_feat41,dft_feat42,dft_feat43,dft_feat44,dft_feat45,dft_feat46,dft_feat47,dft_feat48,dft_feat49,dft_feat50
389806,3898,H_0.4_-1.5_1.00211.dat,0.40,-1.50,1.002113,1,1,val,1.110223e-16,4.602895e-06,...,2.549583e-06,1.441154e-15,-3.741279e-06,1.756684e-15,-3.236068e-06,-8.378911e-17,-2.989276e-06,-7.606564e-15,6.064847e-06,-3.380490e-16
171670,1716,H_-0.95_-1.25_1.01103.dat,-0.95,-1.25,1.011026,1,1,test,4.000000e-06,2.816495e-17,...,2.611415e-16,2.671979e-06,4.145892e-16,-2.762973e-07,1.319941e-15,5.052567e-07,6.387100e-15,2.915661e-06,8.103193e-16,1.632685e-15
593254,5932,H_1.65_-1.05_0.00104066.dat,1.65,-1.05,0.001041,0,0,val,-4.163336e-17,-5.278171e-01,...,-2.827509e-06,2.491538e-17,9.545515e-08,1.438434e-14,-7.159210e-07,-2.545336e-15,-3.656290e-06,-1.146013e-14,7.276929e-01,3.410952e-14
55557,555,H_-1.7_1.45_0.00127128.dat,-1.70,1.45,0.001271,0,0,val,-9.714451e-17,-1.549016e-06,...,2.766433e-06,3.545464e-15,2.343989e-06,2.346767e-16,-5.411638e-06,4.051460e-15,1.387113e-06,-1.070060e-14,-1.589179e-06,-2.628185e-16
270988,2709,H_-0.35_-0.2_-0.00355401.dat,-0.35,-0.20,-0.003554,0,0,train,1.000000e-05,2.995115e-17,...,1.576894e-14,8.669648e-15,2.500939e-15,-5.867052e-16,3.891930e-15,1.591312e-14,1.242284e-14,1.999216e-15,9.581296e-15,-1.000000e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655651,6556,H_2_1.8_-0.0222906.dat,2.00,1.80,-0.022291,0,1,test,9.714451e-17,1.587406e-16,...,1.946034e-15,-8.178206e-07,-1.158474e-14,-6.664093e-06,2.755857e-15,1.777603e-06,5.884144e-15,-8.827508e-07,-4.807938e-15,-4.000000e-06
150110,1501,H_-1.1_0.15_0.000321038.dat,-1.10,0.15,0.000321,0,0,val,1.249001e-15,-2.057946e-06,...,-4.248953e-06,-4.737772e-15,-2.099633e-07,-8.589299e-15,1.097887e-06,2.212415e-15,-4.618902e-01,5.000205e-15,1.487735e-06,4.258816e-18
603776,6037,H_1.7_0.15_-0.000258825.dat,1.70,0.15,-0.000259,0,0,val,-1.387779e-16,-3.408425e-16,...,-1.572232e-15,7.511441e-07,3.009829e-15,8.410196e-07,-2.823569e-15,-2.695199e-06,3.382017e-15,-2.070236e-06,1.191585e-15,3.606182e-17
465973,4659,H_0.85_0.1_-0.000356583.dat,0.85,0.10,-0.000357,0,0,val,-2.428613e-16,-4.921700e-16,...,-8.636625e-16,-4.064844e-06,-3.703569e-15,-9.790540e-07,3.246049e-15,2.594565e-06,-7.830473e-15,6.116170e-06,-2.060955e-15,-1.507248e-16


#### Checking train/val/test splits again

In [26]:
%%time
# Recompute the split sizes now that the simulation has been run.
# Row-level counts per split, taken from the `type_of` column of the dataframe.
n_total = len(simulation.dataframe)
n_train = int((simulation.dataframe.type_of == "train").sum())
n_val = int((simulation.dataframe.type_of == "val").sum())
n_test = int((simulation.dataframe.type_of == "test").sum())

# Hamiltonian-level counts per split, taken from the id lists on the simulation.
n_train_hamiltonians = len(simulation.train_ids)
n_val_hamiltonians = len(simulation.val_ids)
n_test_hamiltonians = len(simulation.test_ids)
n_total_hamiltonians = sum(
    (n_train_hamiltonians, n_val_hamiltonians, n_test_hamiltonians)
)

# The values printed after "%" are fractions of the total row count (0..1),
# not percentages.
print("% train: ", n_train / n_total)
print("% val: ", n_val / n_total)
print("% test: ", n_test / n_total)
print("% train + val + test: ", (n_train + n_val + n_test) / n_total)
print("\n")
print("number of train hamiltonians: ", n_train_hamiltonians)
print("number of val hamiltonians: ", n_val_hamiltonians)
print("number of test hamiltonians: ", n_test_hamiltonians)
print("total number of hamiltonians: ", n_total_hamiltonians)
print("\n")
print("train ids: ", simulation.train_ids)
print("val ids: ", simulation.val_ids)
print("test ids: ", simulation.test_ids)

% train:  0.08474317939338516
% val:  0.7620789513793629
% test:  0.15317786922725193
% train + val + test:  1.0


number of train hamiltonians:  556
number of val hamiltonians:  5000
number of test hamiltonians:  1005
total number of hamiltonians:  6561


train ids:  [1615, 5283, 3574, 3415, 2340, 5913, 2221, 3260, 5892, 5214, 3750, 1657, 18, 4450, 3032, 2456, 4940, 2814, 3566, 3234, 2415, 3175, 2782, 4070, 3448, 1105, 2910, 4156, 2275, 1616, 2301, 2179, 2426, 5182, 6189, 341, 5427, 6017, 2766, 5192, 177, 4437, 1674, 3159, 1622, 5229, 6270, 3256, 3205, 2125, 1939, 4493, 6046, 3151, 1169, 3412, 3625, 1678, 3846, 5442, 3190, 3326, 3187, 3646, 5833, 4140, 1536, 1132, 1733, 1951, 5832, 6341, 1869, 1487, 278, 1736, 3603, 1375, 1938, 669, 6083, 3519, 869, 3673, 1251, 4847, 2487, 2695, 5965, 5112, 3554, 2243, 6128, 3014, 2935, 5687, 4550, 797, 1054, 3577, 3704, 2598, 3964, 2903, 624, 2824, 3235, 937, 2823, 1838, 2336, 4209, 2069, 3082, 2537, 5865, 6197, 3015, 3241, 3270, 5849, 6484, 3142, 22

#### Checking summaries

In [27]:
%%time
# Fetch the per-hamiltonian summary table, report its row count, and display it.
# In the stored output it has one row per hamiltonian id (6561 rows).
ham_summary = simulation.hamiltonian_summary
print("length of ham_summary: ", len(ham_summary))
ham_summary

length of ham_summary:  6561
CPU times: user 112 µs, sys: 0 ns, total: 112 µs
Wall time: 82.7 µs


Unnamed: 0_level_0,t1,t2,type_of,0,1,phase,pred_phase
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,-2.0,-2.00,test,0.47,0.53,999,1
1,-2.0,-1.95,test,0.42,0.58,0,1
2,-2.0,-1.90,test,0.44,0.56,0,1
3,-2.0,-1.85,test,0.57,0.43,0,0
4,-2.0,-1.80,test,0.47,0.53,0,1
...,...,...,...,...,...,...,...
6556,2.0,1.80,test,0.49,0.51,0,1
6557,2.0,1.85,test,0.46,0.54,0,1
6558,2.0,1.90,test,0.45,0.55,0,1
6559,2.0,1.95,test,0.46,0.54,0,1


In [28]:
# Descriptive statistics of the hamiltonian summary.
# NOTE: the stored output shows `phase` reaching 999 (see the max row), so the
# mean/std reported for `phase` are dominated by those entries.
ham_summary.describe()

Unnamed: 0,t1,t2,0,1,phase,pred_phase
count,6561.0,6561.0,6561.0,6561.0,6561.0,6561.0
mean,0.0,-4.331917e-18,0.474164,0.525836,25.002134,0.532846
std,1.169134,1.169134,0.317531,0.317531,154.495657,0.498958
min,-2.0,-2.0,0.0,0.02,0.0,0.0
25%,-1.0,-1.0,0.17,0.2,0.0,0.0
50%,0.0,0.0,0.44,0.56,1.0,1.0
75%,1.0,1.0,0.8,0.83,1.0,1.0
max,2.0,2.0,0.98,1.0,999.0,1.0


In [29]:
# Fetch the per-eigenvector summary table, report its row count, and display it.
eigen_summary = simulation.eigenvector_summary
# The label previously read "ham_summary" although the length printed is that
# of eigen_summary.
print("length of eigen_summary: ", len(eigen_summary))
eigen_summary

length of ham_summary:  656100


Unnamed: 0,id,phase,pred_phase,type_of
0,0,999,1,test
1,0,999,1,test
2,0,999,0,test
3,0,999,1,test
4,0,999,0,test
...,...,...,...,...
656095,6560,999,1,test
656096,6560,999,1,test
656097,6560,999,1,test
656098,6560,999,1,test


#### Checking accuracies

In [30]:
# Display the accuracy record kept on the simulation; the stored output holds
# eigenvector- and hamiltonian-level entries for train, val and test.
simulation.accuracy

{'eigenvector_train': 0.9805755395683453,
 'eigenvector_val': 0.816686,
 'eigenvector_test': 0.5967890995260664,
 'hamiltonian_train': 1.0,
 'hamiltonian_val': 1.0,
 'hamiltonian_test': 0.8341232227488151}

#### Checking data stored in  memory

In [31]:
# Display the hamiltonian summaries kept in memory.
# Empty in the stored output -- presumably because store_in_lists is False; confirm.
ham_summary_list = simulation.hamiltonian_summary_list
ham_summary_list

[]

In [32]:
# Display the eigenvector summaries kept in memory.
# Empty in the stored output -- presumably because store_in_lists is False; confirm.
eigen_summary_list = simulation.eigenvector_summary_list
eigen_summary_list

[]

In [33]:
# Display the accuracies kept in memory.
# Every entry is an empty list in the stored output -- presumably because
# store_in_lists is False; confirm.
accuracy_list = simulation.accuracy_list
accuracy_list

{'eigenvector_train': [],
 'eigenvector_val': [],
 'eigenvector_test': [],
 'hamiltonian_train': [],
 'hamiltonian_val': [],
 'hamiltonian_test': []}

In [34]:
#simulation.fourier_matrix

In [35]:
#simulation.fourier_matrix.shape

In [36]:
#simulation.fourier_matrix[:,0]

In [37]:
#simulation.fourier_matrix[:,1]

In [38]:
#np.exp(-1j*2*np.pi*3/100)

In [39]:
#for i in range(1,50):
#    print("\n")
#    print(i)
#    print(np.sum((np.real(simulation.fourier_matrix[:,i]) - np.real(simulation.fourier_matrix[:,-i]))**2))
#    print(np.sum((np.imag(simulation.fourier_matrix[:,i])+ np.imag(simulation.fourier_matrix[:,-i]))**2))

In [40]:
# Print column 0 of the Fourier matrix; every entry in the stored output is 1+0j.
print(simulation.fourier_matrix[:,0])

[1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j
 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j 1.+0.j]


In [41]:
# Print column 50 of the Fourier matrix; in the stored output the real parts
# alternate between +1 and -1 and the imaginary parts are ~1e-15 or smaller.
print(simulation.fourier_matrix[:,50])

[ 1.+0.00000000e+00j -1.-4.99600361e-16j  1.-6.43249060e-16j
 -1.-3.67394040e-16j  1.-1.28649812e-15j -1.+1.16403344e-15j
  1.+7.34788079e-16j -1.+2.69546092e-15j  1.-2.57299624e-15j
 -1.+2.45053156e-15j  1.-2.32806688e-15j -1.+2.20560220e-15j
  1.+1.46957616e-15j -1.+1.96067284e-15j  1.-5.39092184e-15j
 -1.+1.71574348e-15j  1.-5.14599248e-15j -1.+1.47081412e-15j
  1.-4.90106312e-15j -1.+1.22588476e-15j  1.-4.65613376e-15j
 -1.+9.80955401e-16j  1.-4.41120440e-15j -1.+7.84145340e-15j
  1.+2.93915232e-15j -1.+4.91096681e-16j  1.-3.92134568e-15j
 -1.+7.35159468e-15j  1.-1.07818437e-14j -1.+1.23796127e-18j
  1.-3.43148696e-15j -1.+6.86173596e-15j  1.-1.02919850e-14j
 -1.-4.88620758e-16j  1.-2.94162824e-15j -1.+6.37187724e-15j
  1.-9.80212624e-15j -1.-9.78479478e-16j  1.-2.45176952e-15j
 -1.+5.88201852e-15j  1.-9.31226752e-15j -1.+1.27425165e-14j
  1.-1.96191080e-15j -1.+1.96030145e-14j  1.-8.82240880e-15j
 -1.-1.95819692e-15j  1.-1.56829068e-14j -1.+4.90230108e-15j
  1.+5.87830464e-15j -1.