# Simulation template (fewer features)

In this notebook we run the machine learning analysis of topological phase transitions occurring in both nearest-neighbour SSH models (ssh1) and second-neighbour SSH models (ssh2), as described in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with fewer lattice sites.

## Defining parameters

In [2]:
%%time
# Enable autoreload so edits to imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): the project names used in later cells (e.g. Simulation and the
# SSH1_* path constants) are not imported anywhere else, so they presumably come
# from this star import — confirm what `simulation` exports and consider
# importing those names explicitly.
from simulation import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CPU times: user 382 µs, sys: 103 µs, total: 485 µs
Wall time: 381 µs


In [3]:
%%time
# --- Dataset / model configuration (passed to the Simulation constructor) ---
csv_path = SSH1_PERIODIC_100_6561_CSV
simulation_dir = SSH1_PERIODIC_LESS_100_6561_SIMULATION_DIR
model_name = "DecisionTreeClassifier"
model_kw = dict(criterion="entropy")
allowed_windings = [0, 1]
features_to_use = [0, 1, 3, 50, 51, 53]
shuffle_features = False
val_split = 0.9
random_state = 936

# --- Experiment loop configuration (passed to run_simulation) ---
n_experiments, start_n = 5, 0
shuffle_rows = True
random_features = False
fit_params = pred_params = None

# Keep this False unless the dataset is small: storing the results of every
# experiment in lists will flood memory.
store_in_lists = False

# --- Flags controlling what gets saved ---
save_eigenvector = save_hamiltonian = save_accuracy = save_models = True

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 6.44 µs


In [4]:
%%time
# Build the Simulation from the parameters defined in the configuration cell.
# The estimator is described by model_name / model_kw, which are what the
# constructor receives, so no classifier instance is created in the notebook.
simulation = Simulation(
    csv_path,
    model_name,
    model_kw,
    allowed_windings,
    simulation_dir,
    val_split,
    features_to_use,
    shuffle_features,
    random_state,
)

# Quick look at the loaded data: schema / memory footprint, then the first rows.
print("Info on all data: \n")
simulation.dataframe.info()
simulation.dataframe.head()

Info on all data: 

<class 'pandas.core.frame.DataFrame'>
Int64Index: 656100 entries, 0 to 656099
Columns: 108 entries, id to feat99
dtypes: float64(103), int32(3), object(2)
memory usage: 538.1+ MB
CPU times: user 5.71 s, sys: 692 ms, total: 6.4 s
Wall time: 7.9 s


Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,feat0,feat1,...,feat90,feat91,feat92,feat93,feat94,feat95,feat96,feat97,feat98,feat99
0,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.1,0.1,...,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1
1,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141421,0.141142,...,0.114412,0.108967,0.123928,0.119406,0.13149,0.127962,0.136978,0.1345,0.140306,0.138916
2,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.141421,0.140306,...,0.043702,0.0265,0.075777,0.060214,0.103092,0.090145,0.123928,0.114412,0.136978,0.13149
3,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,-0.1,0.1,...,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1
4,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,666,test,0.000509,-0.008372,...,-0.082713,-0.089753,-0.067684,-0.075347,-0.051587,-0.059753,-0.034677,-0.043217,-0.01722,-0.026


#### Checking initialization

In [5]:
%%time
# Read the size attributes exposed by the Simulation instance after construction.
n_features, n_hamiltonians, n_ts = (
    simulation.n_features,
    simulation.n_hamiltonians,
    simulation.n_ts,
)

# Same "label:  value" lines as before, emitted from a single loop.
for label, value in (
    ("n_features: ", n_features),
    ("n_hamiltonians: ", n_hamiltonians),
    ("n_ts: ", n_ts),
):
    print(label, value)

n_features:  100
n_hamiltonians:  6561
n_ts:  2
CPU times: user 198 µs, sys: 28 µs, total: 226 µs
Wall time: 173 µs


In [6]:
%%time
# Row-level split sizes. Each split is counted with a boolean-mask sum, which
# avoids building three filtered copies of the full dataframe just to call len().
split_labels = simulation.dataframe.type_of
n_total = len(simulation.dataframe)
n_train = int((split_labels == "train").sum())
n_val = int((split_labels == "val").sum())
n_test = int((split_labels == "test").sum())

# Hamiltonian-level split sizes, from the id collections held by the simulation.
n_train_hamiltonians = len(simulation.train_ids)
n_val_hamiltonians = len(simulation.val_ids)
n_test_hamiltonians = len(simulation.test_ids)
n_total_hamiltonians = n_train_hamiltonians + n_val_hamiltonians + n_test_hamiltonians

# NOTE: despite the "%" labels, the printed values are fractions in [0, 1].
print("% train: ", n_train/n_total)
print("% val: ",  n_val/n_total)
print("% test: ", n_test/n_total)
print("% train + val + test: ", (n_train+n_val+n_test)/n_total)
print("\n")
print("number of train hamiltonians: ", n_train_hamiltonians)
print("number of val hamiltonians: ", n_val_hamiltonians)
print("number of test hamiltonians: ", n_test_hamiltonians)
print("total number of hamiltonians: ", n_total_hamiltonians)
print("\n")
print("train ids: ", simulation.train_ids)
print("val ids: ", simulation.val_ids)
print("test ids: ", simulation.test_ids)

% train:  0.846822130772748
% val:  0.0
% test:  0.15317786922725193
% train + val + test:  1.0


number of train hamiltonians:  5556
number of val hamiltonians:  0
number of test hamiltonians:  1005
total number of hamiltonians:  6561


train ids:  [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201

## Running a simulation

In [6]:
%%time
# Every argument is passed positionally, so the order below must match
# run_simulation's signature.
run_args = (
    n_experiments,
    start_n,
    fit_params,
    shuffle_rows,
    pred_params,
    random_features,
    store_in_lists,
    save_eigenvector,
    save_hamiltonian,
    save_accuracy,
    save_models,
)
simulation.run_simulation(*run_args)

# Show the first rows of the dataframe once the experiments have finished.
simulation.dataframe.head(10)

running experiments:   0%|          | 0/5 [00:00<?, ?it/s]

THIS IS  predict_params:  None
THIS IS  the type of predict_params:  <class 'NoneType'>


running experiments:  20%|██        | 1/5 [00:10<00:42, 10.58s/it]

THIS IS  predict_params:  None
THIS IS  the type of predict_params:  <class 'NoneType'>


running experiments:  40%|████      | 2/5 [00:20<00:31, 10.40s/it]

THIS IS  predict_params:  None
THIS IS  the type of predict_params:  <class 'NoneType'>


running experiments:  60%|██████    | 3/5 [00:29<00:20, 10.11s/it]

THIS IS  predict_params:  None
THIS IS  the type of predict_params:  <class 'NoneType'>


running experiments:  80%|████████  | 4/5 [00:39<00:09,  9.95s/it]

THIS IS  predict_params:  None
THIS IS  the type of predict_params:  <class 'NoneType'>


running experiments: 100%|██████████| 5/5 [00:49<00:00,  9.85s/it]

CPU times: user 49 s, sys: 211 ms, total: 49.3 s
Wall time: 49.2 s





Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,feat0,feat1,...,feat90,feat91,feat92,feat93,feat94,feat95,feat96,feat97,feat98,feat99
0,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.1,0.1,...,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1
1,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.141421,0.141142,...,0.114412,0.108967,0.123928,0.119406,0.13149,0.127962,0.136978,0.1345,0.140306,0.138916
2,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,0.141421,0.140306,...,0.043702,0.0265,0.075777,0.060214,0.103092,0.090145,0.123928,0.114412,0.136978,0.13149
3,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,-0.1,0.1,...,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1,-0.1,0.1
4,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.000509,-0.008372,...,-0.082713,-0.089753,-0.067684,-0.075347,-0.051587,-0.059753,-0.034677,-0.043217,-0.01722,-0.026
5,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.001204,-0.01653,...,-0.134123,-0.138686,-0.118757,-0.127445,-0.095929,-0.108196,-0.067073,-0.082149,-0.034003,-0.05094
6,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,0,test,-0.141421,-0.13898,...,0.043375,0.06783,-0.009222,0.017385,-0.060524,-0.035502,-0.103326,-0.083403,-0.131616,-0.119589
7,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,-0.000826,0.025688,...,0.134753,0.124324,0.141088,0.140407,0.127608,0.136771,0.096206,0.113925,0.051292,0.075079
8,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.141417,0.136684,...,-0.115093,-0.131915,-0.061266,-0.09104,0.007717,-0.027643,0.074791,0.042592,0.123363,0.102291
9,0,H_-2_-2_0.509296.dat,-2.0,-2.0,0.509296,999,1,test,0.001302,0.03643,...,0.082068,0.050848,0.127402,0.108132,0.141218,0.138667,0.120099,0.134896,0.069268,0.097755


#### Viewing a random sample

In [7]:
%%time
# Draw 10% of the rows without replacement. Seeding the draw with the notebook's
# random_state makes the displayed sample reproducible across re-runs.
simulation.dataframe.sample(frac=0.1, replace=False, random_state=random_state)

CPU times: user 60.5 ms, sys: 8 µs, total: 60.5 ms
Wall time: 60 ms


Unnamed: 0,id,path,t1,t2,winding,phase,pred_phase,type_of,feat0,feat1,...,feat90,feat91,feat92,feat93,feat94,feat95,feat96,feat97,feat98,feat99
10605,106,H_-1.95_-0.75_-0.00166977.dat,-1.95,-0.75,-0.001670,0,0,val,0.000048,0.004883,...,-0.083086,0.087026,-0.068088,0.072369,-0.052016,0.056570,-0.035123,0.039879,-0.017677,0.022559
556641,5566,H_1.4_0.9_-0.00478808.dat,1.40,0.90,-0.004788,0,0,val,-0.002514,-0.086429,...,-0.081079,0.135718,0.139365,-0.093760,-0.037599,-0.055876,-0.107347,0.141341,0.129012,-0.064484
452326,4523,H_0.75_1.4_1.00575.dat,0.75,1.40,1.005754,1,1,val,0.005868,-0.051216,...,-0.005868,0.051216,-0.087801,-0.036048,-0.136197,-0.109543,-0.132571,-0.141196,-0.078307,-0.118917
500913,5009,H_1.05_1.4_1.0106.dat,1.05,1.40,1.010597,1,1,test,0.004439,-0.015850,...,-0.133062,-0.138550,-0.116968,-0.127147,-0.093526,-0.107754,-0.064206,-0.081590,-0.030853,-0.050301
399429,3994,H_0.45_-0.75_1.00167.dat,0.45,-0.75,1.001673,1,1,val,-0.003918,0.078050,...,-0.135659,0.136280,0.055684,-0.115983,0.064671,0.011581,-0.138129,0.101219,0.111423,-0.140620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499659,4996,H_1.05_0.75_-0.00662397.dat,1.05,0.75,-0.006624,0,0,train,0.141421,-0.101327,...,-0.043937,-0.062514,-0.089955,0.140603,0.140337,-0.088164,-0.060438,-0.046122,-0.075568,0.137591
303915,3039,H_-0.15_0.1_0.0010704.dat,-0.15,0.10,0.001070,0,0,val,-0.000649,0.047348,...,0.134298,0.141369,-0.051456,-0.093079,-0.068699,-0.022707,0.139037,0.122027,-0.108552,-0.132859
46080,460,H_-1.75_0.75_0.000802739.dat,-1.75,0.75,0.000803,0,0,val,0.001209,0.052474,...,-0.084100,-0.119643,-0.095925,-0.051647,0.120049,0.138999,0.050935,-0.000444,-0.139138,-0.138832
231041,2310,H_-0.6_0.1_0.000382198.dat,-0.60,0.10,0.000382,0,0,val,0.141420,0.140156,...,-0.114802,-0.124482,-0.140221,-0.136686,-0.089632,-0.074797,0.009544,0.027636,0.103546,0.115089


#### Checking train/val/test splits again

In [8]:
%%time
# Re-check the split sizes after the experiments have run (the saved outputs show
# a different train/val partition here than before the run).
# Each split is counted with a boolean-mask sum, which avoids building three
# filtered copies of the full dataframe just to call len().
split_labels = simulation.dataframe.type_of
n_total = len(simulation.dataframe)
n_train = int((split_labels == "train").sum())
n_val = int((split_labels == "val").sum())
n_test = int((split_labels == "test").sum())

# Hamiltonian-level split sizes, from the id collections held by the simulation.
n_train_hamiltonians = len(simulation.train_ids)
n_val_hamiltonians = len(simulation.val_ids)
n_test_hamiltonians = len(simulation.test_ids)
n_total_hamiltonians = n_train_hamiltonians + n_val_hamiltonians + n_test_hamiltonians

# NOTE: despite the "%" labels, the printed values are fractions in [0, 1].
print("% train: ", n_train/n_total)
print("% val: ",  n_val/n_total)
print("% test: ", n_test/n_total)
print("% train + val + test: ", (n_train+n_val+n_test)/n_total)
print("\n")
print("number of train hamiltonians: ", n_train_hamiltonians)
print("number of val hamiltonians: ", n_val_hamiltonians)
print("number of test hamiltonians: ", n_test_hamiltonians)
print("total number of hamiltonians: ", n_total_hamiltonians)
print("\n")
print("train ids: ", simulation.train_ids)
print("val ids: ", simulation.val_ids)
print("test ids: ", simulation.test_ids)

% train:  0.08474317939338516
% val:  0.7620789513793629
% test:  0.15317786922725193
% train + val + test:  1.0


number of train hamiltonians:  556
number of val hamiltonians:  5000
number of test hamiltonians:  1005
total number of hamiltonians:  6561


train ids:  [4369, 235, 3138, 4116, 3277, 3656, 224, 6413, 3119, 2028, 1962, 3267, 3145, 2465, 6057, 4461, 91, 2076, 3960, 5696, 2250, 833, 3706, 4770, 153, 374, 2262, 2676, 124, 5997, 697, 1463, 6551, 1083, 1198, 5927, 3342, 3742, 288, 1944, 4414, 4823, 1948, 2353, 6495, 2433, 352, 4381, 52, 2983, 1365, 2334, 592, 1135, 53, 1180, 5365, 3257, 4365, 5538, 5890, 1134, 2059, 1341, 4623, 2085, 5871, 2188, 3401, 4938, 5683, 3070, 5392, 3050, 2740, 6055, 4413, 2238, 5372, 6411, 2538, 5687, 1354, 3738, 4285, 3221, 2515, 1486, 1907, 6515, 4598, 843, 4971, 2735, 2692, 2828, 4724, 5348, 1443, 5429, 4067, 421, 2495, 1195, 3921, 3284, 5366, 1126, 3631, 3072, 3799, 6325, 4546, 2861, 6125, 3286, 1703, 358, 1694, 4272, 5363, 3508, 3030, 948, 188, 5

#### Checking summaries

In [9]:
%%time
# Hamiltonian-level summary held by the simulation. In the saved output it has
# one row per hamiltonian id, with t1, t2, type_of, columns named 0 and 1,
# phase and pred_phase.
ham_summary = simulation.hamiltonian_summary
print("length of ham_summary: ", len(ham_summary))
# Last expression: rendered as a table by the notebook.
ham_summary

length of ham_summary:  6561
CPU times: user 193 µs, sys: 4 µs, total: 197 µs
Wall time: 112 µs


Unnamed: 0_level_0,t1,t2,type_of,0,1,phase,pred_phase
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,-2.0,-2.00,test,0.50,0.50,999,0
1,-2.0,-1.95,test,0.68,0.32,0,0
2,-2.0,-1.90,test,0.69,0.31,0,0
3,-2.0,-1.85,test,0.76,0.24,0,0
4,-2.0,-1.80,test,0.84,0.16,0,0
...,...,...,...,...,...,...,...
6556,2.0,1.80,test,0.86,0.14,0,0
6557,2.0,1.85,test,0.82,0.18,0,0
6558,2.0,1.90,test,0.72,0.28,0,0
6559,2.0,1.95,test,0.62,0.38,0,0


In [10]:
# Eigenvector-level summary held by the simulation (id, phase, pred_phase and
# type_of for every eigenvector row).
eigen_summary = simulation.eigenvector_summary
# The label now names the object actually being measured; it previously read
# "ham_summary", copied over from the hamiltonian summary cell.
print("length of eigen_summary: ", len(eigen_summary))
eigen_summary

length of ham_summary:  656100


Unnamed: 0,id,phase,pred_phase,type_of
0,0,999,1,test
1,0,999,1,test
2,0,999,0,test
3,0,999,1,test
4,0,999,1,test
...,...,...,...,...
656095,6560,999,0,test
656096,6560,999,0,test
656097,6560,999,0,test
656098,6560,999,1,test


#### Checking accuracies

In [11]:
# Accuracy dictionary held by the simulation. In the saved output the keys are
# "<level>_<split>" with level in {eigenvector, hamiltonian} and split in
# {train, val, test}.
simulation.accuracy

{'eigenvector_train': 0.9814388489208633,
 'eigenvector_val': 0.96381,
 'eigenvector_test': 0.8161611374407582,
 'hamiltonian_train': 1.0,
 'hamiltonian_val': 1.0,
 'hamiltonian_test': 1.0}

#### Checking data stored in memory

In [12]:
# List of hamiltonian summaries kept in memory by the simulation. The saved
# output is an empty list — presumably because store_in_lists was False for
# this run; confirm against run_simulation.
ham_summary_list = simulation.hamiltonian_summary_list
ham_summary_list

[]

In [13]:
# List of eigenvector summaries kept in memory by the simulation. The saved
# output is an empty list — presumably because store_in_lists was False for
# this run; confirm against run_simulation.
eigen_summary_list = simulation.eigenvector_summary_list
eigen_summary_list

[]

In [14]:
# Accuracy lists held by the simulation, keyed like simulation.accuracy. Every
# list is empty in the saved output — presumably because store_in_lists was
# False for this run; confirm against run_simulation.
accuracy_list = simulation.accuracy_list
accuracy_list

{'eigenvector_train': [],
 'eigenvector_val': [],
 'eigenvector_test': [],
 'hamiltonian_train': [],
 'hamiltonian_val': [],
 'hamiltonian_test': []}

In [1]:
# Scratch: list aliasing vs. copying (independent of the simulation cells above).
a = [1,2,3]

In [2]:
# Display the current contents of a.
a

[1, 2, 3]

In [3]:
# Plain assignment binds b to the same list object as a; no copy is made.
b = a

In [4]:
# b was bound with `b = a`, so a write through b is also visible through a.
b[2] = 33
a

[1, 2, 33]

In [5]:
# Start again from a fresh list for the copy experiment.
a = [1,2,3]
a

[1, 2, 3]

In [6]:
# list(a) builds a new list object holding the same elements (a shallow copy).
b = list(a)
b

[1, 2, 3]

In [7]:
# b was created with list(a), so this write leaves a untouched.
b[2] = 33
a

[1, 2, 3]

In [8]:
# Only the copy carries the modified element.
b

[1, 2, 33]

In [10]:
import numpy as np

In [11]:
# Scratch: small 2x2 integer matrix used to try out column normalisation.
ar = np.array([[3,9],[4,12]])
ar

array([[ 3,  9],
       [ 4, 12]])

In [12]:
# axis=0 reduces over the rows, giving one Euclidean norm per column of ar.
norms = np.linalg.norm(ar,axis=0)
norms

array([ 5., 15.])

In [13]:
# Hand check of the squared norm of the column [9, 12]: 9**2 + 12**2.
81+144

225

In [14]:
# Square root of 225 (= 81 + 144); the norm of the column [9, 12].
np.sqrt(225)

15.0

In [15]:
# Broadcasting scales each column of ar by the reciprocal of its norm, so every
# column of the result has unit Euclidean norm.
1/norms*ar

array([[0.6, 0.6],
       [0.8, 0.8]])