# Running simulation template

This notebook runs the template notebook `simulation_template.ipynb`, which performs simulations for nearest neighbours (ssh1) and second neighbours (ssh2) systems.

# SSH1 periodic_100_6561

In [1]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

template = TEMPLATE_NOTEBOOK
parameters = {"model_kw":{"criterion":"entropy"}, \
             "allowed_windings":[0,1], "val_split":0.9, "features_to_use":None, "shuffle_features":False,\
             "n_experiments":100, "start_n":0, "fit_params":None, "shuffle_rows": True,"pred_params":None,\
             "random_features":False, "store_in_lists":False, "save_eigenvector":True,\
             "save_hamiltonian":True, "save_accuracy":True, "save_models":True,\
             }
kernel_name = KERNEL_NAME
seed=1000

CPU times: user 717 ms, sys: 248 ms, total: 965 ms
Wall time: 583 ms


#### SSH1 Real space all lattice sites 

In [2]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

### parameters
parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_REAL_SPACE_ALL_SITES
parameters["features_to_use"] = None #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 10
parameters["random_state"] += seed
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH1_REAL_SPACE_ALL_SITES_SIMULATION_NOTEBOOK 
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True);





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 4.77 s, sys: 124 ms, total: 4.9 s
Wall time: 16min 8s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T04:29:39.987605',
     'end_time': '2020-07-18T04:29:40.008341',
     'duration': 0.020736,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T04:29:40.025254',
     'end_time': '2020-07-18T04:29:40.044936'

#### SSH1 Real space best 04 lattice sites

In [3]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

json_dir = os.path.join(SSH1_REAL_SPACE_ALL_SITES,"feature_importances")
filename = os.path.join(json_dir,"sorted_feature_importance.json")
with open(filename) as f:
    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
feature_importances = {int(k): v for k,v in json_data.items()}
feature_importances = list(feature_importances.keys())
print("feature_iportances")
print(feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
feature_iportances
[0, 50, 51, 3, 1, 53, 99, 49, 2, 5, 98, 55, 52, 7, 35, 95, 45, 57, 48, 19, 27, 69, 59, 85, 4, 9, 77, 54, 91, 47, 46, 79, 71, 63, 67, 87, 96, 41, 29, 11, 31, 97, 61, 15, 43, 83, 93, 23, 21, 75, 81, 39, 25, 6, 37, 94, 13, 65, 73, 89, 33, 17, 56, 8, 76, 44, 26, 58, 34, 82, 24, 90, 84, 32, 14, 18, 30, 40, 74, 10, 16, 42, 92, 22, 28, 36, 64, 88, 12, 70, 66, 68, 78, 80, 72, 38, 60, 20, 62, 86]
CPU times: user 983 µs, sys: 0 ns, total: 983 µs
Wall time: 724 µs
CPU times: user 2.41 ms, sys: 80 µs, total: 2.49 ms
Wall time: 2.2 ms


In [4]:
json_dir

'/home/rio/ssh_simulations/ssh1/periodic_100_6561/real_space_all_sites/feature_importances'

In [5]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

### Collecting lattice sites
N_half = 50
N_sites = 4
S_1=[]
for s in feature_importances:
    if s<=N_half:
        S_1.append(s)
    if len(S_1)==N_sites:
        break
S_1 = sorted(S_1)

### parameters
parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_REAL_SPACE_BEST_04_SITES
parameters["features_to_use"] = S_1 #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 11
parameters["random_state"] += seed
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH1_REAL_SPACE_BEST_04_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True);





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 4.09 s, sys: 76 ms, total: 4.17 s
Wall time: 2min 42s
CPU times: user 4.09 s, sys: 76 ms, total: 4.17 s
Wall time: 2min 42s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:02:56.548541',
     'end_time': '2020-07-18T05:02:56.604535',
     'duration': 0.055994,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:02:56.624686',
     'end_time': '2020-07-18T05:02:56.642662'

#### SSH1 DFT all wavevector sites


In [7]:
%%time
%%time
%load_ext autoreload
%autoreload 2
#from simulation import *

#parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
#parameters["model_name"] = "DecisionTreeClassifier"
#parameters["simulation_dir"] = SSH1_DFT_ALL_WAVEVECTOR_SITES
#parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
#parameters["random_state"] = 12
#parameters["random_state"] += seed

#### Fourier features
#parameters["fourier_features_to_use"] = None#list(range(0,51,1))
#parameters["fourier_mode"] = "dft"
#parameters["fourier_real"] = None
#parameters["fourier_normalize"]= False
#parameters["fourier_fillna"] = False

#output_file = SSH1_DFT_ALL_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
#pm.execute_notebook(template,
#                    output_file,
#                    parameters=parameters,
#                    kernel_name=kernel_name,
#                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CPU times: user 257 µs, sys: 12 µs, total: 269 µs
Wall time: 202 µs
CPU times: user 866 µs, sys: 41 µs, total: 907 µs
Wall time: 830 µs


#### SSH1 DFT best 04 wavevector sites


In [8]:
%%time
%load_ext autoreload
%autoreload 2
#from simulation import *
#
#json_dir = os.path.join(SSH1_DFT_ALL_WAVEVECTOR_SITES,"feature_importances")
#filename = os.path.join(json_dir,"sorted_feature_importance.json")
#with open(filename) as f:
#    json_data = json.load(f)
##feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
#wavevector_feature_importances = {int(k): v for k,v in json_data.items()}
#wavevector_feature_importances = list(wavevector_feature_importances.keys())
#print("wavevector_feature_importances")
#print(wavevector_feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CPU times: user 470 µs, sys: 0 ns, total: 470 µs
Wall time: 325 µs
CPU times: user 988 µs, sys: 23 µs, total: 1.01 ms
Wall time: 858 µs


In [10]:
%%time
%load_ext autoreload
%autoreload 2
### Collecting lattice sites

#N_half = 50
#N_sites = 4
#
#### Collecting wavevector lattice sites
#
#K_1=[]
#for s in wavevector_feature_importances:
#    #if s<=N_half:
#    K_1.append(s)
#    if len(K_1)==N_sites:
#        break
#K_1 = sorted(K_1)
#
#parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
#parameters["model_name"] = "DecisionTreeClassifier"
#parameters["simulation_dir"] = SSH1_DFT_BEST_04_WAVEVECTOR_SITES
#parameters["features_to_use"] = None
#parameters["random_state"] = 13
#parameters["random_state"] += seed
#
#### Fourier features
#parameters["fourier_features_to_use"] = K_1
#parameters["fourier_mode"] = "dft"
#parameters["fourier_real"] = None
#parameters["fourier_normalize"]= False
#parameters["fourier_fillna"] = False

#output_file = SSH1_DFT_BEST_04_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
#pm.execute_notebook(template,
#                    output_file,
#                    parameters=parameters,
#                    kernel_name=kernel_name,
#                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CPU times: user 405 µs, sys: 0 ns, total: 405 µs
Wall time: 353 µs
CPU times: user 1.11 ms, sys: 0 ns, total: 1.11 ms
Wall time: 1.04 ms


#### SSH1 DFT best 04 wavevector sites from best 04 real space sites

In [11]:
%%time
%load_ext autoreload
%autoreload 2
#parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
#parameters["model_name"] = "DecisionTreeClassifier"
#parameters["simulation_dir"] = SSH1_DFT_BEST_04_WAVEVECTOR_SITES_FROM_BEST_04_REAL_SPACE_SITES
#parameters["features_to_use"] = S_1
#parameters["random_state"] = 14
#parameters["random_state"] += seed

#### Fourier features
#parameters["fourier_features_to_use"] = K_1
#parameters["fourier_mode"] = "dft"
#parameters["fourier_real"] = None
#parameters["fourier_normalize"]= False
#parameters["fourier_fillna"] = False

#output_file = SSH1_DFT_BEST_04_WAVEVECTOR_SITES_FROM_BEST_04_REAL_SPACE_SITES_SIMULATION_NOTEBOOK
#pm.execute_notebook(template,
#                    output_file,
#                    parameters=parameters,
#                    kernel_name=kernel_name,
#                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CPU times: user 265 µs, sys: 13 µs, total: 278 µs
Wall time: 240 µs


#### SSH1 DCT all wavevector sites


In [12]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_DCT_ALL_WAVEVECTOR_SITES
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 15
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = None#list(range(0,51,1))
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_DCT_ALL_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 4.91 s, sys: 82.1 ms, total: 4.99 s
Wall time: 17min 20s
CPU times: user 4.91 s, sys: 82.1 ms, total: 5 s
Wall time: 17min 20s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:12:11.260842',
     'end_time': '2020-07-18T05:12:11.281462',
     'duration': 0.02062,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:12:11.298414',
     'end_time': '2020-07-18T05:12:11.315240',

#### SSH1 DCT best 04 wavevector sites


In [13]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

json_dir = os.path.join(SSH1_DCT_ALL_WAVEVECTOR_SITES,"feature_importances")
filename = os.path.join(json_dir,"sorted_feature_importance.json")
with open(filename) as f:
    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
wavevector_feature_importances = {int(k): v for k,v in json_data.items()}
wavevector_feature_importances = list(wavevector_feature_importances.keys())
print("wavevector_feature_importances")
print(wavevector_feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
wavevector_feature_importances
[2, 1, 49, 36, 48, 14, 13, 12, 30, 0, 18, 38, 37, 16, 32, 34, 15, 10, 35, 20, 11, 31, 3, 19, 39, 40, 50, 8, 47, 26, 42, 24, 17, 22, 28, 7, 46, 9, 33, 41, 4, 27, 23, 29, 21, 43, 6, 5, 44, 45, 25]
CPU times: user 667 µs, sys: 27 µs, total: 694 µs
Wall time: 412 µs
CPU times: user 1.99 ms, sys: 79 µs, total: 2.07 ms
Wall time: 1.72 ms


In [14]:
### Collecting lattice sites

N_half = 50
N_sites = 4

### Collecting wavevector lattice sites

K_1=[]
for s in wavevector_feature_importances:
    #if s<=N_half:
    K_1.append(s)
    if len(K_1)==N_sites:
        break
K_1 = sorted(K_1)

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_DCT_BEST_04_WAVEVECTOR_SITES
parameters["features_to_use"] = None
parameters["random_state"] = 16
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_1
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_DCT_BEST_04_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:31:02.364229',
     'end_time': '2020-07-18T05:31:02.385219',
     'duration': 0.02099,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:31:02.402209',
     'end_time': '2020-07-18T05:31:02.419305',

#### SSH1 DCT best 04 wavevector sites from best 04 real space sites

In [15]:
parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_DCT_BEST_04_WAVEVECTOR_SITES_FROM_BEST_04_REAL_SPACE_SITES
parameters["features_to_use"] = S_1
parameters["random_state"] = 17
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_1
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_DCT_BEST_04_WAVEVECTOR_SITES_FROM_BEST_04_REAL_SPACE_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:33:58.128678',
     'end_time': '2020-07-18T05:33:58.149220',
     'duration': 0.020542,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:33:58.166200',
     'end_time': '2020-07-18T05:33:58.183063'

#### SSH1 DST all wavevector sites


In [16]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_DST_ALL_WAVEVECTOR_SITES
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 18
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = None#list(range(0,51,1))
parameters["fourier_mode"] = "dst"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_DST_ALL_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 5.19 s, sys: 129 ms, total: 5.32 s
Wall time: 24min 8s
CPU times: user 5.2 s, sys: 129 ms, total: 5.32 s
Wall time: 24min 8s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:42:56.373743',
     'end_time': '2020-07-18T05:42:56.390954',
     'duration': 0.017211,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T05:42:56.423520',
     'end_time': '2020-07-18T05:42:56.439907'

#### SSH1 DST best 04 wavevector sites


In [17]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

json_dir = os.path.join(SSH1_DST_ALL_WAVEVECTOR_SITES,"feature_importances")
filename = os.path.join(json_dir,"sorted_feature_importance.json")
with open(filename) as f:
    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
wavevector_feature_importances = {int(k): v for k,v in json_data.items()}
wavevector_feature_importances = list(wavevector_feature_importances.keys())
print("wavevector_feature_importances")
print(wavevector_feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
wavevector_feature_importances
[30, 18, 0, 28, 15, 1, 33, 35, 31, 29, 17, 9, 11, 19, 39, 37, 10, 22, 41, 7, 16, 13, 5, 40, 26, 27, 20, 23, 45, 47, 32, 12, 43, 21, 48, 42, 14, 25, 34, 36, 44, 6, 8, 2, 3, 38, 4, 46, 24]
CPU times: user 746 µs, sys: 23 µs, total: 769 µs
Wall time: 421 µs
CPU times: user 2.63 ms, sys: 59 µs, total: 2.69 ms
Wall time: 2.18 ms


In [18]:
### Collecting lattice sites

N_half = 50
N_sites = 4

### Collecting wavevector lattice sites

K_1=[]
for s in wavevector_feature_importances:
    #if s<=N_half:
    K_1.append(s)
    if len(K_1)==N_sites:
        break
K_1 = sorted(K_1)

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_DST_BEST_04_WAVEVECTOR_SITES
parameters["features_to_use"] = None
parameters["random_state"] = 19
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_1
parameters["fourier_mode"] = "dst"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_DST_BEST_04_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T06:08:36.129966',
     'end_time': '2020-07-18T06:08:36.186940',
     'duration': 0.056974,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T06:08:36.207035',
     'end_time': '2020-07-18T06:08:36.224021'

#### SSH1 DST best 04 wavevector sites from best 04 real space sites

In [19]:
parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_DST_BEST_04_WAVEVECTOR_SITES_FROM_BEST_04_REAL_SPACE_SITES
parameters["features_to_use"] = S_1
parameters["random_state"] = 20
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_1
parameters["fourier_mode"] = "dst"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_DST_BEST_04_WAVEVECTOR_SITES_FROM_BEST_04_REAL_SPACE_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T06:13:40.221800',
     'end_time': '2020-07-18T06:13:40.278350',
     'duration': 0.05655,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T06:13:40.298661',
     'end_time': '2020-07-18T06:13:40.315550',

# SSH2 periodic_100_6561

In [1]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

template = TEMPLATE_NOTEBOOK
parameters = {"model_kw":{"criterion":"entropy", "n_estimators":25, "n_jobs":-1}, \
             "allowed_windings":[-1,0,1,2], "val_split":0.5, "features_to_use":None, "shuffle_features":False,\
             "n_experiments":100, "start_n":0, "fit_params":None, "shuffle_rows": True,"pred_params":None,\
             "random_features":False, "store_in_lists":False, "save_eigenvector":True,\
             "save_hamiltonian":True, "save_accuracy":True, "save_models":True,\
             }
kernel_name = KERNEL_NAME
seed = 2000

CPU times: user 733 ms, sys: 256 ms, total: 989 ms
Wall time: 595 ms


#### SSH2 Real space all lattice sites 

In [2]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

### parameters
parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_REAL_SPACE_ALL_SITES
parameters["features_to_use"] = None #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 10
parameters["random_state"] += seed
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH2_REAL_SPACE_ALL_SITES_SIMULATION_NOTEBOOK 
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True);





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 7.95 s, sys: 269 ms, total: 8.22 s
Wall time: 1h 10min 40s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T06:24:44.483288',
     'end_time': '2020-07-18T06:24:44.503524',
     'duration': 0.020236,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T06:24:44.520129',
     'end_time': '2020-07-18T06:24:44.536904'

#### SSH2 Real space best 12 lattice sites



In [5]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

json_dir = os.path.join(SSH2_REAL_SPACE_ALL_SITES,"feature_importances")
filename = os.path.join(json_dir,"sorted_feature_importance.json")
with open(filename) as f:
    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
feature_importances = {int(k): v for k,v in json_data.items()}
feature_importances = list(feature_importances.keys())
print("feature_iportances")
print(feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
feature_iportances
[1, 98, 99, 0, 96, 3, 97, 2, 5, 94, 95, 4, 48, 51, 50, 7, 92, 49, 93, 6, 46, 53, 9, 52, 90, 47, 44, 55, 91, 8, 11, 89, 10, 88, 54, 45, 42, 57, 56, 13, 86, 43, 87, 12, 40, 59, 15, 85, 58, 41, 14, 84, 83, 16, 61, 67, 38, 25, 32, 27, 34, 65, 75, 39, 73, 74, 17, 60, 72, 33, 37, 62, 66, 77, 81, 29, 26, 35, 82, 79, 64, 18, 24, 63, 36, 31, 22, 70, 20, 21, 68, 23, 30, 71, 69, 78, 19, 76, 80, 28]
CPU times: user 1.01 ms, sys: 67 µs, total: 1.08 ms
Wall time: 644 µs
CPU times: user 2.42 ms, sys: 161 µs, total: 2.58 ms
Wall time: 2.14 ms


In [6]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

### Collecting lattice sites
N_half = 50
N_sites = 12
S_2 = []
for s in feature_importances:
    if s<=N_half:
        S_2.append(s)
    if len(S_2)==N_sites:
        break
S_2 = sorted(S_2)

### parameters
parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_REAL_SPACE_BEST_12_SITES
parameters["features_to_use"] = S_2 #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 11
parameters["random_state"] += seed
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH2_REAL_SPACE_BEST_12_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True);





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 5.24 s, sys: 93 ms, total: 5.34 s
Wall time: 24min 56s
CPU times: user 5.25 s, sys: 93.1 ms, total: 5.34 s
Wall time: 24min 56s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T15:18:47.875241',
     'end_time': '2020-07-18T15:18:47.894395',
     'duration': 0.019154,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T15:18:47.911398',
     'end_time': '2020-07-18T15:18:47.927579'

#### SSH2 DFT all wavevector sites


In [7]:
%%time
%%time
%load_ext autoreload
%autoreload 2
#from simulation import *
#
#parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
#parameters["model_name"] = "RandomForestClassifier"
#parameters["simulation_dir"] = SSH2_DFT_ALL_WAVEVECTOR_SITES
#parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
#parameters["random_state"] = 12
#parameters["random_state"] += seed

#### Fourier features
#parameters["fourier_features_to_use"] = None#list(range(0,51,1))
#parameters["fourier_mode"] = "dft"
#parameters["fourier_real"] = None
#parameters["fourier_normalize"]= False
#parameters["fourier_fillna"] = False

#output_file = SSH2_DFT_ALL_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
#pm.execute_notebook(template,
#                    output_file,
#                    parameters=parameters,
#                    kernel_name=kernel_name,
#                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 4.96 s, sys: 112 ms, total: 5.08 s
Wall time: 17min 29s
CPU times: user 4.96 s, sys: 112 ms, total: 5.08 s
Wall time: 17min 29s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-16T15:42:48.908498',
     'end_time': '2020-07-16T15:42:48.963362',
     'duration': 0.054864,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-16T15:42:48.983151',
     'end_time': '2020-07-16T15:42:48.999066'

#### SSH2 DFT best 12 wavevector sites


In [8]:
%%time
%%time
%load_ext autoreload
%autoreload 2
#from simulation import *

#json_dir = os.path.join(SSH2_DFT_ALL_WAVEVECTOR_SITES,"feature_importances")
#filename = os.path.join(json_dir,"sorted_feature_importance.json")
#with open(filename) as f:
#    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
#wavevector_feature_importances = {int(k): v for k,v in json_data.items()}
#wavevector_feature_importances = list(wavevector_feature_importances.keys())
#print("wavevector_feature_importances")
#print(wavevector_feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
wavevector_feature_iportances
[2, 1, 48, 49, 14, 36, 12, 30, 13, 37, 38, 20, 32, 18, 3, 16, 31, 0, 34, 10, 35, 15, 50, 39, 19, 11, 40, 47, 22, 7, 8, 24, 17, 41, 42, 28, 9, 33, 26, 46, 4, 23, 27, 29, 21, 6, 43, 44, 5, 45, 25]
CPU times: user 964 µs, sys: 0 ns, total: 964 µs
Wall time: 592 µs
CPU times: user 4.07 ms, sys: 100 µs, total: 4.17 ms
Wall time: 3.78 ms


In [9]:
### Collecting lattice sites

#N_half = 50
#N_sites = 12

### Collecting wavevector lattice sites

#K_2=[]
#for s in wavevector_feature_importances:
    #if s<=N_half:
#    K_2.append(s)
#    if len(K_2)==N_sites:
#        break
#K_2 = sorted(K_2)

#parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
#parameters["model_name"] = "RandomForestClassifier"
#parameters["simulation_dir"] = SSH2_DFT_BEST_12_WAVEVECTOR_SITES
#parameters["features_to_use"] = None
#parameters["random_state"] = 13
#parameters["random_state"] += seed

### Fourier features
#parameters["fourier_features_to_use"] = K_2
#parameters["fourier_mode"] = "dft"
#parameters["fourier_real"] = None
#parameters["fourier_normalize"]= False
#parameters["fourier_fillna"] = False

#output_file = SSH2_DFT_BEST_12_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
#pm.execute_notebook(template,
#                    output_file,
#                    parameters=parameters,
#                    kernel_name=kernel_name,
#                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-16T16:02:29.825602',
     'end_time': '2020-07-16T16:02:29.842034',
     'duration': 0.016432,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-16T16:02:29.858936',
     'end_time': '2020-07-16T16:02:29.875011'

#### SSH2 DFT best 12 wavevector sites from best 12 real space sites

In [10]:
#parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
#parameters["model_name"] = "RandomForestClassifier"
#parameters["simulation_dir"] = SSH2_DFT_BEST_12_WAVEVECTOR_SITES_FROM_BEST_12_REAL_SPACE_SITES
#parameters["features_to_use"] = S_2
#parameters["random_state"] = 14
#parameters["random_state"] += seed

### Fourier features
#parameters["fourier_features_to_use"] = K_2
#parameters["fourier_mode"] = "dft"
#parameters["fourier_real"] = None
#parameters["fourier_normalize"]= False
#parameters["fourier_fillna"] = False

#output_file = SSH2_DFT_BEST_12_WAVEVECTOR_SITES_FROM_BEST_12_REAL_SPACE_SITES_SIMULATION_NOTEBOOK
#pm.execute_notebook(template,
#                    output_file,
#                    parameters=parameters,
#                    kernel_name=kernel_name,
#                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-16T16:05:50.311010',
     'end_time': '2020-07-16T16:05:50.365910',
     'duration': 0.0549,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-16T16:05:50.386366',
     'end_time': '2020-07-16T16:05:50.404103',


#### SSH2 DCT all wavevector sites


In [3]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_DCT_ALL_WAVEVECTOR_SITES
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 15
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = None#list(range(0,51,1))
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_DCT_ALL_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 7.48 s, sys: 272 ms, total: 7.75 s
Wall time: 1h 3min 19s
CPU times: user 7.48 s, sys: 272 ms, total: 7.75 s
Wall time: 1h 3min 19s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T07:35:24.523034',
     'end_time': '2020-07-18T07:35:24.543468',
     'duration': 0.020434,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T07:35:24.560422',
     'end_time': '2020-07-18T07:35:24.576920'

#### SSH2 DCT best 12 wavevector sites


In [7]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

json_dir = os.path.join(SSH2_DCT_ALL_WAVEVECTOR_SITES,"feature_importances")
filename = os.path.join(json_dir,"sorted_feature_importance.json")
with open(filename) as f:
    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
wavevector_feature_importances = {int(k): v for k,v in json_data.items()}
wavevector_feature_importances = list(wavevector_feature_importances.keys())
print("wavevector_feature_importances")
print(wavevector_feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
wavevector_feature_importances
[1, 49, 0, 3, 50, 2, 5, 47, 48, 4, 7, 6, 45, 40, 46, 10, 44, 27, 9, 30, 8, 43, 41, 42, 29, 39, 20, 26, 31, 28, 11, 19, 23, 25, 24, 32, 21, 18, 22, 17, 33, 38, 12, 16, 34, 15, 14, 35, 13, 37, 36]
CPU times: user 892 µs, sys: 0 ns, total: 892 µs
Wall time: 625 µs
CPU times: user 3.04 ms, sys: 0 ns, total: 3.04 ms
Wall time: 2.76 ms


In [8]:
### Collecting lattice sites

N_half = 50
N_sites = 12

### Collecting wavevector lattice sites

K_2=[]
for s in wavevector_feature_importances:
    #if s<=N_half:
    K_2.append(s)
    if len(K_2)==N_sites:
        break
K_2 = sorted(K_2)

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_DCT_BEST_12_WAVEVECTOR_SITES
parameters["features_to_use"] = None
parameters["random_state"] = 16
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_2
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_DCT_BEST_12_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T15:45:25.832155',
     'end_time': '2020-07-18T15:45:25.852563',
     'duration': 0.020408,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T15:45:25.869505',
     'end_time': '2020-07-18T15:45:25.886744'

#### SSH2 DCT best 12 wavevector sites from best 12 real space sites

In [9]:
parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_DCT_BEST_12_WAVEVECTOR_SITES_FROM_BEST_12_REAL_SPACE_SITES
parameters["features_to_use"] = S_2
parameters["random_state"] = 17
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_2
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_DCT_BEST_12_WAVEVECTOR_SITES_FROM_BEST_12_REAL_SPACE_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T16:27:23.217944',
     'end_time': '2020-07-18T16:27:23.238262',
     'duration': 0.020318,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T16:27:23.255226',
     'end_time': '2020-07-18T16:27:23.271869'

#### SSH2 DST all wavevector sites


In [4]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_DST_ALL_WAVEVECTOR_SITES
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 18
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = None#list(range(0,51,1))
parameters["fourier_mode"] = "dst"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_DST_ALL_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…


CPU times: user 7.37 s, sys: 283 ms, total: 7.65 s
Wall time: 53min 58s
CPU times: user 7.37 s, sys: 283 ms, total: 7.65 s
Wall time: 53min 58s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T08:38:44.162855',
     'end_time': '2020-07-18T08:38:44.180190',
     'duration': 0.017335,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T08:38:44.199177',
     'end_time': '2020-07-18T08:38:44.215535'

#### SSH2 DST best 12 wavevector sites


In [10]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

json_dir = os.path.join(SSH2_DST_ALL_WAVEVECTOR_SITES,"feature_importances")
filename = os.path.join(json_dir,"sorted_feature_importance.json")
with open(filename) as f:
    json_data = json.load(f)
#feature_importances = json.load(os.path.join(json_dir,"feature_importance.json"))
wavevector_feature_importances = {int(k): v for k,v in json_data.items()}
wavevector_feature_importances = list(wavevector_feature_importances.keys())
print("wavevector_feature_importances")
print(wavevector_feature_importances)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
wavevector_feature_importances
[0, 48, 47, 1, 3, 2, 45, 46, 44, 4, 5, 43, 6, 42, 7, 41, 8, 9, 39, 40, 10, 38, 24, 25, 23, 22, 26, 29, 19, 21, 11, 20, 28, 37, 27, 18, 30, 12, 16, 36, 17, 31, 32, 14, 34, 15, 35, 13, 33]
CPU times: user 849 µs, sys: 44 µs, total: 893 µs
Wall time: 580 µs
CPU times: user 2.07 ms, sys: 106 µs, total: 2.17 ms
Wall time: 1.85 ms


In [11]:
### Collecting lattice sites

N_half = 50
N_sites = 12

### Collecting wavevector lattice sites

K_2=[]
for s in wavevector_feature_importances:
    #if s<=N_half:
    K_2.append(s)
    if len(K_2)==N_sites:
        break
K_2 = sorted(K_2)

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_DST_BEST_12_WAVEVECTOR_SITES
parameters["features_to_use"] = None
parameters["random_state"] = 19
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_2
parameters["fourier_mode"] = "dst"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_DST_BEST_12_WAVEVECTOR_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T17:10:10.139381',
     'end_time': '2020-07-18T17:10:10.197622',
     'duration': 0.058241,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T17:10:10.218228',
     'end_time': '2020-07-18T17:10:10.235647'

#### SSH2 DST best 12 wavevector sites from best 12 real space sites

In [12]:
parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_DST_BEST_12_WAVEVECTOR_SITES_FROM_BEST_12_REAL_SPACE_SITES
parameters["features_to_use"] = S_2
parameters["random_state"] = 20
parameters["random_state"] += seed

### Fourier features
parameters["fourier_features_to_use"] = K_2
parameters["fourier_mode"] = "dst"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_DST_BEST_12_WAVEVECTOR_SITES_FROM_BEST_12_REAL_SPACE_SITES_SIMULATION_NOTEBOOK
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=58.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T17:57:59.714817',
     'end_time': '2020-07-18T17:57:59.770623',
     'duration': 0.055806,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-18T17:57:59.790430',
     'end_time': '2020-07-18T17:57:59.807669'