# Running simulation template

This notebook runs the template notebook `simulation_template.ipynb`, which performs simulations for nearest neighbours (ssh1) and second neighbours (ssh2) systems.

## SSH1

In [1]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

template = TEMPLATE_NOTEBOOK
parameters = {"model_kw":{"criterion":"entropy"}, \
             "allowed_windings":[0,1], "val_split":0.9, "features_to_use":None, "shuffle_features":False,\
             "n_experiments":100, "start_n":0, "fit_params":None, "shuffle_rows": True,"pred_params":None,\
             "random_features":False, "store_in_lists":False, "save_eigenvector":True,\
             "save_hamiltonian":True, "save_accuracy":True, "save_models":True,\
             }
kernel_name = KERNEL_NAME

CPU times: user 753 ms, sys: 241 ms, total: 993 ms
Wall time: 621 ms


### periodic_100_6561

#### FIRST scenario: Best real space lattice sites in $S_1$

In the first scenario, we run simulations using the $S_1 = (0, 1, 2, 3, 5, 7, 19, 35, 45, 48, 49, 50)$ best lattices in real space, as determined by the information entropy signatures.

$S_1 = (0, 1, 3, 50, 51, 53)$  (original)

In [2]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

# top 50 feature importances
#last_feat = 50
#feat_importances = [(0, 0.16899985193976078), (50, 0.16027277610525203), (51, 0.11079281443086275), (3, 0.0926629612204455), 
# (1, 0.08824562429981071), (53, 0.07988250298048015), (99, 0.015836938014481734), (49, 0.01484315641985572), 
# (2, 0.010822260392078063), (55, 0.008957866126319576), (95, 0.008414674068438486), (98, 0.008057135310372296), 
# (5, 0.007792252406353874), (52, 0.007385587848317109), (7, 0.006623515418571701), (45, 0.006607713818302485), 
# (48, 0.006293213602415334), (19, 0.005933081765279274), (57, 0.005882264224216421), (35, 0.005479889010246498), 
# (69, 0.0054570355907490676), (27, 0.00447417649531073), (85, 0.004458557210976204), (4, 0.00420143796180483), 
# (77, 0.00413570155842476), (61, 0.003994790525128792), (63, 0.0038759003092861083), (59, 0.0036996333790794197), 
# (54, 0.0036347993094256216), (47, 0.0036234625816597755), (71, 0.003603795735903073), (79, 0.0035808150644768126),
# (11, 0.0034640557819812174), (91, 0.0033132048040646726), (87, 0.003107792473377982), (9, 0.003011943068315158),
# (23, 0.0029588897390549406), (43, 0.0028314427237322317), (46, 0.0028254483424383654), (97, 0.0027419754432558936),
# (96, 0.0027136817338326872), (13, 0.002655571858878165), (21, 0.0026312163636595053), (41, 0.0025718635994273122),
# (29, 0.0025170508257494513), (65, 0.00248265078102954), (89, 0.0024791136278104614), (73, 0.0024695425215702053),
# (15, 0.0024054047893902184), (31, 0.002368444403328408), (67, 0.0023612475975730387), (39, 0.002360166063613734),
# (37, 0.002340373455149163), (6, 0.0023400790096368847), (83, 0.0022624432222204875), (25, 0.002251427563336752), 
# (56, 0.002234121290194305), (44, 0.0022329941029173967), (17, 0.0022209013246331454), (33, 0.002174252226738413),
# (75, 0.0021398705213947064), (81, 0.0021256099473102373), (93, 0.002046176894401509), (94, 0.0018772740236961533),
# (8, 0.0018121065730517312), (58, 0.0017649255708184206), (66, 0.0017036816784777539), (84, 0.0016786488685746016),
# (76, 0.0016389031852822328), (68, 0.0016111239456025508), (92, 0.001559795863374426), (32, 0.0015596758827273866),
# (26, 0.0015559663699651752), (40, 0.0015177668518860962), (34, 0.001515199851615081), (90, 0.0015106467003453549),
# (30, 0.0014768067614376738), (74, 0.0014713456121911738), (42, 0.001422004271836671), (24, 0.001413577464975718),
# (60, 0.0013938528594716476), (78, 0.0013873836623677109), (70, 0.0013659325407205777), (16, 0.0013533146503988702),
# (82, 0.0013454259567028603), (12, 0.001340314026983008), (10, 0.001337532070549847), (22, 0.001322642086262051),
# (80, 0.0013164387264242828), (18, 0.0012988499240033154), (88, 0.0012723641055948842), (14, 0.0012684601267185446),
# (20, 0.0012643279564056534), (36, 0.0012609734572397906), (62, 0.0012462203687537326), (86, 0.0012432290782296222),
# (64, 0.0012390639522612999), (72, 0.0012362748022238996), (38, 0.001172217037278506), (28, 0.0010805919114497825)]#

#top_feat = [f for f,_ in feat_importances[:last_feat]]

S_1 = [0, 1, 2, 3, 5, 7, 19, 35, 45, 48, 49, 50]
### parameters
parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_1ST_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_1 #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 244854
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH1_1ST_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 3.87 s, sys: 41.8 ms, total: 3.91 s
Wall time: 3min 46s
CPU times: user 3.87 s, sys: 41.8 ms, total: 3.91 s
Wall time: 3min 46s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T08:23:14.908229',
     'end_time': '2020-07-09T08:23:14.963357',
     'duration': 0.055128,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T08:23:14.983167',
     'end_time': '2020-07-09T08:23:14.998137'

#### SECOND scenario: Best real space lattice sites in $S'_1$ 

In the second scenario we run simulations using the best $S'_1 = [0, 1, 2, 3, 49, 50]$ real space lattice sites, as determined by the entropy signatures

In [3]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

# top 50 feature importances
#last_feat = 50
#feat_importances = [(0, 0.16899985193976078), (50, 0.16027277610525203), (51, 0.11079281443086275), (3, 0.0926629612204455), 
# (1, 0.08824562429981071), (53, 0.07988250298048015), (99, 0.015836938014481734), (49, 0.01484315641985572), 
# (2, 0.010822260392078063), (55, 0.008957866126319576), (95, 0.008414674068438486), (98, 0.008057135310372296), 
# (5, 0.007792252406353874), (52, 0.007385587848317109), (7, 0.006623515418571701), (45, 0.006607713818302485), 
# (48, 0.006293213602415334), (19, 0.005933081765279274), (57, 0.005882264224216421), (35, 0.005479889010246498), 
# (69, 0.0054570355907490676), (27, 0.00447417649531073), (85, 0.004458557210976204), (4, 0.00420143796180483), 
# (77, 0.00413570155842476), (61, 0.003994790525128792), (63, 0.0038759003092861083), (59, 0.0036996333790794197), 
# (54, 0.0036347993094256216), (47, 0.0036234625816597755), (71, 0.003603795735903073), (79, 0.0035808150644768126),
# (11, 0.0034640557819812174), (91, 0.0033132048040646726), (87, 0.003107792473377982), (9, 0.003011943068315158),
# (23, 0.0029588897390549406), (43, 0.0028314427237322317), (46, 0.0028254483424383654), (97, 0.0027419754432558936),
# (96, 0.0027136817338326872), (13, 0.002655571858878165), (21, 0.0026312163636595053), (41, 0.0025718635994273122),
# (29, 0.0025170508257494513), (65, 0.00248265078102954), (89, 0.0024791136278104614), (73, 0.0024695425215702053),
# (15, 0.0024054047893902184), (31, 0.002368444403328408), (67, 0.0023612475975730387), (39, 0.002360166063613734),
# (37, 0.002340373455149163), (6, 0.0023400790096368847), (83, 0.0022624432222204875), (25, 0.002251427563336752), 
# (56, 0.002234121290194305), (44, 0.0022329941029173967), (17, 0.0022209013246331454), (33, 0.002174252226738413),
# (75, 0.0021398705213947064), (81, 0.0021256099473102373), (93, 0.002046176894401509), (94, 0.0018772740236961533),
# (8, 0.0018121065730517312), (58, 0.0017649255708184206), (66, 0.0017036816784777539), (84, 0.0016786488685746016),
# (76, 0.0016389031852822328), (68, 0.0016111239456025508), (92, 0.001559795863374426), (32, 0.0015596758827273866),
# (26, 0.0015559663699651752), (40, 0.0015177668518860962), (34, 0.001515199851615081), (90, 0.0015106467003453549),
# (30, 0.0014768067614376738), (74, 0.0014713456121911738), (42, 0.001422004271836671), (24, 0.001413577464975718),
# (60, 0.0013938528594716476), (78, 0.0013873836623677109), (70, 0.0013659325407205777), (16, 0.0013533146503988702),
# (82, 0.0013454259567028603), (12, 0.001340314026983008), (10, 0.001337532070549847), (22, 0.001322642086262051),
# (80, 0.0013164387264242828), (18, 0.0012988499240033154), (88, 0.0012723641055948842), (14, 0.0012684601267185446),
# (20, 0.0012643279564056534), (36, 0.0012609734572397906), (62, 0.0012462203687537326), (86, 0.0012432290782296222),
# (64, 0.0012390639522612999), (72, 0.0012362748022238996), (38, 0.001172217037278506), (28, 0.0010805919114497825)]#

#top_feat = [f for f,_ in feat_importances[:last_feat]]

S_1_prime = [0, 1, 2, 3, 49, 50]
### parameters
parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_2ND_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_1_prime #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 93474
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH1_2ND_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 3.69 s, sys: 35.2 ms, total: 3.72 s
Wall time: 2min 54s
CPU times: user 3.69 s, sys: 35.2 ms, total: 3.73 s
Wall time: 2min 54s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T08:27:00.911797',
     'end_time': '2020-07-09T08:27:00.966015',
     'duration': 0.054218,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T08:27:00.985556',
     'end_time': '2020-07-09T08:27:01.000765'

#### THIRD scenario:DFT of REAL signal, computed from ALL lattice sites (51 features)

The third scenario uses as features the REAL part of the the first HALF of components of the DFT of real space eigenvectors, computed from ALL real space lattice sites. This leads to $N=51$ engineered features. Note that in this case we use ALL $N=100$ features in real space to compute the DFT.

In [4]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_3RD_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 467

### Fourier features
parameters["fourier_features_to_use"] = list(range(0,51,1))
parameters["fourier_mode"] = "dft"
parameters["fourier_real"] = "real"
parameters["normalize"]= False
parameters["fillna"] = False

output_file = SSH1_3RD_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 5.15 s, sys: 206 ms, total: 5.36 s
Wall time: 33min 58s
CPU times: user 5.15 s, sys: 206 ms, total: 5.36 s
Wall time: 33min 58s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T08:29:55.742438',
     'end_time': '2020-07-09T08:29:55.761787',
     'duration': 0.019349,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T08:29:55.777572',
     'end_time': '2020-07-09T08:29:55.792846'

#### FOURTH scenario: DCT of EVEN-symmetric REAL signal (51 features) 

The fourth scenario uses as features the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The number of resulting features is $\frac{N}{2}+1 = 51$

In [5]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_4TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 548936312

### Fourier features
parameters["fourier_features_to_use"] = list(range(0,51,1))
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_4TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 4.52 s, sys: 121 ms, total: 4.64 s
Wall time: 17min 36s
CPU times: user 4.52 s, sys: 121 ms, total: 4.64 s
Wall time: 17min 36s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:03:54.281123',
     'end_time': '2020-07-09T09:03:54.300520',
     'duration': 0.019397,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:03:54.316375',
     'end_time': '2020-07-09T09:03:54.331330'

#### FIFTH scenario: DCT of EVEN-symmetric REAL signal, using only EVEN components of DCF (2 5features) 

The fifth scenario uses as features only the EVEN components of the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The resulting number of features is $\frac{N}{4}+1=26$

In [6]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_5TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = list(range(0,51,2))
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_5TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 4.16 s, sys: 82.4 ms, total: 4.24 s
Wall time: 11min 30s
CPU times: user 4.16 s, sys: 82.4 ms, total: 4.25 s
Wall time: 11min 30s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:21:30.347329',
     'end_time': '2020-07-09T09:21:30.402057',
     'duration': 0.054728,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:21:30.421786',
     'end_time': '2020-07-09T09:21:30.436750'

#### SIXTH scenario: DCT of EVEN-symmetric REAL signal, using only eigenmodes $K_1$ in wavevector space

The sixth scenario uses as features only the $K_1 = (0, 2, 4, 6, 8, 10, 12, 14, 32, 34, 38, 50)$ components of the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The resulting number of features is $~\frac{N}{8}~12$

In [7]:
#wave_vector_feat_importances = 
#[(0, 0.4923328511353009), (2, 0.05251497113726127), (98, 0.05251497113726127), (4, 0.04210544679644646), 
# (96, 0.04210544679644646), (6, 0.030226632887051755), (94, 0.030226632887051755), (8, 0.018650735073436106), 
# (92, 0.018650735073436106), (50, 0.012961892338709716), (10, 0.012925218870197968), (90, 0.012925218870197968),
# (12, 0.010951004272627428), (88, 0.010951004272627428), (38, 0.009288008288961948), (62, 0.009288008288961948),
# (34, 0.006404152033276135), (66, 0.006404152033276135), (32, 0.0062081926076385675), (68, 0.0062081926076385675),
# (14, 0.006115572674571644), (86, 0.006115572674571644), (26, 0.0051176102107449405), (74, 0.0051176102107449405),
# (36, 0.005082106737029533), (64, 0.005082106737029533), (40, 0.004987092425987919), (60, 0.004987092425987919),
# (16, 0.004878620619281358), (84, 0.004878620619281358), (30, 0.0044167165098046935), (70, 0.0044167165098046935),
# (42, 0.004122271626061276), (58, 0.004122271626061276), (18, 0.0036323535281528436), (82, 0.0036323535281528436),
# (44, 0.003621275691890175), (56, 0.003621275691890175), (28, 0.003554002540922099), (72, 0.003554002540922099),
# (20, 0.0031565390121082292), (80, 0.0031565390121082292), (22, 0.003104932279285677), (78, 0.003104932279285677),
# (24, 0.0023434130859103976), (76, 0.0023434130859103976), (46, 0.001962997858913271), (54, 0.001962997858913271),
# (48, 0.001160614305452886), (52, 0.001160614305452886), (25, 0.00026770896438104477), (75, 0.00026770896438104477),
# (27, 4.96076009976231e-05), (73, 4.96076009976231e-05), (49, 4.334352622524924e-05), (51, 4.334352622524924e-05),
# (43, 4.32401785122011e-05), (57, 4.32401785122011e-05), (41, 4.035113704171787e-05), (59, 4.035113704171787e-05), 
# (47, 3.7834849707335936e-05), (53, 3.7834849707335936e-05), (39, 3.644840834568696e-05), (61, 3.644840834568696e-05),
# (21, 3.368925420169474e-05), (79, 3.368925420169474e-05), (37, 3.1298321084773594e-05), (63, 3.1298321084773594e-05),
# (23, 3.0469942485146163e-05), (77, 3.0469942485146163e-05), (29, 3.042021740530591e-05), (71, 3.042021740530591e-05),
# (19, 2.4335483054333445e-05), (81, 2.4335483054333445e-05), (15, 1.8873221744361873e-05), (85, 1.8873221744361873e-05),
# (45, 1.6807151731166545e-05), (55, 1.6807151731166545e-05), (11, 1.5392266207914976e-05), (89, 1.5392266207914976e-05),
# (3, 1.4882016447273064e-05), (97, 1.4882016447273064e-05), (35, 1.3591627536666368e-05), (65, 1.3591627536666368e-05), 
# (17, 1.1821295367191327e-05), (83, 1.1821295367191327e-05), (31, 1.1819688814982332e-05), (69, 1.1819688814982332e-05),
# (7, 1.1140097886458889e-05), (93, 1.1140097886458889e-05), (5, 1.042499603855921e-05), (95, 1.042499603855921e-05),
# (1, 1.000301277463957e-05), (99, 1.000301277463957e-05), (13, 8.510388592953844e-06), (87, 8.510388592953844e-06),
# (33, 6.94226485909972e-06), (67, 6.94226485909972e-06), (9, 3.1912785366739184e-06), (91, 3.1912785366739184e-06)]

# Wavevector eigenmodes
K_1 = [0, 2, 4, 6, 8, 10, 12, 14, 32, 34, 38, 50]

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_6TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_1
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_6TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:33:01.291036',
     'end_time': '2020-07-09T09:33:01.307555',
     'duration': 0.016519,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:33:01.324239',
     'end_time': '2020-07-09T09:33:01.338883'

#### SEVENTH scenario: DCT of EVEN-symmetric REAL signal, using only eigenmodes $K'_1$ in wavevector space

The seventh scenario uses as features only the $K'_1 = (0, 2, 4, 6, 8, 50)$ components of the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The resulting number of features is $~\frac{N}{16} ~ 6$

In [8]:
#wave_vector_feat_importances = 
#[(0, 0.4923328511353009), (2, 0.05251497113726127), (98, 0.05251497113726127), (4, 0.04210544679644646), 
# (96, 0.04210544679644646), (6, 0.030226632887051755), (94, 0.030226632887051755), (8, 0.018650735073436106), 
# (92, 0.018650735073436106), (50, 0.012961892338709716), (10, 0.012925218870197968), (90, 0.012925218870197968),
# (12, 0.010951004272627428), (88, 0.010951004272627428), (38, 0.009288008288961948), (62, 0.009288008288961948),
# (34, 0.006404152033276135), (66, 0.006404152033276135), (32, 0.0062081926076385675), (68, 0.0062081926076385675),
# (14, 0.006115572674571644), (86, 0.006115572674571644), (26, 0.0051176102107449405), (74, 0.0051176102107449405),
# (36, 0.005082106737029533), (64, 0.005082106737029533), (40, 0.004987092425987919), (60, 0.004987092425987919),
# (16, 0.004878620619281358), (84, 0.004878620619281358), (30, 0.0044167165098046935), (70, 0.0044167165098046935),
# (42, 0.004122271626061276), (58, 0.004122271626061276), (18, 0.0036323535281528436), (82, 0.0036323535281528436),
# (44, 0.003621275691890175), (56, 0.003621275691890175), (28, 0.003554002540922099), (72, 0.003554002540922099),
# (20, 0.0031565390121082292), (80, 0.0031565390121082292), (22, 0.003104932279285677), (78, 0.003104932279285677),
# (24, 0.0023434130859103976), (76, 0.0023434130859103976), (46, 0.001962997858913271), (54, 0.001962997858913271),
# (48, 0.001160614305452886), (52, 0.001160614305452886), (25, 0.00026770896438104477), (75, 0.00026770896438104477),
# (27, 4.96076009976231e-05), (73, 4.96076009976231e-05), (49, 4.334352622524924e-05), (51, 4.334352622524924e-05),
# (43, 4.32401785122011e-05), (57, 4.32401785122011e-05), (41, 4.035113704171787e-05), (59, 4.035113704171787e-05), 
# (47, 3.7834849707335936e-05), (53, 3.7834849707335936e-05), (39, 3.644840834568696e-05), (61, 3.644840834568696e-05),
# (21, 3.368925420169474e-05), (79, 3.368925420169474e-05), (37, 3.1298321084773594e-05), (63, 3.1298321084773594e-05),
# (23, 3.0469942485146163e-05), (77, 3.0469942485146163e-05), (29, 3.042021740530591e-05), (71, 3.042021740530591e-05),
# (19, 2.4335483054333445e-05), (81, 2.4335483054333445e-05), (15, 1.8873221744361873e-05), (85, 1.8873221744361873e-05),
# (45, 1.6807151731166545e-05), (55, 1.6807151731166545e-05), (11, 1.5392266207914976e-05), (89, 1.5392266207914976e-05),
# (3, 1.4882016447273064e-05), (97, 1.4882016447273064e-05), (35, 1.3591627536666368e-05), (65, 1.3591627536666368e-05), 
# (17, 1.1821295367191327e-05), (83, 1.1821295367191327e-05), (31, 1.1819688814982332e-05), (69, 1.1819688814982332e-05),
# (7, 1.1140097886458889e-05), (93, 1.1140097886458889e-05), (5, 1.042499603855921e-05), (95, 1.042499603855921e-05),
# (1, 1.000301277463957e-05), (99, 1.000301277463957e-05), (13, 8.510388592953844e-06), (87, 8.510388592953844e-06),
# (33, 6.94226485909972e-06), (67, 6.94226485909972e-06), (9, 3.1912785366739184e-06), (91, 3.1912785366739184e-06)]

# Wavevector eigenmodes
K_1_prime = [0, 2, 4, 6, 8, 50]

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_7TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_1_prime
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_7TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:38:53.567468',
     'end_time': '2020-07-09T09:38:53.598887',
     'duration': 0.031419,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:38:53.621680',
     'end_time': '2020-07-09T09:38:53.637319'

#### EIGHTH scenario: DCT of EVEN-symmetric REAL signal, using only sites $S_1$ in real space and eigenmodes $K_1$ in wavevector space

In [9]:
#Real space lattice sites
S_1 = [0, 1, 2, 3, 5, 7, 19, 35, 45, 48, 49, 50]

# Wavevector eigenmodes
K_1 = [0, 2, 4, 6, 8, 10, 12, 14, 32, 34, 38, 50]

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_8TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_1#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_1
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_8TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:42:57.833868',
     'end_time': '2020-07-09T09:42:57.886787',
     'duration': 0.052919,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:42:57.906384',
     'end_time': '2020-07-09T09:42:57.921752'

#### NINTH scenario: DCT of EVEN-symmetric REAL signal, using only sites $S'_1$ in real space and eigenmodes $K'_1$ in wavevector space

In [10]:
#Real space lattice sites
S_1_prime = [0, 1, 2, 3, 49, 50]

# Wavevector eigenmodes
K_1_prime = [0, 2, 4, 6, 8, 50]

parameters["csv_path"] = SSH1_PERIODIC_100_6561_CSV 
parameters["model_name"] = "DecisionTreeClassifier"
parameters["simulation_dir"] = SSH1_PERIODIC_9TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_1_prime#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_1_prime
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH1_9TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:47:40.787055',
     'end_time': '2020-07-09T09:47:40.806209',
     'duration': 0.019154,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:47:40.822164',
     'end_time': '2020-07-09T09:47:40.974462'

## SSH2

In [11]:
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

template = TEMPLATE_NOTEBOOK
parameters = {"model_kw":{"criterion":"entropy", "n_estimators":25, "n_jobs":-1}, \
             "allowed_windings":[-1,0,1,2], "val_split":0.5, "features_to_use":None, "shuffle_features":False,\
             "n_experiments":100, "start_n":0, "fit_params":None, "shuffle_rows": True,"pred_params":None,\
             "random_features":False, "store_in_lists":False, "save_eigenvector":True,\
             "save_hamiltonian":True, "save_accuracy":True, "save_models":True,\
             }
kernel_name = KERNEL_NAME

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CPU times: user 330 µs, sys: 8 µs, total: 338 µs
Wall time: 254 µs


### periodic_100_6561

#### FIRST scenario: Best real space lattice sites in $S_2$

In the first scenario, we run simulations using the $S_2 = [0, 1, 2, 3, 4, 5, 6, 7, 46, 48, 49, 50]$ best lattices in real space, as determined by the information entropy signatures.


In [12]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

# feature importances
#[(98, 0.030775680224168177), (1, 0.03069088538607333), (0, 0.026794700674389625), (99, 0.026249868968330863),
# (96, 0.020897799386158718), (3, 0.020489140983260747), (2, 0.01992793686700233), (97, 0.019545673056848806),
# (94, 0.01923075195671657), (5, 0.019045271320840478), (4, 0.017291096293802714), (95, 0.017048847481531043),
# (48, 0.015353109828878693), (51, 0.014934669606650726), (50, 0.013899164937009753), (92, 0.01371775078111256),
# (7, 0.013462982352684496), (49, 0.013421274946371955), (6, 0.012883612083351053), (93, 0.012769882886851382),
# (46, 0.012337897368774504), (53, 0.01212310608565037), (90, 0.010997653223096683), (9, 0.010834481547638681),
# (52, 0.010812653542591071), (47, 0.010655015434978536), (44, 0.010465047210191402), (55, 0.010281045213583478),
# (8, 0.009828027039565111), (91, 0.00959694782849967), (10, 0.009302976753229216), (88, 0.009278280526791806),
# (89, 0.00919392033505742), (11, 0.009179509784012994), (54, 0.009047708913725763), (45, 0.008918866304158874),
# (42, 0.008529473018171781), (57, 0.008364823778403564), (86, 0.008246572727366506), (56, 0.00821180818167621),
# (13, 0.00816970905230619), (12, 0.008114276523127217), (43, 0.008081824889517098), (87, 0.00794484045520985),
# (40, 0.007701200518589432), (59, 0.007612698497346249), (58, 0.0075668053024482256), (84, 0.007536839705708784),
# (14, 0.00747972131229634), (85, 0.007477795352594296), (41, 0.007466758344364884), (15, 0.007434672006973498),
# (16, 0.007302080712545534), (83, 0.007225210080693078), (38, 0.007221329558484434), (25, 0.007184232143171722),
# (61, 0.007168350645728535), (67, 0.007152534098465049), (34, 0.007124371893597504), (65, 0.007121552553277619),
# (32, 0.0071142212435522665), (60, 0.007104063693545338), (75, 0.007080742078305485), (82, 0.007060642189080975),
# (27, 0.007037596851734687), (73, 0.007036728229713947), (66, 0.007012846480636373), (39, 0.0070128347301864555),
# (62, 0.007012131745055074), (74, 0.007009007463465764), (72, 0.006999191033856658), (33, 0.00699226356888025),
# (37, 0.006972519992091061), (18, 0.006956490676749397), (17, 0.006945800270561406), (24, 0.0069234690872524915),
# (26, 0.006922955676325168), (22, 0.006918202675106495), (20, 0.006916923010632973), (64, 0.006914878448676203),
# (81, 0.00691250007864052), (77, 0.006906017355299236), (36, 0.006904235174864243), (79, 0.006892372418539275),
# (35, 0.006880046330414811), (63, 0.006861090174322749), (29, 0.006847900311648197), (68, 0.006802621205071987), 
# (31, 0.006783038952794761), (70, 0.006769967143038218), (78, 0.006723531131868591), (30, 0.006719894872152803),
# (69, 0.006715314101826717), (21, 0.0067062765156384666), (23, 0.006688320823666467), (28, 0.006683088644608946),
# (80, 0.006666799865989856), (71, 0.006626751430553458), (19, 0.006624407453970405), (76, 0.006619598384038603)]

S_2 = [0, 1, 2, 3, 4, 5, 6, 7, 46, 48, 49, 50]

### parameters
parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_1ST_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_2 #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 244854
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH2_1ST_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 4.78 s, sys: 153 ms, total: 4.93 s
Wall time: 24min 44s
CPU times: user 4.78 s, sys: 153 ms, total: 4.94 s
Wall time: 24min 44s


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:51:14.215387',
     'end_time': '2020-07-09T09:51:14.234465',
     'duration': 0.019078,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T09:51:14.250100',
     'end_time': '2020-07-09T09:51:14.265045'

#### SECOND scenario: Best real space lattice sites in $S'_2$ 

In the second scenario we run simulations using the best $S'_2 = [0, 1, 2, 3, 4, 5]$ real space lattice sites, as determined by the entropy signatures

In [13]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

# feature importances
#[(98, 0.030775680224168177), (1, 0.03069088538607333), (0, 0.026794700674389625), (99, 0.026249868968330863),
# (96, 0.020897799386158718), (3, 0.020489140983260747), (2, 0.01992793686700233), (97, 0.019545673056848806),
# (94, 0.01923075195671657), (5, 0.019045271320840478), (4, 0.017291096293802714), (95, 0.017048847481531043),
# (48, 0.015353109828878693), (51, 0.014934669606650726), (50, 0.013899164937009753), (92, 0.01371775078111256),
# (7, 0.013462982352684496), (49, 0.013421274946371955), (6, 0.012883612083351053), (93, 0.012769882886851382),
# (46, 0.012337897368774504), (53, 0.01212310608565037), (90, 0.010997653223096683), (9, 0.010834481547638681),
# (52, 0.010812653542591071), (47, 0.010655015434978536), (44, 0.010465047210191402), (55, 0.010281045213583478),
# (8, 0.009828027039565111), (91, 0.00959694782849967), (10, 0.009302976753229216), (88, 0.009278280526791806),
# (89, 0.00919392033505742), (11, 0.009179509784012994), (54, 0.009047708913725763), (45, 0.008918866304158874),
# (42, 0.008529473018171781), (57, 0.008364823778403564), (86, 0.008246572727366506), (56, 0.00821180818167621),
# (13, 0.00816970905230619), (12, 0.008114276523127217), (43, 0.008081824889517098), (87, 0.00794484045520985),
# (40, 0.007701200518589432), (59, 0.007612698497346249), (58, 0.0075668053024482256), (84, 0.007536839705708784),
# (14, 0.00747972131229634), (85, 0.007477795352594296), (41, 0.007466758344364884), (15, 0.007434672006973498),
# (16, 0.007302080712545534), (83, 0.007225210080693078), (38, 0.007221329558484434), (25, 0.007184232143171722),
# (61, 0.007168350645728535), (67, 0.007152534098465049), (34, 0.007124371893597504), (65, 0.007121552553277619),
# (32, 0.0071142212435522665), (60, 0.007104063693545338), (75, 0.007080742078305485), (82, 0.007060642189080975),
# (27, 0.007037596851734687), (73, 0.007036728229713947), (66, 0.007012846480636373), (39, 0.0070128347301864555),
# (62, 0.007012131745055074), (74, 0.007009007463465764), (72, 0.006999191033856658), (33, 0.00699226356888025),
# (37, 0.006972519992091061), (18, 0.006956490676749397), (17, 0.006945800270561406), (24, 0.0069234690872524915),
# (26, 0.006922955676325168), (22, 0.006918202675106495), (20, 0.006916923010632973), (64, 0.006914878448676203),
# (81, 0.00691250007864052), (77, 0.006906017355299236), (36, 0.006904235174864243), (79, 0.006892372418539275),
# (35, 0.006880046330414811), (63, 0.006861090174322749), (29, 0.006847900311648197), (68, 0.006802621205071987), 
# (31, 0.006783038952794761), (70, 0.006769967143038218), (78, 0.006723531131868591), (30, 0.006719894872152803),
# (69, 0.006715314101826717), (21, 0.0067062765156384666), (23, 0.006688320823666467), (28, 0.006683088644608946),
# (80, 0.006666799865989856), (71, 0.006626751430553458), (19, 0.006624407453970405), (76, 0.006619598384038603)]


S_2_prime = [0, 1, 2, 3, 4, 5]
### parameters
parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_2ND_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_2_prime #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 93474
### Fourier features
parameters["fourier_features_to_use"] = None
parameters["fourier_mode"] = None
parameters["fourier_real"] = None
parameters["fourier_normalize"]= None
parameters["fourier_fillna"] = None

output_file = SSH2_2ND_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…


CPU times: user 4.74 s, sys: 113 ms, total: 4.86 s
Wall time: 18min
CPU times: user 4.75 s, sys: 113 ms, total: 4.86 s
Wall time: 18min


{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T10:15:58.365279',
     'end_time': '2020-07-09T10:15:58.401552',
     'duration': 0.036273,
     'status': 'completed'}},
   'source': '# Simulation template \n\nIn this notebook we run the machine learning analysis of topological phase transitions occurring  in both nearest-neighbours SSH models (ssh1) and second neighbours models (ssh2) as decribed in the paper [Machine learning topological phases in real space](https://arxiv.org/abs/1901.01963). Here the simulation is run with features generated from fourier components in the first scenario. This scenario is characterized by using only the EVEN wavevector space eigenmodes, computed from ALL real space components.'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-07-09T10:15:58.423585',
     'end_time': '2020-07-09T10:15:58.439864'

#### THIRD scenario:DFT of REAL signal, computed from ALL lattice sites (51 features)

The third scenario uses as features the REAL part of the the first HALF of components of the DFT of real space eigenvectors, computed from ALL real space lattice sites. This leads to $N=51$ engineered features. Note that in this case we use ALL $N=100$ features in real space to compute the DFT.

In [14]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_3RD_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 467

### Fourier features
parameters["fourier_features_to_use"] = list(range(0,51,1))
parameters["fourier_mode"] = "dft"
parameters["fourier_real"] = "real"
parameters["normalize"]= False
parameters["fillna"] = False

output_file = SSH2_3RD_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




OSError: [Errno 28] No space left on device

CPU times: user 1.99 s, sys: 61.3 ms, total: 2.05 s
Wall time: 11min 4s


#### FOURTH scenario: DCT of EVEN-symmetric REAL signal (51 features) 

The fourth scenario uses as features the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The number of resulting features is $\frac{N}{2}+1 = 51$

In [15]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_4TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 548936312

### Fourier features
parameters["fourier_features_to_use"] = list(range(0,51,1))
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_4TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The history saving thread hit an unexpected error (OperationalError('database or disk is full',)).History will not be written to the database.


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




OSError: [Errno 28] No space left on device

CPU times: user 1.39 s, sys: 27.6 ms, total: 1.42 s
Wall time: 49.1 s


#### FIFTH scenario: DCT of EVEN-symmetric REAL signal, using only EVEN components of DCF (2 5features) 

The fifth scenario uses as features only the EVEN components of the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The resulting number of features is $\frac{N}{4}+1=26$

In [16]:
%%time
%%time
%load_ext autoreload
%autoreload 2
from simulation import *

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_5TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = list(range(0,51,2))
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_5TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




OSError: [Errno 28] No space left on device

CPU times: user 1.39 s, sys: 32.2 ms, total: 1.42 s
Wall time: 36.4 s


#### SIXTH scenario: DCT of EVEN-symmetric REAL signal, using only eigenmodes $K_2$ in wavevector space

The sixth scenario uses as features only the $K_2 = (0, 1, 2, 3, 4, 5, 6, 8, 42, 44, 46, 48)$ components of the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The resulting number of features is $~\frac{N}{8}~12$

In [17]:
#wave_vector_feat_importances = 
#[(0, 0.950806830424763), (2, 0.01331131304222723), (98, 0.01331131304222723), (1, 0.004476201323265753),
# (99, 0.004476201323265753), (4, 0.0035857537589848837), (96, 0.0035857537589848837), (3, 0.001257935893002076),
# (97, 0.001257935893002076), (6, 0.0007069831412851035), (94, 0.0007069831412851035), (46, 0.0002467272079133921),
# (54, 0.0002467272079133921), (8, 0.00018241203787459185), (92, 0.00018241203787459185), (48, 0.0001451951516648948),
# (52, 0.0001451951516648948), (44, 9.843040009282692e-05), (56, 9.843040009282692e-05), (5, 9.19011466551899e-05),
# (95, 9.19011466551899e-05), (42, 6.866192435836294e-05), (58, 6.866192435836294e-05), (18, 3.3323093081472305e-05),
# (82, 3.3323093081472305e-05), (12, 3.106487312033803e-05), (88, 3.106487312033803e-05), (47, 2.3816419839103435e-05),
# (53, 2.3816419839103435e-05), (34, 2.3241123361927566e-05), (66, 2.3241123361927566e-05), (16, 2.2323627288064983e-05),
# (84, 2.2323627288064983e-05), (32, 2.2124235763368948e-05), (68, 2.2124235763368948e-05), (30, 2.10288772451345e-05),
# (70, 2.10288772451345e-05), (10, 2.090529400303802e-05), (90, 2.090529400303802e-05), (38, 1.9410706597490878e-05), 
# (62, 1.9410706597490878e-05), (36, 1.875724022079967e-05), (64, 1.875724022079967e-05), (26, 1.825645649535029e-05),
# (74, 1.825645649535029e-05), (28, 1.7862830398443465e-05), (72, 1.7862830398443465e-05), (40, 1.6194565946892457e-05),
# (60, 1.6194565946892457e-05), (33, 1.5392780128594145e-05), (67, 1.5392780128594145e-05), (19, 1.4185213987116413e-05),
# (81, 1.4185213987116413e-05), (7, 1.356204999924156e-05), (93, 1.356204999924156e-05), (35, 1.1170423908057128e-05),
# (65, 1.1170423908057128e-05), (14, 1.112046577249778e-05), (86, 1.112046577249778e-05), (20, 9.752018202687417e-06),
# (80, 9.752018202687417e-06), (15, 9.367245953490434e-06), (85, 9.367245953490434e-06), (31, 8.822300302292681e-06),
# (69, 8.822300302292681e-06), (25, 6.643388933308398e-06), (75, 6.643388933308398e-06), (17, 6.609081046562858e-06),
# (83, 6.609081046562858e-06), (45, 5.544693390346116e-06), (55, 5.544693390346116e-06), (50, 5.207356471316938e-06),
# (29, 4.884862846567487e-06), (71, 4.884862846567487e-06), (27, 4.410013160570784e-06), (73, 4.410013160570784e-06),
# (24, 3.959576099120265e-06), (76, 3.959576099120265e-06), (49, 3.3599159617141762e-06), (51, 3.3599159617141762e-06),
# (9, 2.5089373221024502e-06), (91, 2.5089373221024502e-06), (21, 1.5004285162157062e-06), (79, 1.5004285162157062e-06),
# (43, 5.907772852569324e-07), (57, 5.907772852569324e-07), (37, 2.924375698681946e-07), (63, 2.924375698681946e-07),
# (23, 1.629456807410269e-07), (77, 1.629456807410269e-07), (41, 1.37098461515355e-07), (59, 1.37098461515355e-07),
# (11, 1.1995854099374903e-07), (89, 1.1995854099374903e-07), (22, 4.723512969971105e-08), (78, 4.723512969971105e-08),
# (39, 8.485226090679877e-09), (61, 8.485226090679877e-09), (13, 4.405272447523249e-09), (87, 4.405272447523249e-09)]

# Wavevector eigenmodes
K_2 = [0, 1, 2, 3, 4, 5, 6, 8, 42, 44, 46, 48]

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_6TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_2
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_6TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=54.0, style=ProgressStyle(description_wid…




OSError: [Errno 28] No space left on device

#### SEVENTH scenario: DCT of EVEN-symmetric REAL signal, using only eigenmodes $K'_2$ in wavevector space

The seventh scenario uses as features only the $K'_2 = (0, 1, 2, 3, 4, 5)$ components of the DCT computed assuming that the real space vectors are REAL and EVEN-symmetric around 0 and $\frac{N}{2}$. The wavevector space components are computed using all EVEN and ODD real lattice sites from the first half of the real space lattice. The resulting number of features is $~\frac{N}{16} ~ 6$

In [None]:
#wave_vector_feat_importances = 
#[(0, 0.950806830424763), (2, 0.01331131304222723), (98, 0.01331131304222723), (1, 0.004476201323265753),
# (99, 0.004476201323265753), (4, 0.0035857537589848837), (96, 0.0035857537589848837), (3, 0.001257935893002076),
# (97, 0.001257935893002076), (6, 0.0007069831412851035), (94, 0.0007069831412851035), (46, 0.0002467272079133921),
# (54, 0.0002467272079133921), (8, 0.00018241203787459185), (92, 0.00018241203787459185), (48, 0.0001451951516648948),
# (52, 0.0001451951516648948), (44, 9.843040009282692e-05), (56, 9.843040009282692e-05), (5, 9.19011466551899e-05),
# (95, 9.19011466551899e-05), (42, 6.866192435836294e-05), (58, 6.866192435836294e-05), (18, 3.3323093081472305e-05),
# (82, 3.3323093081472305e-05), (12, 3.106487312033803e-05), (88, 3.106487312033803e-05), (47, 2.3816419839103435e-05),
# (53, 2.3816419839103435e-05), (34, 2.3241123361927566e-05), (66, 2.3241123361927566e-05), (16, 2.2323627288064983e-05),
# (84, 2.2323627288064983e-05), (32, 2.2124235763368948e-05), (68, 2.2124235763368948e-05), (30, 2.10288772451345e-05),
# (70, 2.10288772451345e-05), (10, 2.090529400303802e-05), (90, 2.090529400303802e-05), (38, 1.9410706597490878e-05), 
# (62, 1.9410706597490878e-05), (36, 1.875724022079967e-05), (64, 1.875724022079967e-05), (26, 1.825645649535029e-05),
# (74, 1.825645649535029e-05), (28, 1.7862830398443465e-05), (72, 1.7862830398443465e-05), (40, 1.6194565946892457e-05),
# (60, 1.6194565946892457e-05), (33, 1.5392780128594145e-05), (67, 1.5392780128594145e-05), (19, 1.4185213987116413e-05),
# (81, 1.4185213987116413e-05), (7, 1.356204999924156e-05), (93, 1.356204999924156e-05), (35, 1.1170423908057128e-05),
# (65, 1.1170423908057128e-05), (14, 1.112046577249778e-05), (86, 1.112046577249778e-05), (20, 9.752018202687417e-06),
# (80, 9.752018202687417e-06), (15, 9.367245953490434e-06), (85, 9.367245953490434e-06), (31, 8.822300302292681e-06),
# (69, 8.822300302292681e-06), (25, 6.643388933308398e-06), (75, 6.643388933308398e-06), (17, 6.609081046562858e-06),
# (83, 6.609081046562858e-06), (45, 5.544693390346116e-06), (55, 5.544693390346116e-06), (50, 5.207356471316938e-06),
# (29, 4.884862846567487e-06), (71, 4.884862846567487e-06), (27, 4.410013160570784e-06), (73, 4.410013160570784e-06),
# (24, 3.959576099120265e-06), (76, 3.959576099120265e-06), (49, 3.3599159617141762e-06), (51, 3.3599159617141762e-06),
# (9, 2.5089373221024502e-06), (91, 2.5089373221024502e-06), (21, 1.5004285162157062e-06), (79, 1.5004285162157062e-06),
# (43, 5.907772852569324e-07), (57, 5.907772852569324e-07), (37, 2.924375698681946e-07), (63, 2.924375698681946e-07),
# (23, 1.629456807410269e-07), (77, 1.629456807410269e-07), (41, 1.37098461515355e-07), (59, 1.37098461515355e-07),
# (11, 1.1995854099374903e-07), (89, 1.1995854099374903e-07), (22, 4.723512969971105e-08), (78, 4.723512969971105e-08),
# (39, 8.485226090679877e-09), (61, 8.485226090679877e-09), (13, 4.405272447523249e-09), (87, 4.405272447523249e-09)]

# Wavevector eigenmodes
K_2_prime = [0, 1, 2, 3, 4, 5]

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_7TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = None#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_2_prime
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_7TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

#### EIGHTH scenario: DCT of EVEN-symmetric REAL signal, using only sites $S_2$ in real space and eigenmodes $K_2$ in wavevector space

In [None]:
#Real space lattice sites
S_2 = [0, 1, 2, 3, 4, 5, 6, 7, 46, 48, 49, 50]

# Wavevector eigenmodes
K_2 = [0, 1, 2, 3, 4, 5, 6, 8, 42, 44, 46, 48]

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_8TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_2#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_2
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_8TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)

#### NINTH scenario: DCT of EVEN-symmetric REAL signal, using only sites $S'_2$ in real space and eigenmodes $K'_2$ in wavevector space

In [None]:
#Real space lattice sites
S_2_prime = [0, 1, 2, 3, 4, 5]

# Wavevector eigenmodes
K_2_prime = [0, 1, 2, 3, 4, 5]

parameters["csv_path"] = SSH2_PERIODIC_100_6561_CSV 
parameters["model_name"] = "RandomForestClassifier"
parameters["simulation_dir"] = SSH2_PERIODIC_9TH_SCENARIO_100_6561_SIMULATION_DIR
parameters["features_to_use"] = S_2_prime#[0, 3, 50, 51] #[0, 1, 3, 50, 51, 53] 
parameters["random_state"] = 34896

### Fourier features
parameters["fourier_features_to_use"] = K_2_prime
parameters["fourier_mode"] = "dct"
parameters["fourier_real"] = None
parameters["fourier_normalize"]= False
parameters["fourier_fillna"] = False

output_file = SSH2_9TH_SCENARIO_100_6561_OUTPUT_FILE
pm.execute_notebook(template,
                    output_file,
                    parameters=parameters,
                    kernel_name=kernel_name,
                    nest_asyncio=True)