In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np

In [None]:
from visualizer import Visualizer

In [None]:
from sissopp import Inputs, FeatureSpace, SISSOClassifier, FeatureNode, Unit
from sissopp.py_interface import read_csv
from sissopp.py_interface.import_dataframe import get_unit

In [None]:
# Load the training table from a pickle stored next to the notebook.
# NOTE(review): unpickling executes arbitrary code -- only load files from a
# trusted source.
df_train = pd.read_pickle(
    './data/training_set'
)

In [None]:
# Per-element reference values keyed by chemical symbol.
zeta = {'S':16, 'As':33, 'Se':34, 'Sb':51, 'Te':52, 'Bi':83}  # values match the atomic numbers
chi = {'S':2.58, 'As':2.18, 'Se':2.55, 'Sb':2.05, 'Te':2.12, 'Bi':2.02}  # presumably electronegativity -- TODO confirm scale
lambd = {'S':0.05, 'As':0.19, 'Se':0.22, 'Sb':0.4, 'Te':0.49, 'Bi':1.25}  # presumably spin-orbit coupling -- TODO confirm units

# Compound labels are '_'-separated element symbols; the first five tokens are
# mapped, in order, onto the sites A, B, L, M and N.
site_labels = ['A', 'B', 'L', 'M', 'N']
property_tables = [('z', zeta), ('x', chi), ('l', lambd)]
feature_columns = [
    prefix + '_' + site
    for prefix, _table in property_tables
    for site in site_labels
]

# Collect one record per compound and build the frame in a single call instead
# of assigning row by row into an object-dtype frame: this is faster and keeps
# the features numeric (float64) rather than `object`.
records = []
for comp in df_train.index:
    ablmn = comp.split('_')
    # Positional indexing is deliberate: a label with fewer than five tokens
    # raises IndexError, an unknown element symbol raises KeyError.
    records.append({
        prefix + '_' + site: table[ablmn[pos]]
        for prefix, table in property_tables
        for pos, site in enumerate(site_labels)
    })

df_feat = pd.DataFrame(
    records,
    index=df_train.index,
    columns=feature_columns,
    dtype=float,
)

# Carry the target label over from the training table (aligned on the index).
df_feat['Class'] = df_train['Class']

In [None]:
# Tunable settings; each one is copied onto the SISSO++ `inputs` object in a
# later cell (inputs.max_rung, inputs.n_sis_select, inputs.n_dim,
# inputs.n_residual).
max_rung = 0
n_sis_select = 50
n_dim = 2
n_residual = 10

In [None]:
# Operators handed to SISSO++ through `inputs.allowed_ops` in a later cell.
selected_ops = [
    "add", "sub", "mult", "div", "abs_diff",
    "sq", "cb", "sqrt", "cbrt", "inv", "abs",
]
# NOTE(review): `selected_features` is not referenced by any other visible
# cell; the `cols` argument below repeats the literal instead.
selected_features = 'all'

# Create the SISSO++ inputs object from the training table, with "Class" as
# the target property and no samples left out.
# NOTE(review): this reads `df_train`, not the hand-built `df_feat` table --
# confirm that is the intended feature source.
inputs = read_csv(
    df_train,
    prop_key="Class",
    cols='all',
    max_rung=max_rung,
    leave_out_frac=0.0,
)

In [None]:
# Configure the SISSO++ run. This cell only sets options on `inputs`; the
# FeatureSpace and SISSOClassifier are constructed once, in the next cell,
# rather than being built here and then rebuilt immediately afterwards.

# Search settings taken from the hyperparameter cell.
inputs.max_rung = max_rung
inputs.allowed_ops = selected_ops
inputs.n_sis_select = n_sis_select
inputs.n_dim = n_dim
inputs.n_residual = n_residual
inputs.n_model_store = 1
inputs.calc_type = "classification"

# Training samples and target labels; no test set is supplied.
inputs.sample_ids_train = df_feat.index.tolist()
inputs.prop_train = df_feat["Class"].to_numpy()
inputs.prop_test = np.array([])
inputs.prop_label = "Class"
inputs.task_names = ["all_mats"]

In [None]:
# Build the feature space from the configured inputs, then the classifier
# that searches it.
feat_space = FeatureSpace(inputs)
sisso = SISSOClassifier(inputs, feat_space)

In [None]:
# Run the SISSO search; the resulting models are read from `sisso.models`
# in the cells that follow.
sisso.fit()

In [None]:
# Take the first stored model of the highest dimension. The index is derived
# from `sisso.n_dim` instead of a hard-coded 1 so that this lookup and the
# feature loop below always refer to the same model, whatever n_dim is.
model = sisso.models[sisso.n_dim - 1][0]

# NOTE(review): `prop_train` is used as the class label shown downstream --
# presumably the training property values; confirm predicted labels were not
# intended here.
classified = model.prop_train
compounds = df_train.index.to_list()

# One row per compound: its label plus one column per selected descriptor,
# named by the descriptor's expression string.
df = pd.DataFrame(data={
    "Compound": compounds,
    "Classification": classified,
})
for feat in model.feats:
    df[str(feat.expr)] = feat.value

# Display names; the next cell maps Classification == 0 to the first entry
# and every other value to the second.
classes = ['Topological insulators', 'Trivial insulators']


In [None]:
# Human-readable class name per row: classes[0] where Classification is 0,
# classes[1] for any other value.
df['Classes'] = np.where(
    df['Classification'] == 0,
    classes[0],
    classes[1],
)

In [None]:
# Index the results by compound label. The guard keeps the cell re-runnable:
# once 'Compound' has become the index it is no longer a column, and calling
# set_index again would raise KeyError.
if 'Compound' in df.columns:
    df = df.set_index('Compound')

In [None]:
# Expression strings of the selected model's descriptors, in reverse order;
# these match the column names added to `df`.
features = [
    str(feat.expr)
    for feat in sisso.models[sisso.n_dim - 1][0].feats
][::-1]

In [None]:
# Build the viewer over the results frame; the same descriptor list is passed
# for both feature arguments and 'Classification' is the target column.
visualizer = Visualizer(
    df,
    features,
    features,
    target='Classification',
)

In [None]:
# Render the viewer in the notebook output.
visualizer.show()

In [None]:
# Show the values of one descriptor column.
# The hard-coded name is a composite expression that only exists in `df` when
# the fitted model actually selected it (it is not a primary feature, so a
# max_rung = 0 run is not expected to produce it). Fall back to the first
# descriptor in `features` so the cell does not fail with KeyError on a
# fresh run.
descriptor_name = 'abs[[[X16C+X16D]*[Z11E/X16B]]-[[Z11C+Z11D]+abs[Z11C-Z11D]]]'
if descriptor_name not in df.columns:
    descriptor_name = features[0]
df[descriptor_name].values