In [1]:
import pandas as pd
# ignore future warnings, some code needs to be updated when packages are updated!
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
import csv
import h5py
import numpy as np
from joblib import load

from sklearn.metrics import r2_score
from tools import add_interactions

In [2]:
work_dir = "/home/mlindner/coen/micha/netsci_vs_gnn/"

model_infos = []

for root, dirs, files in os.walk(work_dir + "models_paper"):

    for file in files:
        if file.endswith(".csv") and not file.startswith("gnn") and not "extended" in file:
            with open(os.path.join(root, file), 'r') as data:
                model_infos.append(list(csv.DictReader(data))[0])
_mi = pd.DataFrame(model_infos)


In [3]:
mi = _mi[(_mi["task"] == "SNBS")]

In [4]:
datapath = "/p/projects/coen/christian/datasets/snbs_homogeneous_dataset/single_files_netsci/"

for country in ["elmod", "osf_france",  "osf_gb",  "osf_spain",  "texas"]:
    
    print(country)
        
    hf = h5py.File(datapath + country + "/snbs.h5", 'r')
    y = np.array(hf.get(list(hf.keys())[0])).flatten()
    hf.close()
            

    X = pd.read_csv(datapath + country + "/input_features.csv").drop(columns=["node_cat", "proper leaf"])

    
    if country not in ["texas"]:
        X = X.drop(columns=["neighbor_degree_mean"])


    Xinter = add_interactions(X.drop(columns=["backup_capacity", "maximal_line_load_post_dc"]))
    
    for i in mi.index:
        
        model = load(mi["model_path"][i] + "model.joblib")
        
        # It's important to scale the model data with the StandardScaler (model[0])
        if mi["with_interactions"][i]=="True":
            model[0].fit(Xinter)
        else:
            model[0].fit(X)
        
        if mi["model_name"][i] in ["GradientBoostingClassifierSNBS", "LogisticRegressionSNBS"]:
            
            print("Classifier")
            
            if mi["with_interactions"][i]=="True":
                ev_r2 = r2_score(y, model.predict_proba(Xinter)[:, 1])
                
            else:
                ev_r2 = r2_score(y, model.predict_proba(X)[:, 1])
                    
        else:    
            
            print("Regressor")

            if mi["with_interactions"][i]=="True":         
                ev_r2 = r2_score(y, model.predict(Xinter))
            else:
                ev_r2 = r2_score(y, model.predict(X))
                
        print(mi["model_name"][i], " tr", mi["nodes_per_grid"][i], ": ", 
              ev_r2)

        _mi.at[i,"ev_" + country + "_r2"] = ev_r2



elmod
Regressor
omp1LinearSNBS  tr 100 :  0.24837598511877002
Regressor
omp1LinearSNBS  tr 20 :  -0.386731356194576
Regressor
omp1LinearSNBS  tr 100 :  0.25540750039775617
Regressor
omp1LinearSNBS  tr 20 :  -0.026926060722210376
Regressor
omp1LinearSNBS  tr 100 :  0.25887503067449635
Regressor
omp1LinearSNBS  tr 20 :  -0.3473317192304677
Regressor
omp1LinearSNBS  tr 100 :  0.14557955814793222
Regressor
omp1LinearSNBS  tr 20 :  -0.1533072375481146
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.051291036023501824
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.5325369705728416
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.5325369705728416
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.051291036023501824
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.6030478219891748
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.06018632882505315
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.15506306912271361
Regressor
GradientBoostingRegressionSNBS  tr 100 :

Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.34914494061800017
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.611216175759272
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.611216175759272
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.34914494061800017
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.6882201391110645
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.46195850988137044
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.4026795633730176
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.663994023960168
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.4026795633730176
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.4636405817154138
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.1471597135751488
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.4636405817154138
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.6930817405284155
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.611216175759272
Regressor
Gra

Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.740607529185906
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.14404094780721644
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.7705335246354268
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.17501883925972883
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.17003335110998408
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.7667290447279385
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.17003335110998408
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.913690143397154
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.25206456785750153
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.913690143397154
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.8073243764475702
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.740607529185906
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.14404094780721644
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.14404094780721644
Reg

Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.2917436500191152
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.0764745406077465
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.0045788281181200485
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.1819043280445144
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.0045788281181200485
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.3271255369918252
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.27341486756226185
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.3271255369918252
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.2748532047310268
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.4838645001801867
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.4303386315346944
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.4303386315346944
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.4838645001801867
Regressor
GradientBoostingRegressionSNBS  tr 20 :  -0.4303386315346944

Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.12825937157849143
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.2855199441787588
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.12825937157849143
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.03351492489237451
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.03962379011439621
Regressor
GradientBoostingRegressionSNBS  tr 100 :  -0.03351492489237451
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.26456925837965717
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.25352991723839247
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.26676251581452215
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.26676251581452215
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.25352991723839247
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.26676251581452215
Regressor
GradientBoostingRegressionSNBS  tr 100 :  0.24243412164574574
Regressor
GradientBoostingRegressionSNBS  tr 20 :  0.0396237901143962

In [5]:
i=43
_mi.loc[i,:]

model_name                                                   omp1LinearSNBS
model                     Pipeline(steps=[('standardscaler', StandardSca...
scaler                                                     StandardScaler()
task                                                                   SNBS
random_state                                                               
datetime                                         2023-05-04 15:36:57.968421
nodes_per_grid                                                           20
features_path             /home/mlindner/coen/micha/netsci_vs_gnn/grids2...
labels_path               /home/mlindner/coen/micha/netsci_vs_gnn/grids2...
train_r2                                                 0.4233669406768461
ev20_r2                                                  0.4274297177257468
ev100_r2                                                 0.3415478778557044
evTex_r2                                              0.0004000469460393452
train_f2    

In [6]:
_mi.to_csv(work_dir + "models_paper/model_info_extended.csv", index=False)

# OLD

In [7]:
if False:
    datapath = "/p/projects/coen/christian/datasets/snbs_homogeneous_dataset/single_files_netsci/"

    for country in ["elmod", "osf_france",  "osf_gb",  "osf_spain",  "texas"]:

        print(country)

        if country == "ds20/test":
            hf = h5py.File("/home/mlindner/coen/micha/netsci_vs_gnn/grids20/snbs_complete.h5", 'r')
            y = np.array(hf.get(list(hf.keys())[0])).flatten()
            hf.close()
            y = y[int(len(y) * 0.85):]
        else:
            hf = h5py.File(datapath + country + "/snbs.h5", 'r')
            y = np.array(hf.get(list(hf.keys())[0])).flatten()
            hf.close()


        if country == "ds20/test":
            _X = pd.read_csv("/home/mlindner/coen/micha/netsci_vs_gnn/grids20/network_measures_final.csv").drop(
                columns=["node_cat", "proper leaf"])
            Xtrain = _X.iloc[:int(len(_X) * 0.7)]
            X = _X.iloc[int(len(_X) * 0.85):]
        else:
            X = pd.read_csv(datapath + country + "/input_features.csv").drop(columns=["node_cat", "proper leaf"])


        if country not in ["texas", "ds20/test"]:
            X = X.drop(columns=["neighbor_degree_mean"])

        #_Xinter = add_interactions(_X.drop(columns=["backup_capacity", "maximal_line_load_post_dc"]))

        Xinter = add_interactions(X.drop(columns=["backup_capacity", "maximal_line_load_post_dc"]))

        _mi["ev_" + country + "_r2"] = None

        for i in mi.index:
            model = load(mi["model_path"][i] + "model.joblib")
            if mi["with_interactions"][i]=="True":
                model[0].fit(Xinter)
            else:
                model[0].fit(X)

            if mi["model_name"][i] in ["GradientBoostingClassifierSNBS", "LogisticRegressionSNBS"]:
                print("Classifier")
                if mi["with_interactions"][i]=="True":
                    ev_r2 = r2_score(y, model.predict_proba(Xinter)[:, 1])
                    print(mi["model_name"][i], " tr", mi["nodes_per_grid"][i], ": ", 
                          ev_r2)
                    _mi.loc[i,"ev_" + country + "_r2"] = ev_r2
                else:
                    ev_r2 = r2_score(y, model.predict_proba(X)[:, 1])
                    print(mi["model_name"][i], " tr", mi["nodes_per_grid"][i], ": ", 
                          ev_r2)
                    _mi.loc[i,"ev_" + country + "_r2"] = ev_r2

            else:    
                print("Regressor")

                if mi["with_interactions"][i]=="True":
                    ev_r2 = r2_score(y, model.predict(Xinter))
                    print(mi["model_name"][i], " tr", mi["nodes_per_grid"][i], ": ", 
                          ev_r2)
                    _mi.loc[i,"ev_" + country + "_r2"] = ev_r2
                else:
                    ev_r2 = r2_score(y, model.predict(X))
                    print(mi["model_name"][i], " tr", mi["nodes_per_grid"][i], ": ", 
                          ev_r2)
                    _mi.loc[i,"ev_" + country + "_r2"] = ev_r2
