In [1]:
import os
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Root directory that holds one sub-folder of saved model outputs per dataset.
# Written as a raw string: the Windows separators form sequences such as `\l`,
# `\P`, `\e` and `\m`, which are not valid escapes and make a plain literal
# emit a SyntaxWarning on recent Python versions. The resulting path text is
# exactly the same as before.
model_folder = r'C:/Users\lukec\PycharmProjects\emissions-tracking-conda\emissions-tracking\models/'
# Dataset sub-folder names. The trailing '/' matters because paths are built by
# plain string concatenation (model_folder + dataset).
datasets = ['petrochemicals/', 'CT_manufacturing/', 'unfccc/']

In [94]:
# Index every file whose name contains 'accuracies' in each dataset folder and
# keep a single file per (Dataset, Class, Level) combination.
clf_files, dataset_type = [], []
for dataset in datasets:
    # os.listdir returns entries in arbitrary order. Sorting makes the
    # keep='last' de-duplication below select the same file on every run
    # (the lexicographically greatest name within each group).
    d_files = sorted(file for file in os.listdir(model_folder + dataset) if 'accuracies' in file)
    clf_files.extend(d_files)
    # dataset[:-1] drops the trailing '/' so the label is the bare folder name.
    dataset_type.extend([dataset[:-1]] * len(d_files))

clf_file_df = pd.DataFrame({
    'Classifier': clf_files,
    'Dataset': dataset_type,
    # Path relative to model_folder: '<dataset>/<file name>'.
    'Filepath': [i + '/' + j for (i, j) in zip(dataset_type, clf_files)],
})
# File names are split on '_': the second token is stored as Class and the
# third as Level. A name with fewer than three tokens raises IndexError.
clf_file_df['Class'] = [i.split('_')[1] for i in clf_file_df['Classifier']]
clf_file_df['Level'] = [i.split('_')[2] for i in clf_file_df['Classifier']]
clf_file_df = clf_file_df.drop_duplicates(subset=['Dataset', 'Class', 'Level'], keep='last')

In [95]:
# For every indexed accuracy log, record the hyperparameter string of the row
# with the highest accuracy.
hyperparameters = []
for filepath in clf_file_df['Filepath']:
    df = pd.read_table(model_folder + filepath)
    # The log is read as one 'Hyperparameters,Accuracy' column. Split on the
    # LAST comma only, so commas inside the hyperparameter text stay intact.
    # `n` is passed by keyword: pandas >= 2.0 no longer accepts it
    # positionally and raises TypeError.
    df[['Hyperparameters', 'Accuracy']] = df['Hyperparameters,Accuracy'].str.rsplit(',', n=1, expand=True)
    df = df.drop(columns=['Hyperparameters,Accuracy'])
    df['Accuracy'] = df['Accuracy'].astype(float)
    # idxmax returns the label of the first row holding the maximum accuracy.
    max_index = df['Accuracy'].idxmax()
    hyperparameters.append(df.loc[max_index, 'Hyperparameters'])
clf_file_df['Hyperparameters'] = hyperparameters
# Snapshot under the name `sklearn`, which a later cell concatenates with the
# neural-network results (presumably these are the scikit-learn models -
# confirm). clf_file_df itself is rebuilt by the following cell.
sklearn = clf_file_df.copy()

In [96]:
# Rebuild the file index, this time from the files whose name contains both
# 'iter' and '.npy' in each dataset folder.
clf_files = []
dataset_type = []
for dataset in datasets:
    folder_entries = os.listdir(model_folder + dataset)
    d_files = [name for name in folder_entries if 'iter' in name and '.npy' in name]
    clf_files += d_files
    # One dataset label (folder name without its trailing '/') per file found.
    dataset_type += [dataset[:-1]] * len(d_files)

# Path relative to model_folder: '<dataset>/<file name>'.
relative_paths = [label + '/' + name for label, name in zip(dataset_type, clf_files)]
clf_file_df = pd.DataFrame(
    list(zip(clf_files, dataset_type, relative_paths)),
    columns=['Classifier', 'Dataset', 'Filepath'],
)
# Underscore-separated tokens of the file name: token 1 -> Class, token 2 -> Level.
name_tokens = [name.split('_') for name in clf_file_df['Classifier']]
clf_file_df['Class'] = [tokens[1] for tokens in name_tokens]
clf_file_df['Level'] = [tokens[2] for tokens in name_tokens]

In [97]:
# Load the stored average accuracy of every indexed .npy file and rebuild its
# hyperparameter string from the file name, then keep the most accurate run
# per (Dataset, Class, Level).
hyperparameters = []
accuracy = []
for name, path in zip(clf_file_df['Classifier'], clf_file_df['Filepath']):
    # NOTE(review): allow_pickle=True unpickles the file contents, which can
    # execute arbitrary code - only load files from a trusted source.
    stored = np.load(model_folder + path, allow_pickle=True).item()
    accuracy.append(stored['average_acc'])
    # Second-to-last '_' token is used as hidden_dim, third-to-last as
    # learning_rate.
    tokens = name.split('_')
    hyperparameters.append("{'hidden_dim': " + tokens[-2] + ", 'learning_rate': " + tokens[-3] + '}')

clf_file_df['Accuracy'] = accuracy
clf_file_df['Hyperparameters'] = hyperparameters

group_keys = ['Dataset', 'Class', 'Level']
# Descending sort puts the highest accuracy first inside each group, so
# keep='first' retains the best run.
ranked = clf_file_df.sort_values(group_keys + ['Accuracy'], ascending=False)
neurals = ranked.drop_duplicates(group_keys, keep='first')
neurals = neurals.drop(columns='Accuracy')
# Restrict to the three model classes of interest.
wanted_classes = ['DeepNet', 'ResNet', 'LSTM']
keep_mask = [cls in wanted_classes for cls in neurals['Class']]
neurals = neurals[keep_mask]

In [120]:
# The GCN and GraphSAGE rows are cloned from the DeepNet rows (same Dataset and
# Level) and given a hard-coded hyperparameter string.
deepnet_rows = neurals['Class'] == 'DeepNet'
graph_copy = neurals[deepnet_rows]
fixed_graph_hps = "{'learning_rate': 0.01, 'hidden_dim': 64}"

gcn = graph_copy.copy()
gcn['Class'] = 'GCN'
gcn['Hyperparameters'] = fixed_graph_hps

sage = graph_copy.copy()
sage['Class'] = 'GraphSAGE'
sage['Hyperparameters'] = fixed_graph_hps

# Stack all model families and keep only the columns that get reported.
report_columns = ['Dataset', 'Class', 'Level', 'Hyperparameters']
all_hps = pd.concat([sklearn, neurals, gcn, sage])[report_columns]

In [122]:
# Swap the internal identifiers for display labels. Values missing from a
# mapping are left unchanged by Series.replace.
dataset_labels = {
    'petrochemicals': 'Petrochemicals',
    'unfccc': 'UNFCCC',
    'CT_manufacturing': 'ClimateTRACE',
}
model_labels = {
    'adaBoostClassifier': 'Adaboost',
    'decisionTreeClassifier': 'Decision tree',
    'KNeighboursClassifier': 'K-nearest neighbours',
    'linearLogisticRegression': 'Logistic regression',
    'linearPassiveAggressiveClassifier': 'Passive aggressive',
    'linearPerceptron': 'Perceptron',
    'linearSGDClassifier': 'SGD',
    'linearSVC': 'SVC',
    'mlpClassifier': 'Multilayer perceptron',
    'naiveBayesClassifier': 'Naive Bayes',
    'randomForestClassifier-n50': 'Random forest',
}
level_labels = {'l1': '1', 'l2': '2', 'l3': '3'}

all_hps['Dataset'] = all_hps['Dataset'].replace(dataset_labels)
all_hps['Class'] = all_hps['Class'].replace(model_labels)
all_hps['Level'] = all_hps['Level'].replace(level_labels)
# The 'Class' column is reported under the heading 'Model'.
all_hps = all_hps.rename(columns={'Class': 'Model'})

In [125]:
# Export the table, ordered by Dataset, Model and Level, as a CSV file.
# The path is a raw string: `\l`, `\P`, `\e`, `\o` and `\G` are not valid
# escape sequences and make a plain literal emit a SyntaxWarning on recent
# Python versions. The destination path itself is unchanged.
# NOTE(review): the DataFrame index is written as the first CSV column (to_csv
# default) - confirm that downstream readers expect it.
all_hps.sort_values(['Dataset', 'Model', 'Level']).to_csv(r'C:/Users\lukec\PycharmProjects\emissions-tracking-conda\emissions-tracking\outputs\GRL_for_IE/hyperparameter_values.csv')