In [50]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
from sklearn.metrics import auc

In [None]:
['target_qbc', 'masster_cotraining', 'masster_self_learning', 'pct']

In [83]:
class Evaluator:
    def __init__(self, dataset_name, method, metric_name, iterations, folds):
        self.dataset_name = dataset_name
        self.metric_name = metric_name
        self.method = method
        self.iterations = iterations
        self.folds = folds


    def generate_path(self, fold_number):
        if self.method == 'target_qbc':
            type = 'active_learning'
            file_path = Path(f'D:\Pessoal\Research\Active-and-Self-Learning-for-Multi-target-Regression\\reports\{type}\{self.dataset_name}\{self.method}_results_fold_{fold_number}.csv')
        
        if self.method == 'masster_cotraining' or self.method == 'masster_self_learning':
            type = 'proposed_method'
            file_path = Path(f'D:\Pessoal\Research\Active-and-Self-Learning-for-Multi-target-Regression\\reports\{type}\{self.dataset_name}\{self.method}_fold_{fold_number}.csv')
        
        if self.method == 'pct':
            type = 'semi_supervised_learning'
            file_path = Path(f'D:\Pessoal\Research\Active-and-Self-Learning-for-Multi-target-Regression\\reports\{type}\{self.dataset_name}\{self.method}_results_fold_{fold_number}.csv')
        
        return file_path
    
    def read_clean_dataframe(self, file_path):
        if file_path.exists():
            df = pd.read_csv(file_path, index_col=0)
            iteration_list = list(range(self.iterations)) 
            df = df[df['Iterations'].isin(iteration_list)]
        else:
            print(file_path)
            #raise(NameError)
        return df
    
    def concate_dataframes(self):
        self.full_df = pd.DataFrame()
        for fold_number in list(range(self.folds)):
            file_path = self.generate_path(fold_number)
            df = self.read_clean_dataframe(file_path)

            self.full_df = pd.concat([self.full_df, df], axis=0)
        
        self.full_df['Method'] = self.method

        return self.full_df
    
    def compute_auc(self):
        iteration_list = list(range(self.iterations)) 
        full_df = self.full_df
        auc_df = pd.DataFrame(index=full_df.index.unique(), columns=['R2', 'MSE', 'MAE', 'CA', 'ARRMSE'])

        for fold_index in full_df.index.unique():
            fold_df = full_df.loc[fold_index]
            for metric in auc_df.columns:
                x = fold_df['Iterations']
                y = fold_df[metric]
                auc_value = auc(x, y)
                auc_df.loc[fold_index, metric] = auc_value

        auc_df['Method'] = self.method
        self.auc_df = auc_df
        return self.auc_df
    
    def compile_methods(self):
        methods = ['target_qbc', 'masster_cotraining', 'masster_self_learning', 'pct']
        resume_auc = pd.DataFrame()
        for unique_method in methods:
            self.method = unique_method
            full_df = self.concate_dataframes()
            auc_df = self.compute_auc()
            resume_auc[unique_method] = auc_df[self.metric_name]
            self.resume_auc = resume_auc

        return self.resume_auc


In [87]:
class Evaluator:
    def __init__(self, dataset_name, metric_name, iterations, folds):
        self.dataset_name = dataset_name
        self.metric_name = metric_name
        self.iterations = iterations
        self.folds = folds
        self.all_methods = ['target_qbc', 'masster_cotraining', 'masster_self_learning', 'pct']

    # method is in self.all_methods
    def generate_path(self, fold_number, method):
        if method == 'target_qbc':
            type = 'active_learning'
        if method == 'masster_cotraining' or method == 'masster_self_learning':
            type = 'proposed_method'
        if method == 'pct':
            type = 'semi_supervised_learning'
        file_path = Path(f'reports\{type}\{self.dataset_name}\{method}_results_fold_{fold_number}.csv')
        return file_path
    
    def read_clean_dataframe(self, file_path):
        if file_path.exists():
            df = pd.read_csv(file_path, index_col=0)
            iteration_list = list(range(self.iterations)) 
            df = df[df['Iterations'].isin(iteration_list)]
        else:
            print(file_path)
        return df
    
    def concate_dataframes(self, method):
        self.full_df = pd.DataFrame()
        for fold_number in list(range(self.folds)):
            file_path = self.generate_path(fold_number, method)
            df = self.read_clean_dataframe(file_path)

            self.full_df = pd.concat([self.full_df, df], axis=0)
        
        self.full_df['Method'] = method

        return self.full_df #by method
    
    def compute_auc(self, method):
        full_df = self.full_df # by method
        auc_df = pd.DataFrame(index=full_df.index.unique(), columns=['R2', 'MSE', 'MAE', 'CA', 'ARRMSE'])

        for fold_index in full_df.index.unique():
            fold_df = full_df.loc[fold_index]
            for metric in auc_df.columns:
                x = fold_df['Iterations']
                y = fold_df[metric]
                auc_value = auc(x, y)
                auc_df.loc[fold_index, metric] = auc_value

        auc_df['Method'] = method
        self.auc_df = auc_df
        return self.auc_df
    
    def compile_methods(self):
        # for each metric
        resume_auc = pd.DataFrame()
        for unique_method in self.all_methods:
            self.method = unique_method
            full_df = self.concate_dataframes(unique_method)
            auc_df = self.compute_auc(unique_method)
            resume_auc[unique_method] = auc_df[self.metric_name]
            self.resume_auc = resume_auc

        return self.resume_auc
    
    def save_reports(self):
        output_path_auc = Path(f'reports/paper_evaluation/{self.dataset_name}_resume_auc_{self.metric_name}.csv')
        self.resume_auc.to_csv(output_path_auc)

        description_auc_df = self.resume_auc.describe()
        description_auc_path = Path(f'reports/paper_evaluation/{self.dataset_name}_resume_auc_{self.metric_name}_description.csv')
        description_auc_df.to_csv(description_auc_path)

def run_reports(dataset_names, metric_names, iterations, folds):
    for dataset in dataset_names:
        for metric in metric_names:
            evaluator = Evaluator(dataset, metric, iterations, folds)
            resume_auc = evaluator.compile_methods()

In [88]:

iterations = 5
folds = 10
dataset_names = ['atp7d']
metric_names = ['ARRMSE', 'CA', 'MAE', 'MSE', 'R2'] 
run_reports(dataset_names, metric_names, iterations, folds)

reports\active_learning\atp7d\target_qbc_results_fold_0.csv


NameError: 

In [84]:
evaluater_module_1 = Evaluator('atp7d', 'target_qbc', 'R2', 5, 10)

full_df = evaluater_module_1.concate_dataframes()
auc_df_1 = evaluater_module_1.compute_auc()
resume_auc = evaluater_module_1.compile_methods()


In [86]:
auc_df_1['ARRMSE']

Fold_Index
0    2.68665
1    2.83975
2     2.5343
3    2.63855
4     2.6867
5     2.3775
6      2.288
7    2.92545
8    2.09245
9    2.62845
Name: ARRMSE, dtype: object

In [77]:
auc_df_2

Unnamed: 0_level_0,R2,MSE,MAE,CA,ARRMSE,Method
Fold_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.64565,2.1568,1.78985,0.86725,2.8971,masster_cotraining
1,1.72885,2.2342,1.74605,0.9054,2.95715,masster_cotraining
2,2.19225,3.059,2.1947,0.7429,2.66435,masster_cotraining
3,2.17025,1.32645,1.5536,1.01695,2.58645,masster_cotraining
4,1.74835,2.1493,1.59545,1.0763,2.8726,masster_cotraining
5,2.18005,1.38345,1.6281,0.7486,2.684,masster_cotraining
6,2.2421,2.03805,1.9936,0.60445,2.6274,masster_cotraining
7,1.56025,1.85545,1.80265,0.8898,3.04005,masster_cotraining
8,2.6643,1.73735,1.72015,0.76555,2.2699,masster_cotraining
9,1.7621,1.60735,1.4568,1.1667,2.8998,masster_cotraining


In [None]:
def compile_methods(self):
        methods = ['target_qbc', 'masster_cotraining', 'masster_self_learning', 'pct']
        auc_results = {metric: pd.DataFrame() for metric in ['R2', 'MSE', 'MAE', 'CA', 'ARRMSE']}
        for unique_method in methods:
            self.method = unique_method
            full_df = self.concate_dataframes()
            auc_df = self.compute_auc()

            for metric in auc_results.keys():
                auc_results[metric][unique_method] = auc_df[metric]
                return [auc_results[metric] for metric in auc_results.keys()]


In [None]:
def compile_methods(self):
    methods = ['target_qbc', 'masster_cotraining', 'masster_self_learning', 'pct']
    auc_results = pd.DataFrame()
    for unique_method in methods:
        self.method = unique_method
        full_df = self.concate_dataframes()
        auc_df = self.compute_auc()

        auc_results[unique_method] = auc_df[self.metric]
    
    return auc_results