# Imports

In [None]:
# Execute the sibling dataset notebook through the IPython `%run` magic.
# NOTE(review): presumably this is what brings `MlPpcEvaluator` (referenced by
# MetricsSelection below) into scope -- confirm against dataset.ipynb.
try:
    %run ../dataset/dataset.ipynb
except:
    # Any exception raised while running the notebook is silently ignored.
    # NOTE(review): a bare `except` also catches KeyboardInterrupt/SystemExit;
    # consider narrowing it if the best-effort behaviour is not required.
    pass

from itertools import chain, combinations
import pandas as pd

# MetricsSelection

In [None]:
class MetricsSelection:
    """
    Responsible for evaluating which is the best subset of metrics, through an
    error metric (R2 score), for a given machine learning algorithm.

    The evaluator handed to the constructor must provide:

    * ``evaluate(metrics)``: run the evaluation for a list of metrics;
    * ``get_noscaled_metrics_table()``: return a table ``t`` from which the
      R2 score of the last evaluation is read as ``t[0]['R2 Score']``.
    """

    # -------------------------------------------------------------------------
    #           Constructor
    # -------------------------------------------------------------------------
    # The annotation is a string (forward reference) so that defining this
    # class does not fail when `MlPpcEvaluator` has not been loaded yet.
    def __init__(self, regressor: "MlPpcEvaluator", history=10):
        """
        Evaluates which is the best subset of metrics, through an error metric,
        for a given machine learning algorithm.

        :param      regressor: Machine learning algorithm (evaluator)
        :param      history: Ranking size of best metric combinations

        :raises     ValueError: If history is smaller than 1
        """
        if history < 1:
            raise ValueError("history must be a positive integer")

        self.__evaluator = regressor
        self.__history = history
        self.__last_result = None

    # -------------------------------------------------------------------------
    #           Methods
    # -------------------------------------------------------------------------
    def evaluate_using_brute_force(self, metrics) -> pd.DataFrame:
        """
        The algorithm will choose each combination and calculate score of each
        combination then choose best combination based on R2 score. This
        algorithm is called 'exhaustive feature selection' or 'brute force
        features selection'.

        Every non-empty subset is evaluated, i.e. 2**n - 1 evaluations for n
        metrics. The returned table is also stored so that it can be shown
        later with display_last_result().

        :param      metrics: Metrics to be evaluated

        :return:    Table with `history` rows and the columns 'R2' and
                    'Metrics' holding the best combinations found (rows are
                    not sorted). Rows that were never filled keep R2 = 0.0
                    and an empty 'Metrics' text, so a combination only
                    enters the ranking when its R2 is greater than 0.0.
        """
        best_metrics = self.__generate_empty_best_metrics_table()
        worst_r2 = self.__get_worst_r2_error_metric_of(best_metrics)

        for subset in self.__generate_non_empty_subsets_of(metrics):
            error_metrics = self.__evaluate(subset)
            r2 = self.__get_r2_error_metric_of(error_metrics)

            if r2 > worst_r2:
                # Replace the current worst entry, then refresh the threshold
                self.__update_best_metrics_table(best_metrics, r2, subset)
                worst_r2 = self.__get_worst_r2_error_metric_of(best_metrics)

        self.__last_result = best_metrics

        return best_metrics

    def display_last_result(self):
        """
        Displays the ranking produced by the last call to
        evaluate_using_brute_force(), sorted from the best to the worst R2.

        Relies on `display`, which is available inside IPython / Jupyter.
        """
        if self.__last_result is None:
            print("No result available: run evaluate_using_brute_force() first.")
            return

        # Metric combinations are long texts; avoid truncating them
        pd.set_option('display.max_colwidth', 500)

        # sort_values() returns a new table; the stored one is left untouched
        display(self.__last_result.sort_values(by="R2", ascending=False))

    def __generate_empty_best_metrics_table(self):
        # One placeholder row (R2 = 0.0, no metrics) per ranking position
        return pd.DataFrame(
            index=range(self.__history),
            columns=['R2', 'Metrics'],
            data=[[0.0, ''] for _ in range(self.__history)]
        )

    def __generate_non_empty_subsets_of(self, iterable):
        "[1,2,3] --> [[1], [2], [3], [1,2], [1,3], [2,3], [1,2,3]]"
        elements = list(iterable)

        # Starting at size 1 leaves the empty subset out of the powerset
        return [
            list(subset)
            for size in range(1, len(elements) + 1)
            for subset in combinations(elements, size)
        ]

    def __get_worst_r2_error_metric_of(self, dataframe):
        # idxmin() yields an index label, which is what .loc expects
        return dataframe.loc[dataframe['R2'].idxmin(), 'R2']

    def __evaluate(self, metrics):
        self.__evaluator.evaluate(metrics)

        return self.__evaluator.get_noscaled_metrics_table()

    def __get_r2_error_metric_of(self, dataframe):
        return float(dataframe[0]['R2 Score'])

    def __update_best_metrics_table(self, dataframe, r2, metrics):
        # Overwrite the row that currently holds the lowest R2
        idx_worst_r2 = dataframe['R2'].idxmin()
        dataframe.loc[idx_worst_r2, 'R2'] = r2
        dataframe.loc[idx_worst_r2, 'Metrics'] = self.__convert_set_to_text(metrics)

    def __convert_set_to_text(self, s):
        return str([str(element) for element in s])