# Imports

In [None]:
try:
    %run ../util/MathUtils.ipynb
except:
    pass

import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler

# CoverageDataset

In [54]:
class CoverageDataset:
    """
    Responsible for managing datasets related to test coverage.

    The dataset is held as a pandas DataFrame loaded from a ';'-separated
    CSV file. Every filtering / transformation method replaces or updates
    the internal DataFrame; use ``get_dataframe`` to read the current state.
    """

    def __init__(self, dataset_filepath: str):
        """
        Manages datasets related to test coverage.

        :param dataset_filepath: Dataset file location (CSV using ';' as
            the field separator)
        """
        self.__dataset = None
        self.__load_dataset(dataset_filepath)

    def __load_dataset(self, name):
        """
        Reads the ';'-separated CSV file into the internal DataFrame.

        :param name: Dataset file location
        """
        self.__dataset = pd.read_csv(name, sep=';')

    @staticmethod
    def __parse_decimal(value):
        """
        Converts a decimal-comma string (e.g. ``'0,75'``) to float.

        Values that are not strings (already numeric cells, NaN) are
        returned unchanged.
        """
        if isinstance(value, str):
            return float(value.replace(',', '.'))
        return value

    def under_sampling(self, sampling):
        """
        Randomly under-samples the rows, using the 'Cyclomatic' column as
        the class label handed to the sampler.

        The sampler only sees the row index labels (as a single feature);
        the rows whose labels it returns are kept, in their original order.

        :param sampling: Value forwarded to ``RandomUnderSampler`` as
            ``sampling_strategy``
        """
        rus = RandomUnderSampler(random_state=0, sampling_strategy=sampling)
        index_labels = self.__dataset.index.to_numpy().reshape(-1, 1)
        kept_labels, _ = rus.fit_resample(
            index_labels,
            self.__dataset['Cyclomatic'].values
        )

        # One vectorised membership test on the index; no temporary helper
        # columns are written into the dataset.
        keep_mask = self.__dataset.index.isin(kept_labels.ravel())
        self.__dataset = self.__dataset[keep_mask]

    def remove_last_column(self):
        """Drops the last (right-most) column of the dataset."""
        self.__dataset = self.__dataset.iloc[:, :-1]

    def remove_nan(self):
        """Drops every row that contains at least one missing value."""
        self.__dataset = self.__dataset.dropna()

    def convert_coverage_metrics_to_float(self):
        """
        Converts decimal-comma strings in the last two columns to floats.

        Cells that are not strings are left as they are.
        """
        # Last column first, then the second-to-last one; both use the same
        # conversion so a non-string cell is returned untouched in either.
        for position in (-1, -2):
            column = self.__dataset.iloc[:, position]
            self.__dataset.iloc[:, position] = column.apply(self.__parse_decimal)

    def remove_coverage_metrics_with_both_zero(self):
        """
        Keeps only the rows where 'EdgeCoverage' or 'PrimePathCoverage'
        is different from zero.
        """
        edge_nonzero = self.__dataset['EdgeCoverage'] != 0
        ppc_nonzero = self.__dataset['PrimePathCoverage'] != 0
        self.__dataset = self.__dataset[edge_nonzero | ppc_nonzero]

    def select_metrics(self, metrics):
        """
        Restricts the dataset to the given selection.

        :param metrics: Column selection used to index the DataFrame
            (e.g. a list of column names)
        """
        self.__dataset = self.__dataset[metrics]

    def remove_ppc_greater_than_ec(self):
        """
        Keeps only the rows where 'PrimePathCoverage' is less than or
        equal to 'EdgeCoverage'.
        """
        ppc_within_ec = (
            self.__dataset['PrimePathCoverage'] <= self.__dataset['EdgeCoverage']
        )
        self.__dataset = self.__dataset[ppc_within_ec]

    def remove_ppc_between(self, start, end):
        """
        Removes the rows whose 'PrimePathCoverage' lies in [start, end].

        Only rows with a value strictly below ``start`` or strictly above
        ``end`` are kept.

        :param start: Lower bound of the removed interval (inclusive)
        :param end: Upper bound of the removed interval (inclusive)
        """
        ppc = self.__dataset['PrimePathCoverage']
        self.__dataset = self.__dataset[(ppc < start) | (ppc > end)]

    def get_dataframe(self):
        """
        :return: The DataFrame currently held by this object (not a copy)
        """
        return self.__dataset

    def truncate_ppc(self, precision):
        """
        Applies ``MathUtils.truncate`` to every 'PrimePathCoverage' value.

        :param precision: Second argument forwarded to ``MathUtils.truncate``
        """
        self.__dataset['PrimePathCoverage'] = self.__dataset['PrimePathCoverage'].apply(
            lambda ppc: MathUtils.truncate(ppc, precision)
        )