# Imports

In [6]:
try:
    %run ../dataset/dataset.ipynb
except:
    pass

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Dataset viewer

In [7]:
class DatasetViewer:
    """
    Responsible for displaying graphical information about a dataset.

    Charts are drawn with seaborn / matplotlib (``sns`` / ``plt``) and tables
    are rendered through ``display``, which is expected to be supplied by the
    IPython / Jupyter runtime.

    The wrapped dataframe is treated as read-only: derived columns are always
    added to a copy, never to the dataframe handed in by the dataset.

    Columns read by this class: 'Cyclomatic', 'Name', 'CountLineCode',
    'EdgeCoverage' and 'PrimePathCoverage'.
    """

    # Ordered (marker, project) pairs; the first marker contained in a
    # signature decides the project, so the order is significant.
    __PROJECT_MARKERS = (
        ('com.puppycrawl.tools.checkstyle', 'Checkstyle'),
        ('exp4j', 'Exp4j'),
        ('biojava', 'Biojava'),
        ('org.jfree', 'Jfreechart'),
        ('com.urbanairship', 'Urban Airship Java Library'),
        ('org.apache.commons.text', 'Apache Commons Text'),
        ('dubbo', 'Apache Dubbo'),
        ('org.apache.commons.lang3', 'Apache Commons Lang'),
        ('math3', 'Apache Commons Math'),
    )

    # -------------------------------------------------------------------------
    #           Constructor
    # -------------------------------------------------------------------------
    def __init__(self, dataset: 'CoverageDataset', export_chart=True, export_format='png'):
        """
        Displays graphical information about a dataset.

        The annotation is a string so that defining this class does not
        require CoverageDataset to be importable; only ``get_dataframe()`` is
        called on the given object.

        :param      dataset: Dataset to be displayed
        :param      export_chart: True to export generated charts; false to not export
        :param      export_format: Extension (without dot) of exported charts
        """
        self.__dataset = dataset.get_dataframe()
        self.__export_chart = export_chart
        self.__export_format = '.' + export_format
        self.__export_path = '../../../../../../out'

    # -------------------------------------------------------------------------
    #           Methods
    # -------------------------------------------------------------------------
    def display_cyclomatic_groupby_5(self):
        """
        Displays a bar chart with the number of records in each of five
        cyclomatic ranges.
        """
        self.__display_cyclomatic_groupby(
            self.__cyclomatic_classifier_groupby_5,
            ['0 - 1', '2 - 3', '4 - 5', '6 - 10', '> 10'],
            'cyclomatic-groupby-5'
        )

    def __cyclomatic_classifier_groupby_5(self, level):
        """
        Maps a cyclomatic value to one of five range labels.

        :param      level: Cyclomatic value

        :return     Label of the range that contains the value
        """
        if level <= 1:
            return '0 - 1'
        elif level <= 3:
            return '2 - 3'
        elif level <= 5:
            return '4 - 5'
        elif level <= 10:
            return '6 - 10'

        return '> 10'

    def __display_cyclomatic_groupby(self, cyclomatic_classifier, labels, title):
        """
        Builds, exports and shows the bar chart for one cyclomatic grouping.

        :param      cyclomatic_classifier: Function mapping a cyclomatic value to a label
        :param      labels: Labels in the order they should appear on the chart
        :param      title: File name (without extension) used when exporting
        """
        cyclomatic_dataset = self.__build_dataset_groupby_cyclomatic(cyclomatic_classifier)
        self.__build_cyclomatic_chart(cyclomatic_dataset, labels)
        self.__export_current_chart(title)
        self.__display_current_chart()

    def __build_dataset_groupby_cyclomatic(self, cyclomatic_classifier):
        """
        Counts the records of each cyclomatic level.

        :param      cyclomatic_classifier: Function mapping a cyclomatic value to a label

        :return     Dataframe indexed by level holding per-column counts plus a
        'Level' column that repeats the index
        """
        # assign() returns a new dataframe, so the wrapped dataset does not
        # gain a 'Cyclomatic_Level' column as a side effect.
        cyclomatic_dataset = self.__dataset.assign(
            Cyclomatic_Level=self.__dataset['Cyclomatic'].apply(cyclomatic_classifier)
        )
        cyclomatic_dataset_groupby = cyclomatic_dataset.groupby('Cyclomatic_Level').count()
        cyclomatic_dataset_groupby['Level'] = cyclomatic_dataset_groupby.index.values

        return cyclomatic_dataset_groupby

    def __build_cyclomatic_chart(self, dataset_groupby_cyclomatic, labels):
        """
        Draws a horizontal bar chart with the record count of each level.

        :param      dataset_groupby_cyclomatic: Result of the cyclomatic group by
        :param      labels: Levels in the order they should be drawn
        """
        ax = sns.barplot(x='Cyclomatic', y='Level', data=dataset_groupby_cyclomatic, orient='h', order=labels)
        ax.set(xlabel='Total', ylabel='Cyclomatic')

    def __export_current_chart(self, title):
        """
        Saves the current matplotlib figure when exporting is enabled.

        :param      title: File name (without extension) of the exported chart
        """
        if not self.__export_chart:
            return

        import os

        # savefig does not create missing directories.
        if self.__export_path:
            os.makedirs(self.__export_path, exist_ok=True)

        plt.savefig(self.__export_path + '/' + title + self.__export_format, dpi=300)

    def __display_current_chart(self, config=None):
        """
        Shows the current chart.

        :param      config: Accepted for backward compatibility and ignored;
        pyplot.show() takes no positional arguments, so the chart object must
        not be forwarded to it
        """
        plt.show()

    def display_cyclomatic_groupby_4(self):
        """
        Displays a bar chart with the number of records in each of four
        cyclomatic ranges.
        """
        self.__display_cyclomatic_groupby(
            self.__cyclomatic_classifier_groupby_4,
            ['0 - 2', '3 - 5', '6 - 10', '> 10'],
            'cyclomatic-groupby-4'
        )

    def __cyclomatic_classifier_groupby_4(self, level):
        """
        Maps a cyclomatic value to one of four range labels.

        :param      level: Cyclomatic value

        :return     Label of the range that contains the value
        """
        if level <= 2:
            return '0 - 2'
        elif level <= 5:
            return '3 - 5'
        elif level <= 10:
            return '6 - 10'

        return '> 10'

    def display_cyclomatic_groupby_3(self):
        """
        Displays a bar chart with the number of records in each of three
        cyclomatic ranges.
        """
        self.__display_cyclomatic_groupby(
            self.__cyclomatic_classifier_groupby_3,
            ['0 - 3', '4 - 9', '>= 10'],
            'cyclomatic-groupby-3'
        )

    def __cyclomatic_classifier_groupby_3(self, level):
        """
        Maps a cyclomatic value to one of three range labels.

        :param      level: Cyclomatic value

        :return     Label of the range that contains the value
        """
        if level <= 3:
            return '0 - 3'
        elif level <= 9:
            return '4 - 9'

        return '>= 10'

    def display_cyclomatic_overview(self):
        """
        Prints the dataset size and average complexity, then displays the
        histogram of cyclomatic values.
        """
        self.__display_dataset_size()
        self.__display_average_complexity()
        self.__display_dataset_complexity_chart()

    def __display_dataset_size(self):
        """
        Prints the number of rows of the dataset.
        """
        print('Dataset size:', self.__dataset.shape[0])

    def __display_average_complexity(self):
        """
        Prints the mean of the 'Cyclomatic' column.
        """
        print('Average complexity:', self.__dataset['Cyclomatic'].mean())

    def __display_dataset_complexity_chart(self):
        """
        Draws, exports and shows the histogram of the 'Cyclomatic' column.
        """
        plt.title("Dataset complexity")
        sns.histplot(self.__dataset['Cyclomatic'])
        self.__export_current_chart('cyclomatic-overview')
        self.__display_current_chart()

    def display_total_records_with_ppc(self, value):
        """
        Prints how many records have the given value.

        NOTE(review): the message says "ppc" but the count is taken on the
        'Cyclomatic' column - confirm which column is intended.

        :param      value: Value the records are compared against
        """
        print('Total records with ppc {0}: {1}'.format(value, self.__get_total_records_with_ppc(value)))

    def __get_total_records_with_ppc(self, value):
        """
        Counts the records whose 'Cyclomatic' column equals the given value.

        :param      value: Value the 'Cyclomatic' column is compared against

        :return     Number of matching records
        """
        return self.__dataset[self.__dataset['Cyclomatic'] == value].shape[0]

    def display_dataframe(self):
        """
        Displays the wrapped dataframe.
        """
        display(self.__dataset)

    def display_project_contributions(self):
        """
        Displays a chart with the number of records contributed by each
        project.
        """
        project_dataset = self.__build_dataset_groupby_project(self.__signature_to_project)
        self.__build_project_contribution_chart(project_dataset)
        self.__export_current_chart('project-contribution')
        self.__display_current_chart()

    def __signature_to_project(self, signature):
        """
        Resolves the project a signature belongs to.

        :param      signature: Text searched for the known project markers

        :return     Project name, or an empty string when no marker matches
        """
        for marker, project in self.__PROJECT_MARKERS:
            if marker in signature:
                return project

        return ''

    def __build_dataset_groupby_project(self, project_classifier):
        """
        Builds a copy of the dataset with an additional 'Project' column.

        :param      project_classifier: Function mapping a 'Name' value to a project

        :return     New dataframe; the wrapped dataset is left untouched
        """
        # assign() returns a new dataframe, so the wrapped dataset does not
        # gain a 'Project' column as a side effect.
        return self.__dataset.assign(Project=self.__dataset['Name'].apply(project_classifier))

    def __build_project_contribution_chart(self, dataset_groupby_project, width=16, height=5, hue=None):
        """
        Draws a horizontal count plot of the 'Project' column.

        :param      dataset_groupby_project: Dataframe containing a 'Project' column
        :param      width: Figure width
        :param      height: Figure height
        :param      hue: Optional column used to split each bar
        """
        plt.figure(figsize=(width,height))
        plt.title("Contribution of each project to the dataset ")
        sns.countplot(y='Project', data=dataset_groupby_project, orient='h', hue=hue)

    def display_project_contributions_by_cyclomatic(self):
        """
        Displays the contribution of each project split by cyclomatic value.
        """
        project_dataset = self.__build_dataset_groupby_project(self.__signature_to_project)
        self.__build_project_contribution_chart(project_dataset, height=10, hue='Cyclomatic')
        self.__export_current_chart('project-contribution-by-cyclomatic')
        self.__display_current_chart()

    def display_ppc_ec_chart(self):
        """
        Displays the joint plot of edge coverage against prime path coverage.
        """
        chart = self.__build_ppc_ec_chart()
        self.__export_current_chart('ec-ppc')
        self.__display_current_chart(chart)

    def __build_ppc_ec_chart(self):
        """
        Builds the scatter joint plot of 'EdgeCoverage' x 'PrimePathCoverage'.

        :return     Object returned by sns.jointplot
        """
        ax = sns.jointplot(
            x='EdgeCoverage',
            y='PrimePathCoverage',
            data=self.__dataset,
            kind='scatter',
            color='purple'
        )

        ax.set_axis_labels('Edge coverage', 'Prime Path Coverage')

        return ax

    def display_correlation_heatmap(self):
        """
        Displays a heatmap with the correlation between the numeric columns.
        """
        chart = self.__build_correlation_chart()
        self.__export_current_chart('correlation-heatmap')
        self.__display_current_chart(chart)

    def __build_correlation_chart(self):
        """
        Builds the correlation heatmap.

        :return     Object returned by sns.heatmap
        """
        plt.figure(figsize=(10,7))
        plt.title("Metrics correlation")

        return sns.heatmap(self.__numeric_dataset().corr(), vmin=-1, vmax=1)

    def __numeric_dataset(self):
        """
        Selects the columns a correlation can be computed on.

        Text columns such as 'Name' make DataFrame.corr() raise on recent
        pandas versions, so they are removed beforehand.

        :return     Dataframe with only the numeric and boolean columns
        """
        return self.__dataset.select_dtypes(include=['number', 'bool'])

    def display_correlation_table(self):
        """
        Draws the bar chart of correlations with 'PrimePathCoverage' and
        displays the corresponding table.
        """
        ppc_correlation_table = self.__build_ppc_correlation_table()

        self.__display_ppc_correlation_chart(ppc_correlation_table)
        display(ppc_correlation_table)

    def __build_ppc_correlation_table(self):
        """
        Builds the table of correlations with 'PrimePathCoverage'.

        :return     Dataframe with 'Metrics' and 'Correlation' columns sorted
        by descending correlation
        """
        correlations = self.__get_correlation_of('PrimePathCoverage')
        correlation_table = pd.DataFrame({
            'Metrics': correlations.index,
            'Correlation': correlations.values
        })

        return correlation_table.sort_values(ascending=False, by='Correlation')

    def __get_correlation_of(self, metric):
        """
        Computes the correlation of a metric with every other numeric column.

        :param      metric: Name of the column of interest

        :return     Series indexed by column name, without the metric itself
        """
        correlations = self.__numeric_dataset().corr()[metric]

        return correlations.drop(metric)

    def __display_ppc_correlation_chart(self, ppc_correlation_table):
        """
        Draws a bar chart with the correlation of each metric.

        :param      ppc_correlation_table: Dataframe with 'Metrics' and 'Correlation' columns
        """
        plt.figure(figsize=(10,8))
        sns.barplot(y='Metrics', x='Correlation', data=ppc_correlation_table)

    def display_dataset_overview_table_groupby_project(self):
        """
        Displays the descriptive statistics of 'Cyclomatic' for each project.
        """
        overview_project_table = self.__build_overview_project_table()
        self.__display_dataframe_using_title(overview_project_table, 'Cyclomatic')

    def __build_overview_project_table(self):
        """
        Builds the descriptive statistics of 'Cyclomatic' grouped by project.

        :return     Dataframe produced by describe(), indexed by project
        """
        project_dataset = self.__build_dataset_groupby_project(self.__signature_to_project)

        return project_dataset.groupby('Project')['Cyclomatic'].describe()

    def display_dataset_loc_table_groupby_project(self):
        """
        Displays the descriptive statistics of 'CountLineCode' for each project.
        """
        loc_table = self.__build_loc_table()

        self.__display_dataframe_using_title(loc_table, 'Count line code')

    def __build_loc_table(self):
        """
        Builds the descriptive statistics of 'CountLineCode' grouped by project.

        :return     Dataframe produced by describe() with an extra 'sum' column
        """
        project_dataset = self.__build_dataset_groupby_project(self.__signature_to_project)
        groupby_project_dataset = project_dataset.groupby('Project')

        loc_table = groupby_project_dataset['CountLineCode'].describe()
        loc_table['sum'] = groupby_project_dataset['CountLineCode'].sum()

        return loc_table

    def __display_dataframe_using_title(self, dataframe, title):
        """
        Displays a dataframe with a caption above it.

        :param      dataframe: Dataframe to be displayed
        :param      title: Caption text
        """
        styled_dataframe = dataframe.style.set_caption(title).set_table_styles([{
            'selector': 'caption',
            'props': [
                ('color', 'black'),
                ('font-size', '16px')
            ]
        }])
        display(styled_dataframe)

    def display_dataset_overview_table(self):
        """
        Displays the descriptive statistics of the whole dataset.
        """
        display(self.__dataset.describe())

    # -------------------------------------------------------------------------
    #           Getters and Setters
    # -------------------------------------------------------------------------
    def set_export_path(self, path):
        """
        :param      path: Directory where exported charts are saved
        """
        self.__export_path = path

    def get_export_path(self):
        """
        :return     Directory where exported charts are saved
        """
        return self.__export_path

# Machine learning views

In [None]:
class MlPpcViewer:
    """
    Displays the metrics tables provided by an evaluator, each one under a
    caption telling whether scaling was applied.
    """

    def __init__(self, evaluator):
        """
        :param      evaluator: Object whose get_noscaled_metrics_table() and
        get_scaled_metrics_table() results are displayed (presumably pandas
        dataframes, since their ``style`` attribute is used - confirm)
        """
        self.evaluator = evaluator

    def __display_dataframe_using_title(self, dataframe, title):
        """
        Displays a dataframe with a caption above it.

        :param      dataframe: Dataframe to be displayed
        :param      title: Caption text
        """
        caption_style = {
            'selector': 'caption',
            'props': [('color', 'black'), ('font-size', '16px')],
        }

        styler = dataframe.style
        styler = styler.set_caption(title)
        styler = styler.set_table_styles([caption_style])

        display(styler)

    def display_noscaled_evaluation(self):
        """
        Displays the evaluator's metrics table obtained without scaling.
        """
        metrics_table = self.evaluator.get_noscaled_metrics_table()
        self.__display_dataframe_using_title(metrics_table, 'Without scaling')

    def display_scaled_evaluation(self):
        """
        Displays the evaluator's metrics table obtained with scaling.
        """
        metrics_table = self.evaluator.get_scaled_metrics_table()
        self.__display_dataframe_using_title(metrics_table, 'With scaling')