# Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Dataset views

In [1]:
class CoverageDatasetViewer:
    """Notebook charts and tables for a coverage-metrics dataset.

    The wrapped DataFrame is read through the columns ``Cyclomatic``,
    ``Name``, ``EdgeCoverage`` and ``PrimePathCoverage``. The views never
    modify that DataFrame: derived columns are computed on copies so that one
    view cannot change what another view shows.

    Table views call ``display``, which is expected to be available as a
    global (as it is inside an IPython/Jupyter session).
    """

    # Ordered (substring, project name) pairs; the first substring found in a
    # method signature decides the project, so the order is significant.
    __PROJECT_MARKERS = (
        ('com.puppycrawl.tools.checkstyle', 'Checkstyle'),
        ('exp4j', 'Exp4j'),
        ('biojava', 'Biojava'),
        ('org.jfree', 'Jfreechart'),
        ('com.urbanairship', 'Urban Airship Java Library'),
        ('org.apache.commons.text', 'Apache Commons Text'),
        ('dubbo', 'Apache Dubbo'),
        ('org.apache.commons.lang3', 'Apache Commons Lang'),
        ('math3', 'Apache Commons Math'),
    )

    def __init__(self, dataset, export_chart=True):
        """Wrap ``dataset.get_dataframe()``.

        Args:
            dataset: Object exposing ``get_dataframe()``.
            export_chart: When true, each chart is also saved as a PNG file
                in the current working directory before being shown.
        """
        self.__dataset = dataset.get_dataframe()
        self.__export_chart = export_chart

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def __finish_chart(self, filename):
        """Optionally save the current figure as ``filename``, then show it."""
        if self.__export_chart:
            plt.savefig(filename, dpi=300)

        plt.show()

    def __count_by_cyclomatic_level(self, to_level):
        """Count the non-null ``Cyclomatic`` values that fall in each level.

        Args:
            to_level: Callable mapping a cyclomatic value to a level label.

        Returns:
            A new DataFrame with a ``Level`` column (the labels) and a
            ``Cyclomatic`` column (the number of rows per label).
        """
        cyclomatic = self.__dataset['Cyclomatic']
        # Group the column by its own labels instead of adding a temporary
        # column to the shared dataset.
        counts = cyclomatic.groupby(cyclomatic.apply(to_level)).count()
        return pd.DataFrame({
            'Level': counts.index.values,
            'Cyclomatic': counts.values,
        })

    def __plot_cyclomatic_levels(self, to_level, order, xlabel, ylabel, filename):
        """Draw the horizontal bar chart shared by the ``groupby`` views."""
        level_counts = self.__count_by_cyclomatic_level(to_level)
        ax = sns.barplot(x='Cyclomatic', y='Level', data=level_counts, orient='h', order=order)
        ax.set(xlabel=xlabel, ylabel=ylabel)
        self.__finish_chart(filename)

    def __with_project_column(self):
        """Return a copy of the dataset with a derived ``Project`` column."""
        projects = self.__dataset['Name'].apply(self.__signature_to_project)
        # ``assign`` returns a new DataFrame, leaving the shared one untouched.
        return self.__dataset.assign(Project=projects)

    def __numeric_correlations(self):
        """Return the pairwise correlation matrix of the numeric columns.

        Text columns such as ``Name`` are excluded first, because recent
        pandas versions raise instead of silently skipping them.
        """
        return self.__dataset.select_dtypes(include=['number', 'bool']).corr()

    def __signature_to_project(self, signature):
        """Map a method signature to its project name ('' when unknown)."""
        for marker, project in self.__PROJECT_MARKERS:
            if marker in signature:
                return project

        return ''

    def __get_level_of_cyclomatic_groupby_5(self, level):
        """Label a cyclomatic value using five buckets."""
        if level <= 1:
            return '0 - 1'
        elif level <= 3:
            return '2 - 3'
        elif level <= 5:
            return '4 - 5'
        elif level <= 10:
            return '6 - 10'

        return '> 10'

    def __get_level_of_cyclomatic_groupby_4(self, level):
        """Label a cyclomatic value using four buckets."""
        if level <= 2:
            return '0 - 2'
        elif level <= 5:
            return '3 - 5'
        elif level <= 10:
            return '6 - 10'

        return '> 10'

    def __get_level_of_cyclomatic_groupby_3(self, level):
        """Label a cyclomatic value using three buckets."""
        if level <= 3:
            return '0 - 3'
        elif level <= 9:
            return '4 - 9'

        return '>= 10'

    # ------------------------------------------------------------------
    # Cyclomatic complexity views
    # ------------------------------------------------------------------
    def display_cyclomatic_groupby_5(self):
        """Bar chart of the number of rows per cyclomatic level (5 levels)."""
        self.__plot_cyclomatic_levels(
            self.__get_level_of_cyclomatic_groupby_5,
            ['0 - 1', '2 - 3', '4 - 5', '6 - 10', '> 10'],
            'Total',
            'Cyclomatic',
            'cyclomatic-groupby-5.png',
        )

    def display_cyclomatic_groupby_4(self):
        """Bar chart of the number of rows per cyclomatic level (4 levels)."""
        self.__plot_cyclomatic_levels(
            self.__get_level_of_cyclomatic_groupby_4,
            ['0 - 2', '3 - 5', '6 - 10', '> 10'],
            'Total',
            'Cyclomatic',
            'cyclomatic-groupby-4.png',
        )

    def display_cyclomatic_groupby_3(self):
        """Bar chart of the number of rows per cyclomatic level (3 levels)."""
        self.__plot_cyclomatic_levels(
            self.__get_level_of_cyclomatic_groupby_3,
            ['0 - 3', '4 - 9', '>= 10'],
            'Number of instances',
            'Cyclomatic value',
            'cyclomatic-groupby-3.png',
        )

    def display_cyclomatic_overview(self):
        """Print the dataset size and mean complexity, then plot a histogram."""
        print('Dataset size:', self.__dataset.shape[0])
        print('Average complexity:', self.__dataset['Cyclomatic'].mean())
        plt.title("Dataset complexity")
        sns.histplot(self.__dataset['Cyclomatic'])
        self.__finish_chart('cyclomatic-overview.png')

    def display_total_records_with_ppc(self, value):
        """Print how many rows have ``Cyclomatic`` equal to ``value``."""
        # NOTE(review): the method name and message say "ppc" but the filter
        # is on the 'Cyclomatic' column; confirm which column is intended.
        total_records = self.__dataset[self.__dataset['Cyclomatic'] == value].shape[0]
        print('Total records with ppc {0}: {1}'.format(value, total_records))

    # ------------------------------------------------------------------
    # Table views
    # ------------------------------------------------------------------
    def display_dataframe(self):
        """Display the wrapped DataFrame."""
        display(self.__dataset)

    def display_dataset_overview_table(self):
        """Display ``describe()`` statistics for the whole dataset."""
        display(self.__dataset.describe())

    def display_dataset_overview_table_groupby_project(self):
        """Display ``describe()`` statistics of ``Cyclomatic`` per project."""
        by_project = self.__with_project_column().groupby('Project')
        display(by_project['Cyclomatic'].describe())

    # ------------------------------------------------------------------
    # Project contribution views
    # ------------------------------------------------------------------
    def display_project_contributions(self):
        """Horizontal count plot of the number of rows per project."""
        with_project = self.__with_project_column()
        plt.figure(figsize=(16,5))
        plt.title("Contribution of each project to the dataset ")
        sns.countplot(y='Project', data=with_project, orient='h')
        self.__finish_chart('project-contribution.png')

    def display_project_contributions_by_cyclomatic(self):
        """Count plot of rows per project, split by cyclomatic value."""
        with_project = self.__with_project_column()
        plt.figure(figsize=(16,10))
        sns.countplot(y='Project', data=with_project, orient='h', hue='Cyclomatic')
        self.__finish_chart('project-contribution-by-cyclomatic.png')

    # ------------------------------------------------------------------
    # Coverage and correlation views
    # ------------------------------------------------------------------
    def display_ppc_ec_chart(self):
        """Joint scatter plot of edge coverage against prime path coverage."""
        grid = sns.jointplot(x='EdgeCoverage', y='PrimePathCoverage', data=self.__dataset, kind='scatter', color='purple')
        grid.set_axis_labels('Edge coverage', 'Prime Path Coverage')
        # pyplot.show() takes no positional argument, so the grid must not be
        # passed to it.
        self.__finish_chart('ec-ppc.png')

    def display_correlation_heatmap(self):
        """Heatmap of the correlations between the numeric columns."""
        plt.figure(figsize=(10,7))
        plt.title("Metrics correlation")
        sns.heatmap(self.__numeric_correlations(), vmin=-1, vmax=1)
        self.__finish_chart('correlation-heatmap.png')

    def display_correlation_table(self):
        """Plot and display each metric's correlation with PrimePathCoverage.

        The metrics are sorted from the highest to the lowest correlation.
        """
        correlations = self.__numeric_correlations()['PrimePathCoverage']
        correlations = correlations.drop('PrimePathCoverage')

        corr_table = pd.DataFrame({
            'Metrics': correlations.index,
            'Correlation': correlations.values,
        })
        corr_table = corr_table.sort_values(ascending=False, by='Correlation')

        plt.figure(figsize=(10,8))
        sns.barplot(y='Metrics', x='Correlation', data=corr_table)

        display(corr_table)

# Machine learning views

In [None]:
class MlPpcViewer:
    """Display the metrics tables exposed by an evaluator, with a caption."""

    def __init__(self, evaluator):
        """Keep a reference to the evaluator whose tables are displayed."""
        self.evaluator = evaluator

    def display_noscaled_evaluation(self):
        """Display the evaluator's table of metrics obtained without scaling."""
        metrics_table = self.evaluator.get_noscaled_metrics_table()
        self.__display_dataframe_using_title(metrics_table, 'Without scaling')

    def display_scaled_evaluation(self):
        """Display the evaluator's table of metrics obtained with scaling."""
        metrics_table = self.evaluator.get_scaled_metrics_table()
        self.__display_dataframe_using_title(metrics_table, 'With scaling')

    def __display_dataframe_using_title(self, dataframe, title):
        """Display ``dataframe`` with ``title`` as a black, 16px caption."""
        caption_props = [
            ('color', 'black'),
            ('font-size', '16px'),
        ]
        caption_rule = {'selector': 'caption', 'props': caption_props}

        styler = dataframe.style.set_caption(title)
        styler = styler.set_table_styles([caption_rule])
        display(styler)