# Analysing accuracy of robust and non-robust models
## 1. Individual Analysis

In [30]:
class AnalysisModule:
    """Helper for analysing a ``details.log`` JSON-lines file and its RGB images.

    Attributes set by the constructor:
        file_location: directory that holds ``class_maps.json`` and ``details.log``.
        image_location: ``images/`` sub-directory of ``file_location``.
        df: one row per log record, with ``render_args`` expanded into columns.
        varyingVariables: names of the columns whose value changes between rows.
    """

    def __init__(self, file_location):
        """Load the log found in ``file_location`` and detect the varying variables."""
        import os

        self.file_location = file_location
        # os.path.join yields the same "<dir>/images/" string as plain
        # concatenation when file_location ends with a separator, and also
        # works when the trailing separator was left out.
        self.image_location = os.path.join(file_location, "images/")
        self.df = self._loadDetailsJson(file_location)
        self.varyingVariables = self._outputVaryingVariables()

    def _loadDetailsJson(self, file_location):
        """Read ``class_maps.json`` and ``details.log`` into a DataFrame.

        Each non-blank line of ``details.log`` is parsed as one JSON record.
        The ``render_args`` dict of every record is expanded into its own
        columns, ``prediction`` is replaced by the class name of its first
        entry, and ``is_correct`` is converted to a real boolean.
        """
        import json
        import os
        import pandas as pd

        # Context managers make sure both files are closed again.
        with open(os.path.join(file_location, 'class_maps.json')) as class_map_file:
            class_map = json.load(class_map_file)
        with open(os.path.join(file_location, 'details.log')) as log_file:
            # Skip blank lines (e.g. a trailing newline) instead of crashing on them.
            records = [json.loads(line) for line in log_file if line.strip()]

        df = pd.DataFrame.from_records(records)
        render_args = pd.DataFrame(df['render_args'].tolist(), index=df.index)
        df = df.drop('render_args', axis=1).join(render_args)
        df['prediction'] = df['prediction'].apply(lambda x: class_map[x[0]])
        # Going through str accepts both the string 'True' and a native JSON
        # boolean; comparing a real bool against 'True' would always be False.
        df['is_correct'] = df['is_correct'].astype(str) == 'True'
        return df

    def _outputVaryingVariables(self):
        """Return the names of the columns that take more than one value.

        Numeric columns count as varying when their variance exceeds 0.0001;
        all other columns when they hold more than one distinct value.
        """
        from pandas.api.types import is_numeric_dtype

        varying_variables = []
        # ignore columns 0-3 ('is_correct', 'loss', 'prediction', 'id')
        for colname in self.df.columns[4:]:
            column = self.df[colname]
            if is_numeric_dtype(column):
                varies = column.var() > 0.0001
            else:
                varies = column.nunique() > 1
            if varies:
                varying_variables.append(colname)
        return varying_variables

    def variableSettingAccuracies(self, variable_name):
        """Return the accuracy for each value of one varying variable.

        The result is a DataFrame indexed by the variable's values with a
        single ``accuracy`` column, sorted from lowest to highest accuracy.
        If ``variable_name`` is not a varying variable, a hint is printed and
        ``None`` is returned.
        """
        if variable_name not in self.varyingVariables:
            print("Variable name either does not exist in the dataframe or remains constant through the experiment")
            print("Valid Variable Entries: "+ str(self.varyingVariables))
            print("To get overall accuracy use .overallAccuracy()")
            return None
        return (
            self.df.groupby(variable_name)
            .agg(accuracy=('is_correct', 'mean'))
            # A stable sort keeps equally accurate settings in index order.
            .sort_values('accuracy', ascending=True, kind='stable')
        )

    def overallAccuracy(self):
        """Print the accuracy over all rows of the log."""
        print("Overall Accuracy: "+str(self.df['is_correct'].mean()))

    def varianceOfVariableAccuracies(self):
        """Rank the varying variables by how much accuracy changes across their values.

        Returns a DataFrame indexed by variable name with one column,
        ``Accuracy Variance``, sorted from largest to smallest variance.
        """
        import pandas as pd

        variances = {
            colname: self.df.groupby(colname)['is_correct'].mean().var()
            for colname in self.varyingVariables
        }
        return (
            pd.Series(variances, name='Accuracy Variance', dtype=float)
            .to_frame()
            .sort_values('Accuracy Variance', ascending=False)
        )

    def lowestAccuracyPerVariable(self):
        """Return, for each varying variable, the value with the lowest accuracy.

        The result is indexed by variable name (sorted) and has the columns
        ``lowest_setting`` and ``lowest_accuracy``.
        """
        import pandas as pd

        lowest_accuracies = {}
        for colname in self.varyingVariables:
            accuracies = self.variableSettingAccuracies(colname)["accuracy"]
            lowest_accuracies[colname] = [accuracies.idxmin(), accuracies.min()]
        return pd.DataFrame.from_dict(
            lowest_accuracies,
            orient='index',
            columns=['lowest_setting', 'lowest_accuracy'],
        ).sort_index()

    def _filterImageIds(self, filter_input):
        """
        Get list of ids of images that pass through the filters

        A tuple value is an inclusive (min, max) range; any other value must
        match exactly. Filter_input should be in this format:

            filter_input = { 
                'HazeControl.haze_fac' : 0,
                'OrbitingCameraControl.theta': (0, 1.57),
                'RainControl.n_layers_of_rain': (0, 3),
                'SunControl.elevation': 0
            }
            
        """
        # Boolean indexing returns new frames, so self.df is never modified.
        filtered_df = self.df
        for key, value in filter_input.items():
            column = filtered_df[key]
            if isinstance(value, tuple):
                keep = (column >= value[0]) & (column <= value[1])
            else:
                keep = column == value
            filtered_df = filtered_df.loc[keep]
        return filtered_df['id'].tolist()

    def _getImageLabel(self, image_id, displayed_values):
        """Build a multi-line ``name: value`` label for one image.

        One line is produced per column in ``displayed_values``, taken from
        the first row whose ``id`` equals ``image_id``.
        """
        row = self.df[self.df['id'] == image_id][displayed_values].to_dict('records')[0]
        return "".join(str(key) + ": " + str(value) + "\n" for key, value in row.items())

    def _getImageCorrect(self, image_id):
        """Find out if image was correctly predicted by the model.

        Raises ValueError when ``image_id`` does not match exactly one row.
        """
        matches = self.df.loc[self.df["id"] == image_id, 'is_correct']
        # Series.bool() is deprecated in recent pandas; item() keeps the
        # "exactly one element" check and bool() yields a plain Python bool.
        return bool(matches.item())

    def _getImagePrediction(self, image_id):
        """Return the predicted class name of an image as a string.

        If several rows share ``image_id`` their predictions are joined with
        newlines; if none does, an empty string is returned.
        """
        matches = self.df.loc[self.df["id"] == image_id, 'prediction']
        # Build the text from the raw values: Series.to_string() is a display
        # helper and may pad or shorten what it prints.
        return "\n".join(str(prediction) for prediction in matches)

    def getFilteredImages(self, filter_input, columns=4, displayed_values=None):
        """Display the images that pass ``filter_input`` in a grid.

        Args:
            filter_input: dict in the format accepted by ``_filterImageIds``.
            columns: number of images per row of the grid.
            displayed_values: columns to print above every image; when empty
                or omitted, the varying variables are shown.

        Every image gets a green frame when the prediction was correct and a
        red one otherwise, and the predicted class is written on the image.
        Invalid column names or an empty selection only print a message.
        """
        if not displayed_values:  # if no values entered for displayed_values
            displayed_values = self.varyingVariables
        # ensure that columns in displayed_values can be found in the dataframe
        elif not all(elem in self.df.columns for elem in displayed_values):
            print("Not all columns in display_values are in the dataframe")
            return

        # ensure that we are filtering by existent columns in the dataframe
        if not all(elem in self.df.columns for elem in filter_input):
            print("Not all columns in filter_input are in the dataframe")
            return

        image_ids = self._filterImageIds(filter_input)
        if not image_ids:
            print("There are no images.")
            return

        # Import the plotting stack only once there is something to draw.
        import os
        from math import ceil
        import matplotlib.pyplot as plt
        from matplotlib.image import imread

        rows = ceil(len(image_ids) / columns)

        # Figure height: every grid row needs room for a square image plus
        # one text line per displayed value in the title.
        figure_width = 20
        size_of_each_image = figure_width / columns
        size_of_each_line_in_label = 0.5
        size_of_image_label = size_of_each_image + size_of_each_line_in_label * len(displayed_values)
        figure_height = rows * size_of_image_label

        plt.figure(figsize=(figure_width, figure_height))
        for i, image_id in enumerate(image_ids):
            image_filename = os.path.join(self.image_location, str(image_id) + "_rgb.png")
            ax = plt.subplot(rows, columns, i + 1)
            ax.set_title(self._getImageLabel(image_id, displayed_values))
            color = "green" if self._getImageCorrect(image_id) else "red"
            for spine in ax.spines.values():
                spine.set_edgecolor(color)
                spine.set_linewidth(7)
            ax.text(9, 17, self._getImagePrediction(image_id),
                    bbox={'facecolor': 'white', 'alpha': 1, 'pad': 4})
            ax.imshow(imread(image_filename))
        plt.show()

    def _isVaryingVariable(self, variable):
        """Return True if ``variable`` is a varying column; otherwise print why not."""
        if variable not in self.df.columns:
            print("This variable cannot be found in the dataframe")
            return False
        if variable not in self.varyingVariables:
            print("This variable remains constant throughout all images")
            return False
        return True

    def getVariableLineGraph(self, variable):
        """ Plot line graph between varying variable (eg. intensity of haze) and accuracy"""
        if not self._isVaryingVariable(variable):
            return
        variable_dataframe = self.variableSettingAccuracies(variable).sort_index()
        variable_dataframe.plot.line(y='accuracy')

    def getVariableCorrelation(self, variable):
        """Print Pearson's correlation between a numeric variable and accuracy.

        The correlation is computed between the variable's distinct values
        and the accuracy measured at each of them.
        """
        from pandas.api.types import is_numeric_dtype

        if not self._isVaryingVariable(variable):
            return
        if not is_numeric_dtype(self.df[variable]):
            # Pearson's coefficient is undefined for categorical settings.
            print("Correlation can only be computed for numeric variables")
            return
        variable_dataframe = self.variableSettingAccuracies(variable)
        pcc = variable_dataframe["accuracy"].corr(variable_dataframe.index.to_series())
        print("Pearson's Correlation Coefficient: "+str(pcc))

In [67]:
# Load the non-robust model's results (class_maps.json and details.log) from ../non_robust/results/.
non_robust = AnalysisModule("../non_robust/results/")

In [13]:
# List the columns whose value changes between rows of the non-robust log.
non_robust.varyingVariables

['OrbitingCameraControl.phi', 'OrbitingCameraControl.theta']

In [35]:
# Non-robust model: accuracy per OrbitingCameraControl.phi value, lowest accuracy first.
non_robust.variableSettingAccuracies('OrbitingCameraControl.phi')

Unnamed: 0_level_0,accuracy
OrbitingCameraControl.phi,Unnamed: 1_level_1
0.0,0.0
0.174444,0.0
0.348889,0.0
0.523333,0.0
1.046667,0.1
0.697778,0.2
0.872222,0.2
1.221111,0.2
1.395556,0.3
1.57,0.4


In [36]:
# Non-robust model: accuracy per OrbitingCameraControl.theta value, lowest accuracy first.
non_robust.variableSettingAccuracies('OrbitingCameraControl.theta')

Unnamed: 0_level_0,accuracy
OrbitingCameraControl.theta,Unnamed: 1_level_1
-3.14,0.0
-2.442222,0.0
-0.348889,0.0
0.348889,0.0
2.442222,0.0
3.14,0.0
1.744444,0.1
-1.744444,0.3
-1.046667,0.5
1.046667,0.5


In [66]:
# Load the robust model's results (class_maps.json and details.log) from ../robust/results/.
robust = AnalysisModule("../robust/results/")

In [39]:
# Print the robust model's accuracy over all rows of its log.
robust.overallAccuracy()

Overall Accuracy: 0.02


In [42]:
# Robust model: accuracy per OrbitingCameraControl.phi value, lowest accuracy first.
robust.variableSettingAccuracies('OrbitingCameraControl.phi')

Unnamed: 0_level_0,accuracy
OrbitingCameraControl.phi,Unnamed: 1_level_1
0.0,0.0
0.174444,0.0
0.348889,0.0
0.523333,0.0
0.697778,0.0
0.872222,0.0
1.221111,0.0
1.395556,0.0
1.046667,0.1
1.57,0.1


In [44]:
# Robust model: accuracy per OrbitingCameraControl.theta value, lowest accuracy first.
robust.variableSettingAccuracies('OrbitingCameraControl.theta')

Unnamed: 0_level_0,accuracy
OrbitingCameraControl.theta,Unnamed: 1_level_1
-3.14,0.0
-2.442222,0.0
-0.348889,0.0
0.348889,0.0
1.046667,0.0
1.744444,0.0
2.442222,0.0
3.14,0.0
-1.744444,0.1
-1.046667,0.1


In [55]:
# Put both models' accuracies side by side for every OrbitingCameraControl.theta value.
robust_acc = robust.variableSettingAccuracies(
    'OrbitingCameraControl.theta'
).rename(columns={"accuracy": "rob_acc"})
non_robust_acc = non_robust.variableSettingAccuracies(
    'OrbitingCameraControl.theta'
).rename(columns={"accuracy": "non_rob_acc"})
# Left-join on the shared theta index, then order the rows by theta.
joined = robust_acc.join(non_robust_acc).sort_index()
joined.to_csv('varying_theta.csv')
joined

Unnamed: 0_level_0,rob_acc,non_rob_acc
OrbitingCameraControl.theta,Unnamed: 1_level_1,Unnamed: 2_level_1
-3.14,0.0,0.0
-2.442222,0.0,0.0
-1.744444,0.1,0.3
-1.046667,0.1,0.5
-0.348889,0.0,0.0
0.348889,0.0,0.0
1.046667,0.0,0.5
1.744444,0.0,0.1
2.442222,0.0,0.0
3.14,0.0,0.0


In [57]:
# Put both models' accuracies side by side for every OrbitingCameraControl.phi value.
robust_acc = robust.variableSettingAccuracies(
    'OrbitingCameraControl.phi'
).rename(columns={"accuracy": "rob_acc"})
non_robust_acc = non_robust.variableSettingAccuracies(
    'OrbitingCameraControl.phi'
).rename(columns={"accuracy": "non_rob_acc"})
# Left-join on the shared phi index, then order the rows by phi.
joined = robust_acc.join(non_robust_acc).sort_index()
joined.to_csv('varying_phi.csv')
joined

Unnamed: 0_level_0,rob_acc,non_rob_acc
OrbitingCameraControl.phi,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.0,0.0
0.174444,0.0,0.0
0.348889,0.0,0.0
0.523333,0.0,0.0
0.697778,0.0,0.2
0.872222,0.0,0.2
1.046667,0.1,0.1
1.221111,0.0,0.2
1.395556,0.0,0.3
1.57,0.1,0.4


In [60]:
# Preview the first rows of the robust model's results table.
robust.df.head()

Unnamed: 0,is_correct,loss,prediction,id,environment,model,output_type,OrbitingCameraControl.phi,OrbitingCameraControl.theta,OrientationControl.rotation_x,...,OrbitingCameraControl.radius,SunControl.size,SunControl.intensity,SunControl.elevation,SunControl.rotation,SunControl.altitude,SunControl.air,SunControl.dust,SunControl.ozone,SunControl.background_strength
0,False,5.72372,birdhouse,27e48cc8-9434-4931-8337-20570703c1ca,studioX_Stage.blend,025_mug.blend,classes,0.0,-3.14,-1.57,...,0.3,3.0,1.0,1.57,0.0,1.0,3.0,1.0,1.0,0.1
1,False,4.581645,"flatworm, platyhelminth",c74d682c-9785-418d-bbc5-66f94afaa64d,studioX_Stage.blend,025_mug.blend,classes,0.0,-2.442222,-1.57,...,0.3,3.0,1.0,1.57,0.0,1.0,3.0,1.0,1.0,0.1
2,False,3.9022677,"flatworm, platyhelminth",eaf7bf75-aa75-41c8-9100-125d540dc77e,studioX_Stage.blend,025_mug.blend,classes,0.0,-1.744444,-1.57,...,0.3,3.0,1.0,1.57,0.0,1.0,3.0,1.0,1.0,0.1
3,False,4.289319,"flatworm, platyhelminth",4b72001b-32bd-4337-bde0-fe3bdfa554ce,studioX_Stage.blend,025_mug.blend,classes,0.0,-1.046667,-1.57,...,0.3,3.0,1.0,1.57,0.0,1.0,3.0,1.0,1.0,0.1
4,False,4.5773854,"flatworm, platyhelminth",869e9f72-a21b-4a16-a5cf-5723cbcd84d5,studioX_Stage.blend,025_mug.blend,classes,0.0,-0.348889,-1.57,...,0.3,3.0,1.0,1.57,0.0,1.0,3.0,1.0,1.0,0.1


In [72]:
import pandas as pd

# Tag every column with the model it came from so both tables can sit side by side.
robust_df = robust.df.add_suffix("_rob")
non_robust_df = non_robust.df.add_suffix("_non_rob")

# Keep every non-robust row and attach the robust row that has the same
# OrbitingCameraControl theta and phi values.
merged_df = pd.merge(
    non_robust_df,
    robust_df,
    how='left',
    left_on=['OrbitingCameraControl.theta_non_rob', 'OrbitingCameraControl.phi_non_rob'],
    right_on=['OrbitingCameraControl.theta_rob', 'OrbitingCameraControl.phi_rob'],
)
merged_df.to_csv("merged_df.csv")

In [73]:
# Select the rows the robust model classified correctly while the non-robust
# model did not, then show the robust model's loss for them.
filter_input = {'is_correct_rob': True, 'is_correct_non_rob': False}

output = merged_df.copy()
for key, value in filter_input.items():
    # Narrow the selection one condition at a time.
    output = output.loc[output[key] == value]

output['loss_rob']

Unnamed: 0,is_correct_non_rob,loss_non_rob,prediction_non_rob,id_non_rob,environment_non_rob,model_non_rob,output_type_non_rob,OrbitingCameraControl.phi_non_rob,OrbitingCameraControl.theta_non_rob,OrientationControl.rotation_x_non_rob,...,OrbitingCameraControl.radius_rob,SunControl.size_rob,SunControl.intensity_rob,SunControl.elevation_rob,SunControl.rotation_rob,SunControl.altitude_rob,SunControl.air_rob,SunControl.dust_rob,SunControl.ozone_rob,SunControl.background_strength_rob
62,False,1.4662951,cup,01ccb810-7961-4060-988a-05407085be89,studioX_Stage.blend,025_mug.blend,classes,1.046667,-1.744444,-1.57,...,0.3,3.0,1.0,1.57,0.0,1.0,3.0,1.0,1.0,0.1
