In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  

# CSV file holding the predicted measurements (read with pd.read_csv below).
RESULTS_INPUT = 'results.csv'
# Excel workbook holding the actual measurements (read with pd.read_excel below).
ACTUAL_INPUT = 'actual.xlsx'

### Reading data

In [None]:
# Actual measurements: keep only the join key and the two wing columns, then
# rename the wing columns so they cannot be confused with the predictions.
# (index=str converts the row labels to strings, as before.)
actual = (
    pd.read_excel(ACTUAL_INPUT)
    .loc[:, ['full name', 'Right', 'Left']]
    .rename(index=str, columns={"Right": "actual_right", "Left": "actual_left"})
)

# Predicted measurements: every column is kept; only the two wing columns
# are renamed to the matching "predicted_*" names.
predicted_columns = {
    "right_wing (mm)": "predicted_right",
    "left_wing (mm)": "predicted_left",
}
predicted = pd.read_csv(RESULTS_INPUT).rename(index=str, columns=predicted_columns)

### Merging together

Creating new columns for the difference in predicted - actual wing length

In [None]:
# Join actual and predicted measurements on the image identifier
# ('full name' in the actual data, 'image_id' in the predictions). pd.merge
# defaults to an inner join, so rows present in only one table are dropped.
# The now-redundant 'image_id' key column is removed afterwards.
both = pd.merge(actual, predicted, left_on='full name',
                right_on='image_id').drop(['image_id'], axis=1)

# Signed error per wing: predicted minus actual.
both['left_diff'] = both['predicted_left'] - both['actual_left']
both['right_diff'] = both['predicted_right'] - both['actual_right']

# Pool the right- and left-wing errors into a single Series for the summary
# statistics. Series.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and, like append, keeps the original
# row labels (so each label appears twice in all_diffs).
all_diffs = pd.concat([both['right_diff'], both['left_diff']])

# Combined absolute error of both wings for each image.
both['sum_diff'] = both['right_diff'].abs() + both['left_diff'].abs()

# Show the five rows with the largest combined error among those below 4.
both.loc[both['sum_diff'] < 4].sort_values(by=['sum_diff'], ascending=False).head()

### Calculating stats about data 

Compute the mean and standard deviation (SD) of the pooled left/right differences, then remove outliers, i.e. values more than 2 SD away from the mean.

In [None]:
# Centre and spread of the pooled prediction errors (np.std with its default
# arguments).
mean, sd = np.mean(all_diffs), np.std(all_diffs)

# Outlier cut-offs: two standard deviations either side of the mean.
lower, upper = mean - 2 * sd, mean + 2 * sd

summary_line = "Mean: {mean} SD: {sd}.".format(mean=mean, sd=sd)
bounds_line = "Lower: {lower} Upper: {upper}.".format(lower=lower, upper=upper)
print(summary_line)
print(bounds_line)

In [None]:
# Outliers are the differences lying strictly outside [lower, upper].
outliers = all_diffs[(all_diffs < lower) | (all_diffs > upper)]
print("Num outliers: {outliers}".format(outliers=len(outliers)))

# Keep the closed interval [lower, upper], the exact complement of the outlier
# test above. The previous strict '>' / '<' filter silently discarded values
# equal to a bound without counting them as outliers. Series.between is
# inclusive by default and evaluates to False for NaN, so missing differences
# are still dropped here, as before.
all_diffs = all_diffs[all_diffs.between(lower, upper)]

### Plotting

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))

# Draw on the axes created above explicitly rather than relying on pyplot's
# implicit "current figure" state.
ax = all_diffs.hist(bins='auto', ax=ax)

# Label the figure so it can be read on its own.
ax.set_xlabel('Difference between (predicted - actual) in mm')
ax.set_ylabel('Number of samples')
ax.set_title('Error in predicted length');

In [None]:
# filename = 'result_plot.png'
# output_path = os.path.normpath(filename)  # NOTE: needs `import os`, which this notebook does not import
# plt.xlabel('Difference between (predicted - actual) in mm')
# start, end = ax.get_xlim()
# plt.ylabel('Number of samples')
# plt.title('Error in predicted length')
# plt.savefig(output_path)
# plt.close()