# Evaluate accuracy of the classifier on training data

In [14]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [10]:
# Names of the columns in the csv files.
# They are referenced during feature generation and training.

# --- Camera setup ---
cam_a_k = 'cam_a'  # Camera incline relative to the ground surface, deg
cam_y_k = 'y'      # Ground surface offset relative to the camera origin (negative camera height), m

# --- Object identity ---
o_name_k = 'o_name'    # Unique name of an object
o_class_k = 'o_class'  # Object class as an integer, where 0 is the noise class

# --- Real (ground-truth) object geometry ---
z_k = 'z'    # Real distance to the object point closest to the camera, m
x_k = 'x'    # Real x coordinate of the object centre, m
ww_k = 'ww'  # Real object width, m
hh_k = 'hh'  # Real object height, m
dd_k = 'dd'  # Real object depth, m
ry_k = 'ry'  # Initial offset of r_y (some objects are initially rotated with their back to the camera)

# --- Object geometry estimated by the feature extractor ---
z_est_k = 'z_est'        # Estimated distance to the object point closest to the camera, m
x_est_k = 'x_est'        # Estimated x coordinate of the object centre, m
w_est_k = 'width_est'    # Estimated object width, m
h_est_k = 'height_est'   # Estimated object height, m
ca_est_k = 'rw_ca_est'   # Estimated object contour area, m2

# --- Measurements in the image plane ---
b_rec_k = (
    'x_px',  # 0 - x coordinate of the upper-left corner of the object bounding rectangle, px
    'y_px',  # 1 - y coordinate of the upper-left corner of the object bounding rectangle, px
    'w_px',  # 2 - width of the object bounding rectangle, px
    'h_px',  # 3 - height of the object bounding rectangle, px
)
c_ar_px_k = 'c_ar_px'  # Object contour area in the image plane, px
thr_k = 'thr'          # Size of the kernel used for morphological dilation of the resulting mask to imitate motion blur

In [12]:
def read_dataframe(target_df_path, noises_df_path):
    """
    Read the source training data from two csv files and merge it into one dataframe.

    No filtering is applied here: all rows of both files are returned as stored.

    :param target_df_path: path to csv file containing objects' features
    :param noises_df_path: path to csv file containing noises' features
    :return: merged dataframe with the noise rows first, followed by the object rows,
             re-indexed from 0 to N-1
    """
    target_df = pd.read_csv(target_df_path)
    noises_df = pd.read_csv(noises_df_path)
    # Each file is read with its own 0-based index, so a plain concat would leave
    # duplicated row labels; ignore_index=True gives every row a unique label.
    full_dataframe = pd.concat([noises_df, target_df], ignore_index=True)

    return full_dataframe


def prepare_data_for_training(full_dataframe, features_cols):
    """
    Build model inputs from a dataframe.

    The requested columns are stacked side by side into a 2-D array, which is then expanded
    by a freshly fitted second-order PolynomialFeatures transformer (bias column included).

    :param full_dataframe: dataframe describing target and noises classes
    :param features_cols: names of the dataframe columns to use as features
    :return: tuple of (expanded feature array, labels taken from the o_class_k column,
             fitted PolynomialFeatures transformer)
    """
    # One column per requested feature, in the order given by features_cols
    selected_columns = [full_dataframe[name] for name in features_cols]
    raw_features = np.stack(selected_columns, axis=1)

    labels = full_dataframe[o_class_k]

    # Raise the feature set to polynomial order 2
    expander = PolynomialFeatures(degree=2, include_bias=True)
    expanded_features = expander.fit_transform(raw_features)

    return expanded_features, labels, expander


def estimate_clf(clf, X_test, y):
    """
    Classify the given samples and describe the quality of the predictions as text.

    The report consists of a short legend with the metric formulas, the accuracy score,
    the classification report and the confusion matrix, each followed by a newline.

    :param clf: classifier object providing a predict() method
    :param X_test: samples passed to clf.predict
    :param y: true labels the predictions are compared against
    :return: report as a single string
    """
    # Classify the dataset
    predicted = clf.predict(X_test)

    legend = 'Precision P=TP/TP+FP\nRecall R=TP/TP+FN\nF1 score F1=2*(P*R)/(P+R)\n'
    accuracy = accuracy_score(y, predicted)
    per_class = classification_report(y, predicted)
    confusion = confusion_matrix(y, predicted)

    sections = [
        legend,
        f'Accuracy {accuracy}\n',
        f'{per_class}\n',
        f'{confusion}\n',
    ]
    return ''.join(sections)

## Read data 

In [5]:
# Input csv files: object features and noise features
features_path = '../features.csv'
noises_path = '../noises.csv'

# Merge both files into a single dataframe used by the following cells
dt = read_dataframe(features_path, noises_path)
print(f'Input data shape: {dt.shape}')
# Show which camera angles and heights are present in the data
print(f'Cases: angles {dt[cam_a_k].unique()}, heights {dt[cam_y_k].unique()}')

Input data shape: (546981, 21)
Cases: angles [-39.], heights [-3.32 -3.4 ]


## Select features for a particular height and angle

In [18]:
# Camera configuration to evaluate. These values select the data rows (cam_a / y columns)
# and are also used as lookup keys into the loaded classifiers dictionary.
# The integer -39 matches the float -39.0 found in the data and in the classifier keys,
# because equal numbers compare and hash equal in Python.
angle = -39      # Camera incline, deg
height = -3.32   # Ground surface offset (negative camera height), m

In [19]:
# Keep only the rows recorded for the selected camera angle and height (exact value match)
dt_ = dt[(dt[cam_a_k] == angle) & (dt[cam_y_k] == height)]

## Load trained classifier

In [21]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code while unpickling,
# so only load classifier files that come from a trusted source.
# The context manager closes the file handle as soon as the classifiers are loaded.
with open('../clf.pcl', "rb") as clf_file:
    all_classifiers = pickle.load(clf_file)
print(all_classifiers)

# The 'poly' entry holds the feature transformer; classifiers are looked up
# by camera height first and by camera angle second.
poly = all_classifiers['poly']
clf = all_classifiers[height][angle]

{-3.32: {-39.0: LogisticRegression(C=3, n_jobs=-1, solver='newton-cg', verbose=1)}, -3.4: {-39.0: LogisticRegression(C=3, n_jobs=-1, solver='newton-cg', verbose=1)}, 'poly': PolynomialFeatures()}


## Print report

In [23]:
feature_vector = [w_est_k, h_est_k, z_est_k]  # Names of the columns that are used as features
# The third returned value (the PolynomialFeatures transformer fitted inside the call) is discarded.
# NOTE(review): the `poly` transformer loaded together with the classifiers is not used here;
# confirm that it is configured identically to the one created in prepare_data_for_training.
x_train, y_train, _ = prepare_data_for_training(dt_, feature_vector)
# Report the classifier quality on the selected training rows
print(estimate_clf(clf, x_train, y_train))

Precision P=TP/TP+FP
Recall R=TP/TP+FN
F1 score F1=2*(P*R)/(P+R)
Accuracy 0.8822727872119089
              precision    recall  f1-score   support

           0       0.98      0.97      0.98     40000
           1       0.86      0.97      0.91    153862
           2       0.70      0.37      0.49     38550
           3       0.97      0.95      0.96     40592

    accuracy                           0.88    273004
   macro avg       0.88      0.82      0.83    273004
weighted avg       0.87      0.88      0.87    273004

[[ 38753    260    150    837]
 [    20 149220   4622      0]
 [    46  23602  14427    475]
 [   609     63   1456  38464]]

