## Import packages

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from skimage.feature import hog
from sklearn.svm import LinearSVC
import math
from sklearn.metrics import accuracy_score, fbeta_score, precision_score, recall_score
import shutil
import random


### Defining split ratio for training and testing

In [4]:
# split data
ratio = 0.8
size = 1000
split_ratio = math.floor(ratio * size)

data = pd.read_csv('./train.csv')
data = data[['video_id', 'video_frame', 'annotations']]
data["annotations"] = data["annotations"].map(lambda x: eval(x))

### Storing COTS count per image

In [5]:
data['count'] = 0
for i in range(0, len(data['annotations'])):
    data['count'][i] = len(data['annotations'][i])
data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['count'][i] = len(data['annotations'][i])


Unnamed: 0,video_id,video_frame,annotations,count
0,0,0,[],0
1,0,1,[],0
2,0,2,[],0
3,0,3,[],0
4,0,4,[],0
...,...,...,...,...
23496,2,10755,[],0
23497,2,10756,[],0
23498,2,10757,[],0
23499,2,10758,[],0


### Split data into training and testing dataframes

In [6]:
training_df = data[0:split_ratio]
testing_df = data[split_ratio:]
training_df

Unnamed: 0,video_id,video_frame,annotations,count
0,0,0,[],0
1,0,1,[],0
2,0,2,[],0
3,0,3,[],0
4,0,4,[],0
...,...,...,...,...
795,0,1188,[],0
796,0,1189,[],0
797,0,1190,[],0
798,0,1191,[],0


# Collect hog features for images in training size

In [7]:
window_size = (720, 1280)   # Change to (64, 128) for quicker collection

train_images = []
hog_images = []

cots_df = []
no_cots_df = []

cots_features = []
no_cots_features = []

cots_labels = []
no_cots_labels = []

for i in range(0, len(training_df['video_id'])):
    video_id = str(training_df['video_id'][i])
    frame_id = str(training_df['video_frame'][i])
    path = 'train_images/video_'+video_id+'/'+frame_id+'.jpg'
    img = cv2.imread(path, cv2.COLOR_BGR2RGB)
    train_images.append(img)
    
    if training_df['count'][i] == 0:
        # If no COTS in image
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img = cv2.resize(img, window_size)
        no_cots_df.append(img)
        feature = hog(img, orientations=9, pixels_per_cell=(8,8), cells_per_block=(2,2), visualize=False, feature_vector=True)
        no_cots_features.append(feature)
        no_cots_labels.append(0)
    else:
        # If COTS is in image
        for j in training_df['annotations'][i]:
            cots = img[j['y']:j['y']+j['height'], j['x']:j['x']+j['width']]
            cots = cv2.cvtColor(cots, cv2.COLOR_RGB2GRAY)
            cots = cv2.resize(cots, window_size)
            cots_df.append(cots)
            feature, hog_image = hog(cots, orientations=9, pixels_per_cell=(8,8), cells_per_block=(2,2), visualize=True, feature_vector=True)
            cots_features.append(feature)
            hog_images.append(hog_image)
            cots_labels.append(training_df['count'][i])


### Split into testing and training for SVM

In [8]:
x = np.asarray(cots_features + no_cots_features)
y = np.asarray(cots_labels + no_cots_labels)
y =  np.array(y).reshape(len(y),1)
data_frame = np.hstack((x,y))
print(data_frame.shape)
np.random.shuffle(data_frame)

percentage = 80
partition = int(len(x)*percentage/100)

x_train, x_test = data_frame[:partition,:-1],  data_frame[partition:,:-1]
y_train, y_test = data_frame[:partition,-1:].ravel() , data_frame[partition:,-1:].ravel()

(830, 509437)


## Predict with split testing data and print results

In [9]:
model = LinearSVC()
model.fit(x_train, y_train)
# Get predicitions on test data using model
y_pred = model.predict(x_test)
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred, average='macro'))
print("Recall: ", recall_score(y_test, y_pred, average='macro'))
print("F1 Score: ", fbeta_score(y_test, y_pred, beta=1, average='macro'))
print("F2 Score: ", fbeta_score(y_test, y_pred, beta=2, average='macro'))

Accuracy:  0.9819277108433735
Precision:  0.9511111111111111
Recall:  0.967418546365915
F1 Score:  0.958894724823019
F2 Score:  0.9639154186331712


## Random Images Selection Functions
- get_random_images() => Get random images from a folder of images
- store_images() => Store images using image paths in folder 'test_images'

In [10]:
def get_random_images(image_folder, num_images):
    image_paths = []
    while (len(image_paths) < num_images):
        for dir1 in os.listdir(image_folder):
            for dir2 in os.listdir(os.path.join(image_folder, dir1)):
                image_path = os.path.join(image_folder, dir1, dir2)
                if (random.randint(0,10000) > 9999 and len(image_paths) < 10):
                    image_paths.append(image_path)

    return image_paths

In [11]:
def store_images(image_paths):
    os.makedirs('test_images')
    for i in range(0, len(image_paths)):
        shutil.copy(image_paths[i], 'test_images')

### Select random images from train_images and store in test_images
Make sure 'test_images' directory does not exist when running this

In [12]:
num_of_test_images = 10     # Change this to the number of images you want to test

test_images = get_random_images('train_images', num_of_test_images)
store_images(test_images)
print(test_images)

['train_images/video_2/9625.jpg', 'train_images/video_2/631.jpg', 'train_images/video_2/10623.jpg', 'train_images/video_0/9910.jpg', 'train_images/video_2/4197.jpg', 'train_images/video_2/5115.jpg', 'train_images/video_1/4462.jpg', 'train_images/video_0/9572.jpg', 'train_images/video_2/9478.jpg', 'train_images/video_1/8604.jpg']


In [13]:
def get_image_paths(image_folder):
    image_paths = []
    for dir1 in os.listdir(image_folder):
        image_path = os.path.join(image_folder, dir1)
        image_paths.append(image_path)
            
    return image_paths

## Predict with random testing data and print results
NOTE: Manually fill in test_y_tests with number of COTS in each test image to show results

In [14]:
test_images = get_image_paths('test_images')

test_y_preds = []
test_y_tests = [[0], [0], [1], [0], [0], [0], [0], [0], [1], [0]]   # Manually modify to match test images

# Get predicitions on test_data images using model
i = 0
for image in test_images:
    img = cv2.imread(image, cv2.COLOR_BGR2RGB)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    img = cv2.resize(img, window_size)
    feature = hog(img, orientations=9, pixels_per_cell=(8,8), cells_per_block=(2,2), visualize=False, feature_vector=True)
    test_y_pred = model.predict(feature.reshape(1,-1))
    test_y_preds.append(test_y_pred)
    i += 1

print("Accuracy: ", accuracy_score(test_y_tests, test_y_preds))
print("Precision: ", precision_score(test_y_tests, test_y_preds, average='macro'))
print("Recall: ", recall_score(test_y_tests, test_y_preds, average='macro'))
print("F1 Score: ", fbeta_score(test_y_tests, test_y_preds, beta=1, average='macro'))
print("F2 Score: ", fbeta_score(test_y_tests, test_y_preds, beta=2, average='macro'))


Accuracy:  0.8
Precision:  0.4
Recall:  0.5
F1 Score:  0.4444444444444445
F2 Score:  0.47619047619047616


  _warn_prf(average, modifier, msg_start, len(result))
