In [1]:
import os
import re
import math
import cv2
import pandas as pd
import numpy as np

from tqdm import tqdm
from glob import glob
from tools.settings import *
from keras.models import Sequential
from keras.preprocessing import image
from keras.layers import Dense, Dropout
from keras.applications.vgg16 import VGG16
from tensorflow.keras.utils import to_categorical
from tools.train_val_test_spliter import split

In [112]:
# To split dataset if already splits folder already exits no need to run it
split()

Splitting the ginen dataset into Train Test=0.4 Validation=0.1
Done


In [2]:
train = pd. read_csv(os.path.join(dataset_path, "train.csv"))
test = pd. read_csv(os.path.join(dataset_path, "test.csv"))
val = pd. read_csv(os.path.join(dataset_path, "val.csv"))

In [3]:
train.head()

Unnamed: 0,Video_url,action
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,punch
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,kick
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,kick
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,punch


In [4]:
test.head()

Unnamed: 0,Video_url,action
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,punch
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,kick
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap


In [5]:
val.head()

Unnamed: 0,Video_url,action
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,punch
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,kick
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,kick
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap


In [6]:
print(train.shape)
print(test.shape)
print(val.shape)

(143, 2)
(111, 2)
(27, 2)


In [13]:
def convert_to_frame(data, folder_name):
    '''
    Generated filenames format dataset_path/folder_name/video_name_frame{number}_action.jpg
    '''
    directory = os.path.join(dataset_path, folder_name)
    if not os.path.exists(directory):
        os.makedirs(directory)
    for i in tqdm(range(data.shape[0])):
        video_file = data['Video_url'][i]
        action = data['action'][i]
        video_name_list = video_file.split('/')[-1].split('.')
        video_name_list = video_name_list[:-1]
        video_name = ""
        for n in video_name_list:
            video_name += n
        # capturing the video from the given path
        capture = cv2.VideoCapture(video_file) 
        #frame rate
        frame_rate = capture.get(5)
        count = 0
        while(capture.isOpened()):
            #current frame number
            frame_id = capture.get(1) 
            read_correctly, frame = capture.read()
            if not read_correctly:
                break
            if (frame_id % math.floor(frame_rate) == 0):
                # storing the frames in a new folder named train_1
                filename = directory + "/" + video_name + "_frame{}_".format(count) + action +".jpg"
                count += 1
                cv2.imwrite(filename, frame)
        capture.release()
    print("Successfully Converted")

In [89]:
convert_to_frame(train, train_frames_path_name)

100%|██████████| 143/143 [00:50<00:00,  2.84it/s]

Successfully Converted





In [15]:
convert_to_frame(val, val_frames_path_name)

100%|██████████| 27/27 [00:09<00:00,  2.89it/s]

Successfully Converted





In [17]:
def create_paths_csv(directory, file_name):
    images = os.listdir(directory)
    images_path_list = []
    images_action_list = [] 
    for image in images:
        images_path_list.append(directory + image)
        images_action_list.append(image.split('.')[0].split('_')[-1])
    df = pd.DataFrame()
    df['image'] = images_path_list
    df['action'] = images_action_list
    print(os.path.join(dataset_path, file_name+'.csv'))
    df.to_csv(os.path.join(dataset_path, file_name+'.csv'), index=False)

In [121]:
create_paths_csv(train_frames_path, train_frames_path_name)

/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-time-action-recognition-from-video-footage/dataset/train_frames.csv


In [18]:
create_paths_csv(val_frames_path, val_frames_path_name)

/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-time-action-recognition-from-video-footage/dataset/val_frames.csv


In [33]:
train_image = pd.read_csv(os.path.join(dataset_path, 'train_frames.csv'))
train_image.head()

Unnamed: 0,image,action
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,punch
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap


In [8]:
print(train_image.shape)

(620, 2)


In [9]:
val_image = pd.read_csv(os.path.join(dataset_path, 'val_frames.csv'))
val_image.head()

Unnamed: 0,image,action
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,kick
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,slap


In [10]:
print(val_image.shape)

(115, 2)


In [11]:
action_values = list(train_image['action'].unique())
action_values

['punch', 'slap', 'kick']

In [30]:
def create_class_columns(df):
    for value in action_values:
        df[value] = np.where(df['action'].str.contains(value), 1, 0)
    df.drop('action', axis='columns', inplace=True)

In [34]:
create_class_columns(train_image)
train_image.head()

Unnamed: 0,image,punch,slap,kick
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,1,0,0
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0


In [39]:
create_class_columns(val_image)
val_image.head()

Unnamed: 0,image,punch,slap,kick
0,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0
1,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0
2,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,0,1
3,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0
4,/home/mushfiqul/Mushfiqul/CSE/Thesis2.0/real-t...,0,1,0


In [13]:
def convert_to_array_and_split(image_data):
    image_value = []
    for i in tqdm(range(image_data.shape[0])):
        img = image.load_img(image_data['image'][i], target_size=(224,224,3))
        img = image.img_to_array(img)
        # normalizing the pixel value
        img = img / 255
        image_value.append(img)

    X = np.array(image_value)
    y = image_data
    y.drop('image', axis='columns', inplace=True)
    return X, y

In [35]:
X_train, y_train = convert_to_array_and_split(train_image)
print(X_train.shape)

100%|██████████| 620/620 [00:11<00:00, 53.66it/s]


(620, 224, 224, 3)


In [15]:
X_val, y_val = convert_to_array_and_split(val_image)
print(X_val.shape)

100%|██████████| 115/115 [00:01<00:00, 59.44it/s]

(115, 224, 224, 3)





In [36]:
y_train.head()

Unnamed: 0,punch,slap,kick
0,1,0,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0


In [17]:
y_val.head()

Unnamed: 0,action
0,slap
1,slap
2,kick
3,slap
4,slap


In [18]:
'''This model was trained on a dataset that has 1,000 classes. 
include_top = False will remove the last layer of this model so that we can tune it as per our need.
'''
base_model = VGG16(weights='imagenet', include_top=False)

In [19]:
# extracting features for training frames
X_train = base_model.predict(X_train)
X_train.shape

(620, 7, 7, 512)

In [20]:
X_val = base_model.predict(X_val)
X_val.shape

(115, 7, 7, 512)

In [21]:
# reshaping the training as well as validation frames in single dimension
X_train = X_train.reshape(620, 7*7*512)
X_val = X_val.reshape(115, 7*7*512)

In [22]:
# normalizing the pixel values
max_pixel = X_train.max()
X_train = X_train / max_pixel
X_val = X_val / max_pixel
print(X_train.shape)
print(X_val.shape)

(620, 25088)
(115, 25088)


In [27]:
# The input shape will be 25,088
model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(25088,)))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
model.summary()

In [24]:
# defining a function to save the weights of best model
from keras.callbacks import ModelCheckpoint
mcp_weight = ModelCheckpoint('weight.hdf5', save_best_only=True, monitor='val_loss', mode='min')

In [25]:
model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), callbacks=[mcp_weight], batch_size=128)

Epoch 1/50


ValueError: in user code:

    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/mushfiqul/anaconda3/envs/action-recognition/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 3) are incompatible


In [28]:
model.load_weights("weight.hdf5")
# model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
# model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 1024)              25691136  
_________________________________________________________________
dropout_4 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)              

In [40]:
from scipy import stats as s
predict = []
actual = []
if not os.path.exists(test_frames_path):
    os.makedirs(test_frames_path)

for i in tqdm(range(test.shape[0])):
    video_file = test['Video_url'][i]
    action = test['action'][i]
    video_name_list = video_file.split('/')[-1].split('.')
    video_name_list = video_name_list[:-1]
    video_name = ""
    for n in video_name_list:
        video_name += n
    # capturing the video from the given path
    capture = cv2.VideoCapture(video_file) 
    #frame rate
    frame_rate = capture.get(5)
    count = 0
    files = glob(test_frames_path + '/*')
    #removing all files from folder
    for f in files:
        os.remove(f)
    while(capture.isOpened()):
        #current frame number
        frame_id = capture.get(1) 
        read_correctly, frame = capture.read()
        if not read_correctly:
            break
        if (frame_id % math.floor(frame_rate) == 0):
            # storing the frames in a new folder named train_1
            filename = test_frames_path + "/" + video_name + "_frame{}_".format(count) + action +".jpg"
            count += 1
            cv2.imwrite(filename, frame)
    capture.release()
    
    # reading all the frames from temp folder
    images = glob(test_frames_path + '/*.jpg')
    prediction_images = []
    for i in range(len(images)):
        img = image.load_img(images[i], target_size=(224,224,3))
        img = image.img_to_array(img)
        img = img / 255
        prediction_images.append(img)
        
    # converting all the frames for a test video into numpy array
    prediction_images = np.array(prediction_images)
    # extracting features using pre-trained model
    prediction_images = base_model.predict(prediction_images)
    # converting features in one dimensional array
    prediction_images = prediction_images.reshape(prediction_images.shape[0], 7*7*512)
    # predicting tags for each array
    prediction = np.argmax(model.predict(prediction_images), axis=-1)
    # appending the mode of predictions in predict list to assign the tag to the video
    predict.append(y_train.columns.values[s.mode(prediction)[0][0]])
    # appending the actual tag of the video
    actual.append(action)

100%|██████████| 111/111 [04:47<00:00,  2.59s/it]


In [37]:
y_train.columns.values[0]

'punch'

In [44]:
from sklearn.metrics import accuracy_score
accuracy_score(predict, actual)*100

92.7927927927928

In [43]:
# for i in range(0, len(predict)):
#     print(predict[i] + " " + actual[i])

punch slap
slap slap
kick punch
kick kick
slap slap
slap slap
slap slap
kick kick
punch punch
slap slap
slap slap
slap slap
punch punch
slap slap
slap slap
kick kick
kick kick
slap slap
punch punch
punch punch
slap slap
punch slap
punch punch
slap slap
slap slap
kick kick
slap slap
slap slap
punch punch
kick kick
slap slap
punch punch
kick kick
slap slap
punch punch
slap slap
punch punch
slap slap
slap slap
punch punch
slap slap
punch punch
punch punch
kick kick
punch punch
kick kick
kick kick
punch punch
punch punch
kick kick
punch punch
slap slap
slap slap
slap slap
slap slap
slap slap
punch punch
slap slap
slap punch
punch punch
punch punch
kick kick
kick punch
punch punch
punch slap
slap slap
slap slap
slap slap
punch punch
kick kick
punch punch
slap slap
punch punch
slap slap
kick kick
kick kick
punch kick
punch punch
punch punch
slap slap
kick kick
kick kick
kick kick
kick kick
kick kick
kick kick
slap slap
slap slap
kick kick
kick kick
slap slap
slap slap
slap slap
punch punch
s