**This portion processes the videos into frames to prepare for learning**

This code uses methods provided in this tutorial [here](https://www.analyticsvidhya.com/blog/2019/09/step-by-step-deep-learning-tutorial-video-classification-python/) in order to process videos and setting up the neural net. Credits go to Pulkit Sharma.

In [0]:
# Lots of headers, though some may be unused rn
import cv2
import math
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from keras.preprocessing import image
import numpy as np
from keras.utils import np_utils
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from glob import glob
from tqdm import tqdm

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
cd drive/'My Drive'/Videos_DLP_v3

In [0]:
# Prepares stuff for training
# open the .txt file which have names of training videos
f = open("Train.txt", "r")
temp = f.read()
videos = temp.split('\n')

# creating a dataframe using video names
train = pd.DataFrame()
train['video_name'] = videos
train = train[:-1]
train.head()

In [0]:
# Prepares stuff for testing
# open the .txt file which have names of test videos
f = open("Test.txt", "r")
temp = f.read()
videos = temp.split('\n')

# creating a dataframe having video names
test = pd.DataFrame()
test['video_name'] = videos
test = test[:-1]
test.head()

In [0]:
# Tag creation here
# creating tags for training videos
train_video_tag = []
for i in range(train.shape[0]):
    train_video_tag.append(train['video_name'][i].split('/')[0])
    
train['tag'] = train_video_tag

# creating tags for test videos
test_video_tag = []
for i in range(test.shape[0]):
    test_video_tag.append(test['video_name'][i].split('/')[0])
    
test['tag'] = test_video_tag

In [0]:
# Breaks the video by frames for training set
for i in tqdm(range(train.shape[0])):
    count = 0
    videoFile = train['video_name'][i]
    cap = cv2.VideoCapture(videoFile.split(' ')[0])   # capturing the video from the given path
    frameRate = cap.get(5) #frame rate
    x=1
    while(cap.isOpened()):
        frameId = cap.get(1) #current frame number
        ret, frame = cap.read()
        if (ret != True):
            break
        if (frameId % math.floor(frameRate) == 0):
            # storing the frames in a new folder named train_1
            filename ='training/' + videoFile.split(' ')[0] +"_frame%d.jpg" % count;count+=1
            cv2.imwrite(filename, frame)
    cap.release()

In [0]:
# getting the names of all the images for training set
images = glob("training/*.jpg")
train_image = []
train_class = []
for i in tqdm(range(len(images))):
    # creating the image name
    train_image.append(images[i].split('/')[1])
    # creating the class of image
    vidclass = images[i].split('/')[1].split('_')[0]
    train_class.append(vidclass.lower())
    
# storing the images and their class in a dataframe
train_data = pd.DataFrame()
train_data['image'] = train_image
train_data['class'] = train_class 

# converting the dataframename into csv file 
print(train_data['class'])
train_data.to_csv('training/trains.csv',header=True, index=False)

In [0]:
# Breaks the video by frames for testing set
for i in tqdm(range(test.shape[0])):
    count = 0
    videoFile = test['video_name'][i]
    cap = cv2.VideoCapture(videoFile.split(' ')[0])   # capturing the video from the given path
    frameRate = cap.get(5) #frame rate
    x=1
    while(cap.isOpened()):
        frameId = cap.get(1) #current frame number
        ret, frame = cap.read()
        if (ret != True):
            break
        # Save frames every second
        if (frameId % math.floor(frameRate) == 0):
            filename ='testing/' + videoFile.split(' ')[0] +"_frame%d.jpg" % count;count+=1
            cv2.imwrite(filename, frame)
    cap.release()

In [0]:
# getting the names of all the images for testing set
images = glob("testing/*.jpg")
test_image = []
test_class = []
for i in tqdm(range(len(images))):
    # creating the image name
    test_image.append(images[i].split('/')[1])
    # creating the class of image
    vidclass = images[i].split('/')[1].split('_')[0]
    test_class.append(vidclass.lower())
    
# storing the images and their class in a dataframe
test_data = pd.DataFrame()
test_data['image'] = test_image
test_data['class'] = test_class 

# converting the dataframename into csv file 
print(test_data['class'])
test_data.to_csv('testing/testings.csv',header=True, index=False)

**Get ready to train the model**

In [0]:
import keras
from keras.models import Sequential
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, InputLayer, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [0]:
ls

In [21]:
train = pd.read_csv('training/trains.csv')
train.head()
# Setting up the frames here
train_image = []

# Processes the images here.
for i in tqdm(range(train.shape[0])):
    img = image.load_img('training/'+train['image'][i], target_size=(224,224,3))
    img = image.img_to_array(img)
    img = img/255
    train_image.append(img)
    
# convert to np array
X = np.array(train_image)

100%|██████████| 418/418 [00:07<00:00, 52.43it/s]


In [0]:
# Creating validation set
# separating the target
y = train['class']

# creating the training and validation set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2, stratify = y)

# creating dummies of target variable for train and validation set
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)

In [0]:
# Creating the base model of pre-trained VGG16 model
# This is one form of CNN that can be used on each frame individually 
base_model = VGG16(weights='imagenet', include_top=False)

In [0]:
X_train = base_model.predict(X_train) # extracting features for training frames
X_test = base_model.predict(X_test) # extracting features for testing frames

In [27]:
X_train.shape

(334, 7, 7, 512)

In [28]:
X_test.shape

(84, 7, 7, 512)

In [0]:
# Reshaping the training as well as validation frames
X_train = X_train.reshape(334, 7*7*512)
X_test = X_test.reshape(84, 7*7*512)

# Normalizing the pixel values
max = X_train.max()
X_train = X_train/max
X_test = X_test/max

In [0]:
# Using the sequential architecture
# Using three layers on the sequential model
# TODO: Change the dropout between the layers
# Sizes must also change
model = Sequential()
model.add(Dense(1000, activation='relu', input_shape=(25088,)))
model.add(Dropout(0.5))
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

Training Time

In [0]:
# Compiling the model
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])

In [0]:
# training the model
model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test),  batch_size=30)
# training the model
# model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), callbacks=[mcp_save], batch_size=30)

**Time to test everything here**

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.applications.vgg16 import VGG16
import cv2
import math
import os
from glob import glob
from scipy import stats as s

In [0]:
# getting the test list
f = open("Test.txt", "r")
temp = f.read()
videos = temp.split('\n')

# creating the dataframe
test = pd.DataFrame()
test['video_name'] = videos
test = test[:-1]
test_videos = test['video_name']
test.head()

In [0]:
# creating the tags
train = pd.read_csv('training/trains.csv')
y = train['class']
y = pd.get_dummies(y)

In [0]:
# creating two lists to store predicted and actual tags
predict = []
actual = []

# for loop to extract frames from each test video
for i in tqdm(range(test_videos.shape[0])):
    count = 0
    videoFile = test_videos[i]
    cap = cv2.VideoCapture(videoFile.split(' ')[0])   # capturing the video from the given path
    frameRate = cap.get(5) #frame rate
    # removing all other files from the temp folder
    # !rm -rf temp
    while(cap.isOpened()):
        frameId = cap.get(1) #current frame number
        ret, frame = cap.read()
        if (ret != True):
            break
        if (frameId % math.floor(frameRate) == 0):
            # storing the frames of this particular video in temp folder
            filename ='temp/' + "_frame%d.jpg" % count;count+=1
            cv2.imwrite(filename, frame)
    cap.release()
    
    # reading all the frames from temp folder
    images = glob("temp/*.jpg")
    prediction_images = []
    for i in range(len(images)):
        img = image.load_img(images[i], target_size=(224,224,3))
        img = image.img_to_array(img)
        img = img/255
        prediction_images.append(img)
    prediction_images = np.array(prediction_images)
    prediction_images = base_model.predict(prediction_images)
    prediction_images = prediction_images.reshape(prediction_images.shape[0], 7*7*512)
    prediction = model.predict_classes(prediction_images)
    
    # appending the mode of predictions in predict list to assign the tag to the video
    predict.append(y.columns.values[s.mode(prediction)[0][0]])

    # appending the actual tag of the video
    vidclass = (videoFile.split('_')[0])
    actual.append(vidclass.lower())
    

In [56]:
# checking the accuracy of the predicted tags
from sklearn.metrics import accuracy_score
accuracy_score(predict, actual)*100

93.10344827586206

The end!
