In [2]:
#necessary libraries
from keras.preprocessing import image
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.models import Sequential
from keras.applications.vgg16 import VGG16
from keras.layers import Dense,  Dropout
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint

#Remember, we created a .csv file that contains the names of each frame and their corresponding tag? Let’s read it as well:
#This is how the first five rows look like. We have the corresponding class or tag for each frame.

train = pd.read_csv('datas/train_skate.csv')
train.head()

Unnamed: 0,image,class
0,Fail_001.mp4_frame0.jpg,Fail
1,Fail_001.mp4_frame1.jpg,Fail
2,Fail_001.mp4_frame2.jpg,Fail
3,Fail_001.mp4_frame3.jpg,Fail
4,Fail_002.mp4_frame0.jpg,Fail


In [3]:
#using this .csv file, we will read the frames that we extracted earlier and then store those frames as a NumPy array:
# creating an empty list
train_image = []

# for loop to read and store frames
for i in tqdm(range(train.shape[0])):
    # loading the image and keeping the target size as (224,224,3)
    img = image.load_img('datas/train_frames/' + train['image'][i], target_size=(224, 224, 3))
    # converting it to array
    img = image.img_to_array(img)
    # normalizing the pixel value
    img = img / 255
    # appending the image to the train_image list
    train_image.append(img)

# converting the list to numpy array
X = np.asarray(train_image)
# shape of the array
X.shape

100%|██████████| 500/500 [00:03<00:00, 151.96it/s]


(500, 224, 224, 3)

In [4]:
# Next, we will create the validation set.

#To create the validation set,
# we need to make sure that the distribution of each class is similar in both training and validation sets.
# We can use the stratify parameter to do that:

# separating the target
y = train['class']
print(y)

# creating the training and validation set
#Here, stratify = y (which is the class or tags of each frame)
# keeps the similar distribution of classes in both the training as well as the validation set.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2, stratify = y)


# creating dummies of target variable for train and validation set
#Remember – there are 3 categories in which a video can be classified.
# So, we will have to create 3 different columns in the target,
# one for each category.

# We will use the get_dummies() function for that:
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)

0       Fail
1       Fail
2       Fail
3       Fail
4       Fail
       ...  
495    Slide
496    Slide
497    Slide
498    Slide
499    Slide
Name: class, Length: 500, dtype: object


In [5]:
#Defining the architecture of the video classification model

#Since we do not have a very large dataset,
# creating a model from scratch might not work well.
# So, we will use a pre-trained model and take its learnings to solve our problem.

#For this particular dataset,
# we will be using the VGG-16 pre-trained model.
# Let’s create a base model of the pre-trained model:

# creating the base model of pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False)

In [6]:
#This model was trained on a dataset that has 1,000 classes.
# We will fine tune this model as per our requirement.
# include_top = False will remove the last layer of this model so that we can tune it as per our need.

#Now, we will extract features from this pre-trained model for our training and validation images:

# extracting features for training frames
X_train = base_model.predict(X_train)
X_train.shape


(400, 7, 7, 512)

In [7]:
#We have 400 images in the training set
# and the shape has been changed to (7, 7, 512)
# since we have passed these images through the VGG16 architecture.
# Similarly, we will extract features for validation frames:

# extracting features for validation frames
X_test = base_model.predict(X_test)
X_test.shape

(100, 7, 7, 512)

In [8]:
#There are 100 images in the validation set
# and the shape of these images has also changed to (7, 7, 512).
# We will use a fully connected network now to fine-tune the model.
# This fully connected network takes input in single dimension.
# So, we will reshape the images into a single dimension:

# reshaping the training as well as validation frames in single dimension
X_train = X_train.reshape(400, 7*7*512)
X_test = X_test.reshape(100, 7*7*512)

In [9]:
#It is always advisable to normalize the pixel values, i.e.,
# keep the pixel values between 0 and 1.
# This helps the model to converge faster.

# normalizing the pixel values
max = X_train.max()
X_train = X_train/max
X_test = X_test/max

#Next, we will create the architecture of the model.
# We have to define the input shape for that.
# So, let’s check the shape of our images:

# shape of images
X_train.shape

(400, 25088)

In [10]:
#The input shape will be 25088.
# Let’s now create the architecture.

#We have multiple fully connected dense layers.
# I have added dropout layers as well so that the model will not overfit.
# The number of neurons in the final layer is equal to
# the number of classes that we have and hence the number of neurons here is 3.

#defining the model architecture
model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(25088,)))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

In [11]:
#Training the video classification model

#We will now train our model using the training frames and validate the model using validation frames.
# We will save the weights of the model so that we will not have to retrain the model again and again.
#So, let’s define a function to save the weights of the model:

mcp_save = ModelCheckpoint('weights_skate.hdf5', save_best_only=True, monitor='val_loss', mode='min')

In [12]:
#We will decide the optimum model based on the validation loss.
# Note that the weights will be saved as weights_skate.hdf5.
# Before training the model, we have to compile it:

# compiling the model
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])

In [13]:
#We are using the categorical_crossentropy as the loss function and the optimizer is Adam.
# Let’s train the model:

# training the model
model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test), callbacks=[mcp_save], batch_size=128)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x23095b0c550>