In [1]:
# Copy the Kaggle API token into ~/.kaggle so later `kaggle` commands can use it.
# Expects kaggle.json to already be present in the current working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Mode 600: readable/writable by the owner only, since the file holds a credential.
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
# Download the dataset archive with the Kaggle CLI (saved as a .zip in the working directory).
!kaggle datasets download -d shouravchy/sports-video-classification

Downloading sports-video-classification.zip to /content
100% 487M/488M [00:06<00:00, 84.7MB/s]
100% 488M/488M [00:06<00:00, 77.1MB/s]


In [3]:
from zipfile import ZipFile
import os
import cv2
import numpy as np

In [4]:
from sklearn.preprocessing import LabelBinarizer
from imutils import paths

In [5]:
# Location of the downloaded dataset archive.
datapath = '/content/sports-video-classification.zip'
# Directory the trained model is saved to.
output_model = '/content/model/video_classification_model'
# Pickle file holding the fitted LabelBinarizer. Fixed the "vide_" typo and
# aligned the value with the path the binarizer is actually written to when
# the model artifacts are saved.
output_label_binarizer = '/content/model/video_classifier_binarizer.pickle'

In [6]:
# Unpack the dataset archive into the current working directory.
# The handle is named `archive` rather than `zip` so the builtin zip() is not
# shadowed by a closed ZipFile object for the rest of the kernel session.
with ZipFile(datapath, 'r') as archive:
  archive.extractall()
  print("Extraction completed.")

Extraction completed.


In [7]:
# Load every image that belongs to one of the selected classes, convert it to
# RGB and shrink it to 32x32. `data` collects the pixel arrays and `labels`
# the matching class names (same order).
sports_labels = {'boxing', 'swimming', 'table_tennis'}

datapath = '/content/data'
path_to_images = list(paths.list_images(datapath))
data = []
labels = []
skipped = 0

for image_path in path_to_images:
  # The class name is the image's parent directory.
  label = image_path.split(os.path.sep)[-2]
  if label not in sports_labels:
    continue
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread returns None (it does not raise) for unreadable or corrupt
    # files; skip them instead of crashing inside cvtColor.
    skipped += 1
    continue
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  image = cv2.resize(image, (32, 32))
  data.append(image)
  labels.append(label)

if skipped:
  print("Skipped {} unreadable image(s).".format(skipped))

In [8]:
# Turn the accumulated Python lists into NumPy arrays for scikit-learn / Keras.
data, labels = np.array(data), np.array(labels)

In [9]:
# One-hot encode the class names; `lb` is kept so that predicted indices can be
# mapped back to class names through `lb.classes_`.
# NOTE: this cell overwrites `labels`, so it should be run only once per load.
lb = LabelBinarizer().fit(labels)
labels = lb.transform(labels)

In [10]:
# Inspect the encoded label matrix (one row per image, one column per class).
labels

array([[0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       ...,
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]])

In [None]:
# Show a compact summary instead of dumping the whole image tensor.
data.shape, data.dtype

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 25% of the images for testing. The split is stratified on the
# labels and seeded so that it is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    data,
    labels,
    test_size = 0.25,
    random_state = 42,
    stratify = labels,
)

In [14]:
from keras.preprocessing.image import ImageDataGenerator

In [15]:
# Generator with random geometric augmentation intended for training images.
training_augmentation = ImageDataGenerator(
    fill_mode = "nearest",
    horizontal_flip = True,
    rotation_range = 10,
    zoom_range = 0.15,
    shear_range = 0.15,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
)

# Generator without any augmentation intended for validation images.
validation_augmentation = ImageDataGenerator()

# Per-channel mean attached to both generators.
# NOTE(review): these look like the ImageNet RGB channel means - confirm.
# NOTE(review): neither generator is passed to model.fit in this notebook, and
# ImageDataGenerator presumably only subtracts `.mean` when
# featurewise_center=True - verify before relying on this.
mean = np.array([123.68, 116.779, 103.939], dtype = 'float32')
training_augmentation.mean = validation_augmentation.mean = mean

In [16]:
from keras.applications import ResNet50
from keras.layers import Input
from keras.layers.pooling import AveragePooling2D
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.models import Model
from keras.layers import UpSampling2D

In [17]:
from keras import models

In [18]:
# ResNet50 convolutional base with ImageNet weights and no classification head,
# configured for 256x256 RGB inputs.
cnn_base = ResNet50(
    include_top = False,
    weights = 'imagenet',
    input_shape = (256, 256, 3),
)
cnn_base.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 262, 262, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 128, 128, 64  9472        ['conv1_pad[0][0]']              
                                )              

In [19]:
no_of_classes = 3

# Three successive 2x upsampling layers enlarge the 32x32 inputs eightfold to
# the 256x256 size the ResNet50 base was built for; a small dense head with
# dropout produces the class probabilities.
model = models.Sequential(
    [UpSampling2D((2,2)) for _ in range(3)]
    + [
        cnn_base,
        Flatten(),
        Dense(512, activation = 'relu'),
        Dropout(0.5),
        Dense(no_of_classes, activation = 'softmax'),
    ]
)

In [20]:
from keras.optimizers import SGD

In [21]:
# SGD with momentum and a small per-update learning-rate decay.
# `learning_rate` replaces the deprecated `lr` keyword, which only worked with
# a deprecation warning.
optimizer = SGD(learning_rate = 0.0001, momentum = 0.9, decay = 1e-4/25)

  super().__init__(name, **kwargs)


In [22]:
# Multi-class setup: categorical cross-entropy loss, accuracy reported as 'acc'.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = optimizer,
    metrics = ['acc'],
)

In [23]:
# Train for 5 epochs on the raw (un-normalised, un-augmented) training arrays.
history = model.fit(x = X_train, y = Y_train, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# Score the trained model on the held-out split; returns the loss and the 'acc' metric.
loss, accuracy = model.evaluate(x = X_test, y = Y_test)



In [25]:
import pickle

In [26]:
# Persist the trained model and the fitted label binarizer.
model.save(output_model)

binarizer_path = '/content/model/video_classifier_binarizer.pickle'
# Ensure the target directory exists regardless of what model.save created.
os.makedirs(os.path.dirname(binarizer_path), exist_ok = True)
# The context manager closes the file even if pickling raises.
# NOTE: only unpickle this file from a trusted source; pickle can execute code on load.
with open(binarizer_path, 'wb') as binarizer_file:
  pickle.dump(lb, binarizer_file)



In [29]:
from collections import deque

In [30]:
# Per-channel mean values as float32.
# NOTE(review): these look like the ImageNet RGB channel means - confirm.
mean = np.array([123.68, 116.779, 103.939], dtype = 'float32')
# Bounded buffer holding at most the 128 most recent items; older ones drop off.
queue = deque(maxlen = 128)

In [31]:
# Path where the annotated demo video is meant to be written.
output_video = '/content/model/demo_output.avi'
from google.colab.patches import cv2_imshow

In [None]:
# Classify every frame of the input clip, smooth the per-frame predictions with
# a rolling average over the most recent frames, draw the predicted class on
# the frame and write the annotated frames to `output_video`.
cap = cv2.VideoCapture('/content/One_Of_the_Hardest_Punches_Ever_AdobeExpress.mp4')
if not cap.isOpened():
  raise IOError("Could not open the input video.")

writer = None
height = None
width = None
# Start from an empty history so predictions left over from a previous run of
# this cell do not leak into the rolling average.
queue.clear()

try:
  while True:
    ret, frame = cap.read()
    if not ret:
      break
    if width is None or height is None:
      # frame.shape is (rows, cols, channels), i.e. (height, width, channels).
      height, width = frame.shape[:2]

    output = frame.copy()
    # Same preprocessing as the training images: BGR -> RGB, resize to 32x32.
    # No mean subtraction here: the model was fitted on raw pixel values, so
    # subtracting a mean only at inference would shift the inputs away from
    # what the network saw during training.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (32,32)).astype("float32")
    pred = model.predict(np.expand_dims(frame, axis = 0))[0]
    queue.append(pred)
    # Average the buffered class probabilities to reduce label flicker.
    result = np.array(queue).mean(axis = 0)
    i = np.argmax(result)
    label = lb.classes_[i]
    text = "They are playing {}".format(label)
    cv2.putText(output, text, (10, 200), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255,0,0), 2)

    if writer is None:
      # Created lazily because the frame size is only known after the first read.
      # VideoWriter expects the size as (width, height) and the real output
      # path, not the literal string "output_video".
      fourcc = cv2.VideoWriter_fourcc(*"MJPG")
      writer = cv2.VideoWriter(output_video, fourcc, 30, (width, height), True)
    writer.write(output)
    cv2_imshow(output)
    key = cv2.waitKey(1) & 0xFF

    if key == ord("q"):
      break
finally:
  # Always release the handles; the writer may not exist if no frame was read.
  if writer is not None:
    writer.release()
  cap.release()