In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from google.colab import drive
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, GlobalAveragePooling3D
import cv2

# Mount Google Drive so the dataset folder below is reachable from the Colab runtime.
drive.mount('/content/drive')
# Dataset root; make_dataframes() treats every sub-directory of it as one class.
base_dir = '/content/drive/MyDrive/cricshot'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# NOTE(review): this upgrade is unpinned, and its execution count (In [3]) is later than
# the cell that already imported tensorflow (In [1]) — presumably the runtime has to be
# restarted before the upgraded package is the one in use; confirm and pin a version.
!pip install --upgrade tensorflow



In [8]:
def extract_frames(video_path, num_frames=16, img_size=(224, 224)):
    """Sample evenly spaced RGB frames from a video and scale them by 1/255.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Number of frames in the returned clip.
        img_size: Target size passed to ``cv2.resize`` for every frame.

    Returns:
        A NumPy array holding ``num_frames`` frames divided by 255.0, or
        ``None`` when not a single frame could be read. When fewer than
        ``num_frames`` frames were read, the last successfully read frame is
        repeated until the clip is full.
    """
    frames = []
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Short clips contribute each frame once; longer clips are sampled evenly.
        sample_count = min(total_frames, num_frames)
        indices = np.linspace(0, total_frames - 1, sample_count, dtype=int)
        for i in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, img_size)
                frames.append(frame)
    except Exception as e:
        # Best effort: keep whatever frames were decoded before the failure.
        print(f"Error processing video: {video_path}")
        print(f"Error message: {str(e)}")
    finally:
        # Release the capture on every path, including failures mid-decode,
        # so file handles do not pile up over a long training run.
        if cap is not None:
            cap.release()

    if len(frames) == 0:
        print(f"No frames extracted from video: {video_path}")
        return None

    # Pad short or partially decoded clips by repeating the final frame.
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return np.array(frames) / 255.0

In [2]:
def make_dataframes(base_dir, num_frames=16):
    """Index the video files under ``base_dir`` and split them 70/15/15.

    Every sub-directory of ``base_dir`` is one class; files ending in
    ``.mp4``, ``.avi`` or ``.mov`` (case-insensitive) are its samples.

    Args:
        base_dir: Directory whose sub-directories are the class folders.
        num_frames: Accepted for interface compatibility; not used here.

    Returns:
        ``(train_df, valid_df, test_df)`` with columns ``'filepaths'`` and
        ``'labels'``, split with stratification on the label.

    Raises:
        ValueError: If ``base_dir`` is empty or no video file is found.
    """
    video_suffixes = ('.mp4', '.avi', '.mov')

    class_names = sorted(os.listdir(base_dir))
    if len(class_names) == 0:
        raise ValueError(f"No subdirectories found in {base_dir}. Please check the path.")

    records = []
    for class_name in class_names:
        class_dir = os.path.join(base_dir, class_name)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are ignored.
            continue

        videos = [entry for entry in os.listdir(class_dir) if entry.lower().endswith(video_suffixes)]
        videos.sort()
        if len(videos) == 0:
            print(f"Warning: No video files found in {class_dir}")
            continue

        progress = tqdm(videos, ncols=130, desc=f'{class_name:25s}', unit='files', colour='blue')
        records.extend((os.path.join(class_dir, video), class_name) for video in progress)

    if len(records) == 0:
        raise ValueError(f"No video files found in any subdirectory of {base_dir}. Please check the content of the directories.")

    df = pd.DataFrame(records, columns=['filepaths', 'labels'])

    # 70% train, then the remaining 30% is halved into validation and test.
    train_df, temp_df = train_test_split(df, train_size=0.7, stratify=df['labels'], random_state=42)
    valid_df, test_df = train_test_split(temp_df, train_size=0.5, stratify=temp_df['labels'], random_state=42)

    print('train_df length:', len(train_df), '  valid_df length:', len(valid_df), '  test_df length:', len(test_df))
    return train_df, valid_df, test_df
try:
    train_df, valid_df, test_df = make_dataframes(base_dir)
except ValueError as e:
    # Report the problem, then re-raise: every later cell needs these frames, so
    # carrying on would only defer the failure to an unrelated NameError.
    print(f"Error: {e}")
    raise

cover                    : 100%|[34m██████████████████████████████████████████████████████████[0m| 188/188 [00:00<00:00, 31546.21files/s][0m
defense                  : 100%|[34m█████████████████████████████████████████████████████████[0m| 192/192 [00:00<00:00, 210537.61files/s][0m
flick                    : 100%|[34m█████████████████████████████████████████████████████████[0m| 181/181 [00:00<00:00, 270648.49files/s][0m
hook                     : 100%|[34m█████████████████████████████████████████████████████████[0m| 181/181 [00:00<00:00, 272592.11files/s][0m
late_cut                 : 100%|[34m█████████████████████████████████████████████████████████[0m| 182/182 [00:00<00:00, 284943.38files/s][0m
lofted                   : 100%|[34m█████████████████████████████████████████████████████████[0m| 198/198 [00:00<00:00, 295331.50files/s][0m
pull                     : 100%|[34m█████████████████████████████████████████████████████████[0m| 179/179 [00:00<00:00, 290437.30files

train_df length: 1321   valid_df length: 283   test_df length: 284


In [3]:
class VideoDataGenerator(keras.utils.Sequence):
    """Keras sequence that decodes the videos listed in a DataFrame on demand.

    Each row of the frame must provide ``'filepaths'`` (decoded with
    ``extract_frames``) and ``'label_index'`` (one-hot encoded over
    ``num_classes`` columns).

    Note:
        Videos for which ``extract_frames`` returns ``None`` are dropped, so a
        batch can hold fewer than ``batch_size`` samples, and model outputs
        then no longer line up row-for-row with ``self.df``.
    """

    def __init__(self, dataframe, batch_size, num_frames, num_classes, img_size=(224, 224), shuffle=True, **kwargs):
        """Store the configuration and apply the initial shuffle.

        Args:
            dataframe: Frame with ``'filepaths'`` and ``'label_index'`` columns.
            batch_size: Maximum number of videos per batch.
            num_frames: Frames sampled from every video.
            num_classes: Width of the one-hot label vectors.
            img_size: Frame size forwarded to ``extract_frames``.
            shuffle: Reshuffle the rows now and at the end of every epoch.
            **kwargs: Forwarded unchanged to the Keras base-class initialiser.
        """
        # Keras warns (`_warn_if_super_not_called`) when a subclass skips the base initialiser.
        super().__init__(**kwargs)
        self.df = dataframe
        self.batch_size = batch_size
        self.num_frames = num_frames
        self.num_classes = num_classes
        self.img_size = img_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.df) / float(self.batch_size)))

    def _load_batch(self, idx):
        """Decode batch ``idx`` and return ``(videos, labels)`` as Python lists."""
        batch_df = self.df.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]

        batch_videos = []
        batch_labels = []
        for _, row in batch_df.iterrows():
            video = extract_frames(row['filepaths'], self.num_frames, self.img_size)
            if video is not None:
                batch_videos.append(video)
                label = keras.utils.to_categorical(row['label_index'], num_classes=self.num_classes)
                batch_labels.append(label)
        return batch_videos, batch_labels

    def __getitem__(self, idx):
        """Return ``(videos, one_hot_labels)`` for batch ``idx``.

        If every video of the requested batch is unreadable, the following
        batches are tried in turn (wrapping around).

        Raises:
            RuntimeError: If no batch yields a single readable video.
        """
        n_batches = len(self)
        candidate = idx
        # Bounded replacement for the former unbounded recursion: the requested
        # batch plus at most one full pass over the remaining batches.
        for _ in range(n_batches + 1):
            batch_videos, batch_labels = self._load_batch(candidate)
            if batch_videos:
                return np.array(batch_videos), np.array(batch_labels)
            if n_batches == 0:
                break
            candidate = (candidate + 1) % n_batches
        raise RuntimeError("No readable videos found in any batch of this generator.")

    def on_epoch_end(self):
        """Reshuffle the rows between epochs when shuffling is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

In [4]:
# Stable class order: label_index i always refers to classes[i].
classes = sorted(train_df['labels'].unique())
class_to_index = {cls: idx for idx, cls in enumerate(classes)}

# Rebind each split to a new frame carrying the integer label instead of writing a
# column into the frames returned by train_test_split (avoids SettingWithCopyWarning
# and keeps this cell idempotent when it is re-run).
train_df, valid_df, test_df = (
    split.assign(label_index=split['labels'].map(class_to_index))
    for split in (train_df, valid_df, test_df)
)

num_classes = len(classes)
batch_size = 16
num_frames = 15
img_size = (224, 224)

# Validation data keeps its order; only the training generator shuffles.
train_gen = VideoDataGenerator(train_df, batch_size, num_frames, num_classes, img_size=img_size)
valid_gen = VideoDataGenerator(valid_df, batch_size, num_frames, num_classes, img_size=img_size, shuffle=False)

In [6]:
def make_model(num_frames, img_size, num_classes, lr=0.001):
    """Build and compile the 3D-convolutional video classifier.

    Args:
        num_frames: Number of frames per input clip.
        img_size: Two spatial dimensions of each frame; unpacked between
            ``num_frames`` and the 3 colour channels to form the input shape.
        num_classes: Width of the softmax output layer.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model using categorical cross-entropy
        loss and an accuracy metric.
    """
    input_shape = (num_frames, *img_size, 3)
    model = keras.Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first layer, which current Keras reports as discouraged for Sequential models.
        keras.Input(shape=input_shape),
        keras.layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same'),
        # Pool size 1 along the first axis: only the two spatial axes are halved.
        keras.layers.MaxPooling3D((1, 2, 2)),
        keras.layers.Conv3D(64, (3, 3, 3), activation='relu', padding='same'),
        keras.layers.MaxPooling3D((1, 2, 2)),
        keras.layers.Conv3D(128, (3, 3, 3), activation='relu', padding='same'),
        keras.layers.MaxPooling3D((1, 2, 2)),
        keras.layers.Conv3D(256, (3, 3, 3), activation='relu', padding='same'),
        keras.layers.GlobalAveragePooling3D(),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# num_frames, img_size, num_classes, batch_size and the train/validation generators
# come from the configuration cell earlier in the notebook (In [4]); they are reused
# here rather than re-declared so the model and the generators cannot drift apart.
print(f"Number of frames: {num_frames}")
print(f"Image size: {img_size}")
print(f"Number of classes: {num_classes}")

# Build the network from the same img_size the generators use (was a literal (224, 224)).
model = make_model(num_frames, img_size, num_classes)
model.summary()

Number of frames: 15
Image size: (224, 224)
Number of classes: 10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Training schedule: stop after 10 epochs without a better val_loss (restoring the best
# weights) and cut the learning rate to 20% after 5 stagnant epochs, down to 1e-6.
epochs = 20
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6,
)
callbacks = [early_stop, reduce_lr]

# Keyword arguments for fit(), gathered in one place; the values are unchanged.
fit_options = dict(
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=valid_gen,
    validation_steps=None,
    shuffle=False,
    initial_epoch=0,
)
history = model.fit(train_gen, **fit_options)

Epoch 1/20


  self._warn_if_super_not_called()


[1m44/83[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1:38:01[0m 151s/step - accuracy: 0.1175 - loss: 2.3039

In [None]:
def tr_plot(tr_data, start_epoch):
    """Draw training/validation loss and accuracy curves side by side.

    Args:
        tr_data: Object whose ``history`` mapping holds the ``'accuracy'``,
            ``'loss'``, ``'val_accuracy'`` and ``'val_loss'`` series.
        start_epoch: Epoch number assigned to the first recorded value.
    """
    record = tr_data.history
    train_acc = record['accuracy']
    train_loss = record['loss']
    val_acc = record['val_accuracy']
    val_loss = record['val_loss']

    epoch_axis = list(range(start_epoch, start_epoch + len(train_acc)))
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20, 8))

    # One entry per panel: axes, the two series with their legend labels, title, y-label.
    panels = (
        (loss_ax, train_loss, 'Training loss', val_loss, 'Validation loss',
         'Training and Validation Loss', 'Loss'),
        (acc_ax, train_acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
         'Training and Validation Accuracy', 'Accuracy'),
    )
    for ax, train_series, train_label, val_series, val_label, title, y_label in panels:
        ax.plot(epoch_axis, train_series, 'r', label=train_label)
        ax.plot(epoch_axis, val_series, 'g', label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(y_label)
        ax.legend()

    plt.tight_layout()
    plt.show()

# Plot the curves recorded by model.fit(); epochs on the x-axis are numbered from 0.
tr_plot(history, 0)

In [None]:
def predictor(test_gen):
    """Evaluate the notebook-level ``model`` on ``test_gen`` and report metrics.

    Prints the error count and accuracy, shows a confusion matrix when there
    are at most 30 classes, and prints a classification report.

    Args:
        test_gen: Generator accepted by ``model.predict`` that exposes a
            ``df`` frame with ``'labels'`` and ``'label_index'`` columns. It
            must yield samples in the row order of ``df``.

    Returns:
        ``(errors, tests, f1score)``: number of misclassified samples, number
        of predictions, and the weighted F1 score in percent.

    Raises:
        ValueError: If the number of predictions is zero or differs from the
            number of rows in ``test_gen.df``.
    """
    if getattr(test_gen, 'shuffle', False):
        print("Warning: test_gen has shuffle enabled; predictions may not line up with its labels.")

    frame = test_gen.df
    # Snapshot the ground truth once, before predicting, so every metric below
    # is computed against the same row order.
    y_true = frame['label_index'].to_numpy()

    # Order class names by their integer index so the tick labels and report rows
    # match the indices (order of appearance in the frame is arbitrary).
    index_to_name = dict(zip(frame['label_index'], frame['labels']))
    label_ids = sorted(index_to_name)
    classes = [index_to_name[label_id] for label_id in label_ids]
    class_count = len(classes)

    preds = model.predict(test_gen, verbose=1)
    tests = len(preds)
    if tests == 0 or tests != len(y_true):
        raise ValueError(
            f"Got {tests} predictions for {len(y_true)} labelled samples; "
            "predictions cannot be matched to labels (were unreadable videos skipped?)."
        )

    y_pred = np.argmax(np.asarray(preds), axis=1)
    errors = int(np.count_nonzero(y_pred != y_true))

    acc = (1 - errors / tests) * 100
    print(f'There were {errors} errors in {tests} tests for an accuracy of {acc:.2f}%')
    f1score = f1_score(y_true, y_pred, average='weighted') * 100
    if class_count <= 30:
        cm = confusion_matrix(y_true, y_pred, labels=label_ids)
        plt.figure(figsize=(12, 8))
        sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)
        # Offset by .5 to place each tick label at the centre of its heatmap cell.
        plt.xticks(np.arange(class_count) + .5, classes, rotation=90)
        plt.yticks(np.arange(class_count) + .5, classes, rotation=0)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title("Confusion Matrix")
        plt.show()

    clr = classification_report(y_true, y_pred, labels=label_ids, target_names=classes, digits=4)
    print("Classification Report:\n----------------------\n", clr)
    return errors, tests, f1score

# Nothing earlier in the notebook defines `test_gen`, so build it here from the
# held-out split; shuffle stays off so predictions follow the row order of test_df.
test_gen = VideoDataGenerator(test_df, batch_size, num_frames, num_classes, img_size=img_size, shuffle=False)
errors, tests, f1score = predictor(test_gen)

In [None]:
# File name encodes the class count, the input resolution (read from img_size so it
# cannot drift from the configuration) and the weighted F1 score.
name = f'CRICKET-{len(train_gen.df["labels"].unique())}-({img_size[0]}x{img_size[1]})'
save_id = f'{name}-{f1score:.2f}.h5'
# NOTE(review): the `.h5` suffix is kept as-is; confirm whether downstream loaders
# need HDF5 before switching to another Keras save format.
model_save_loc = os.path.join('/content/drive/MyDrive', save_id)
model.save(model_save_loc)
print(f'Model was saved as {model_save_loc}')