In [102]:
from itertools import islice
from pyexpat import features

import pandas as pd
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import Parallel, delayed
from scipy.stats import skew
from sklearn.preprocessing import MinMaxScaler
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from skimage.measure import shannon_entropy

In [103]:
train_df = pd.read_csv("./dataset/splits/train.csv", index_col='index')
test_df = pd.read_csv("./dataset/splits/test.csv", index_col='index')
val_df = pd.read_csv("./dataset/splits/validation.csv", index_col='index')
train_df.head()

Unnamed: 0_level_0,clip_name,clip_path,label,encoded_label
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,v_Diving_g03_c01.avi,./dataset/Diving/v_Diving_g03_c01.avi,Diving,0
1,v_Diving_g19_c03.avi,./dataset/Diving/v_Diving_g19_c03.avi,Diving,0
2,v_Diving_g03_c04.avi,./dataset/Diving/v_Diving_g03_c04.avi,Diving,0
3,v_Diving_g05_c04.avi,./dataset/Diving/v_Diving_g05_c04.avi,Diving,0
4,v_Diving_g15_c03.avi,./dataset/Diving/v_Diving_g15_c03.avi,Diving,0


In [104]:
CONFIG = {
    'frame_skip': 5,  # Process every 5th frame to speed up
    'resize_dim': (224, 224),
    'n_jobs': -1,  # Use all CPU cores for feature extraction

    'lbp_radius': 3,
    'lbp_points': 8,

    'gabor': {
        'ksize': 5,
        'sigma': 3,
        'theta': 1*np.pi/4,
        'lamda': 1*np.pi /4,
        'gamma':0.4,
        'phi': 0
    },

    'contour': {
        'count' : 1,
    }
}

In [105]:
class VideoExtractorFeature:
    def __init__(self, skip_frame:int =5):
        self.skip_frame = skip_frame
        self.scalar = MinMaxScaler()
        self.gabor_kernel = cv.getGaborKernel(
            (CONFIG['gabor']['ksize'], CONFIG['gabor']['ksize']),
            CONFIG['gabor']['sigma'],
            CONFIG['gabor']['theta'],
            CONFIG['gabor']['lamda'],
            CONFIG['gabor']['gamma'],
            CONFIG['gabor']['phi'],
        )

    def _get_frame_color_features(self, frame):
        hsv_frame = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
        rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

        features = {}
        for i, column_name in enumerate(['h', 's', 'v']):
            channel = hsv_frame[:, :, i]

            mean = np.mean(channel)
            std = np.std(channel)

            if std < 1e-6:
                skewness = 0
            else:
                skewness = skew(channel.flatten())
                if np.isnan(skewness):
                    skewness = 0

            features[f'moments_mean_{column_name}'] = mean
            features[f'moments_std_{column_name}'] = std
            features[f'moments_skew_{column_name}'] = skewness

        avg_rgb  = np.mean(rgb_frame, axis=(0, 1))
        features['avg_color_r'] = avg_rgb[0]
        features['avg_color_g'] = avg_rgb[1]
        features['avg_color_b'] = avg_rgb[2]
        return features

    def _get_frame_glcm_features(self, grey_frame):
        features = {}
        for distance in [1, 3, 5]:
            for angle, orientation in zip([0, np.pi / 4, np.pi / 2, 3 * np.pi / 4], ["0", "45", "90", "135"]):
                glcm = graycomatrix(grey_frame, distances=[distance], angles=[angle], levels=256, symmetric=True, normed=True)
                features[f'glcm_contrast_{distance}_{orientation}'] = graycoprops(glcm, 'contrast')[0, 0]
                features[f'glcm_dissimilarity_{distance}_{orientation}'] = graycoprops(glcm, 'dissimilarity')[0, 0]
                features[f'glcm_homogeneity_{distance}_{orientation}'] = graycoprops(glcm, 'homogeneity')[0, 0]
                features[f'glcm_correlation_{distance}_{orientation}'] = graycoprops(glcm, 'correlation')[0, 0]
                features[f'glcm_energy_{distance}_{orientation}'] = graycoprops(glcm, 'energy')[0, 0]
        features['glcm_entropy'] = shannon_entropy(grey_frame)
        return features

    def _lbp_features(self, grey_frame):
        lbp = local_binary_pattern(grey_frame, CONFIG['lbp_points'], CONFIG['lbp_radius'], method='uniform')
        hist, _ = np.histogram(lbp.ravel(),
                               bins=np.arange(0, CONFIG['lbp_points'] + 3),
                               range=(0, CONFIG['lbp_points'] + 2))
        hist = hist.astype('float')
        hist /= (hist.sum() + 1e-7)

        features = {}
        for i in range(len(hist)):
            features[f'lbp_{i}'] = hist[i]
        return features

    def _get_gabor_features(self, grey_frame):
        gabor_features = cv.filter2D(grey_frame, cv.CV_8UC3, self.gabor_kernel)

        hist, _ = np.histogram(gabor_features.ravel(), bins=10,)
        hist = hist.astype('float')
        hist /= (hist.sum() + 1e-7)

        features = {}
        for i in range(len(hist)):
            features[f'gabor_{i}'] = hist[i]
        return features

    def _get_canny_features(self, grey_frame):
        sigma = 0.3
        median = np.median(grey_frame)
        lower = int(max(0, (1.0 - sigma) * median))
        upper = int(min(255, (1.0 + sigma) * median))
        edges = cv.Canny(grey_frame, lower, upper, edges=grey_frame)

        hist, _ = np.histogram(edges.ravel(), bins=10,)
        hist = hist.astype('float')
        hist /= (hist.sum() + 1e-7)

        features = {}
        for i in range(len(hist)):
            features[f'canny_{i}'] = hist[i]
        return features

    def _get_contour_features(self, grey_frame):
        sigma = 0.2
        median = np.median(grey_frame)
        lower = int(max(0, (1.0 - sigma) * median))
        upper = int(min(255, (1.0 + sigma) * median))
        ret, img_th = cv.threshold(grey_frame, lower, upper, cv.THRESH_BINARY_INV)
        contours, hierarchy = cv.findContours(img_th, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)

        features = {}
        for i in range(CONFIG['contour']['count']):
            features[f'contour_{i}_area'] = -1
            features[f'contour_{i}_perimeter'] = -1

        for i, contour in enumerate(islice(contours, min(CONFIG['contour']['count'], len(contours)))):
            m = cv.moments(contour)
            features[f'contour_{i}_area'] = m['m00']
            features[f'contour_{i}_perimeter'] = m['m01'] / (m['m00'] + 1e-7)
            # 'contour_aspect_ratio': m['m10'] / m['m00'],
            # 'contour_eccentricity': cv.contourArea(contours[0]) / cv.arcLength(contours[0], True),
            # 'contour_extent': cv.contourArea(contours[0]) / (grey_frame.shape[0] * grey_frame.shape[1]),
            # 'contour_solidity': cv.contourArea(contours[0]) / cv.contourArea(cv.convexHull(contours[0])),
        return features

    def _process_video(self, row: dict):
        video_id = row['index']
        video_path = row['clip_path']
        cap = cv.VideoCapture(video_path)
        features = []
        frame_cnt = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if frame_cnt % self.skip_frame == 0:
                frame = cv.resize(frame, CONFIG['resize_dim'], interpolation=cv.INTER_AREA)
                grey_frame = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
                feat = {'index': video_id}
                feat.update(self._get_frame_color_features(frame))
                feat.update(self._get_frame_glcm_features(grey_frame))
                feat.update(self._lbp_features(grey_frame))
                feat.update(self._get_gabor_features(grey_frame))
                feat.update(self._get_canny_features(grey_frame))
                feat.update(self._get_contour_features(grey_frame))
                features.append(feat)
            frame_cnt += 1

        cap.release()
        return features

    def _normalize_features(self, feature_df: pd.DataFrame, is_test: bool = False):
        feature_names = [col for col in feature_df.columns if col not in ['index', 'encoded_label']]
        feature_df[feature_names] = feature_df[feature_names].astype(float)
        if is_test:
            feature_df[feature_names] = self.scalar.transform(feature_df[feature_names])
        else:
            feature_df[feature_names] = self.scalar.fit_transform(feature_df[feature_names])
        return feature_df

    def _process_dataset(self, df: pd.DataFrame, is_test=False):
        rows = df.reset_index().to_dict('records')
        nested_results = Parallel(n_jobs=-1)(delayed(self._process_video)(row) for row in rows)

        flatten_results = [item for sub_list in nested_results for item in sub_list]
        feature_df = pd.DataFrame(flatten_results)
        return self._normalize_features(feature_df, is_test)

    def process_train_df(self, df: pd.DataFrame):
        process_df = self._process_dataset(df)
        merged_df = pd.merge(train_df, process_df, on='index', how='inner')
        output = merged_df['encoded_label']
        merged_df = merged_df.drop(columns=['index', 'clip_path', 'clip_name', 'label', 'encoded_label'])
        return merged_df, output

    def process_test_df(self, df: pd.DataFrame):
        process_df = self._process_dataset(df, True)
        merged_df = pd.merge(train_df, process_df, on='index', how='inner')
        merged_df = merged_df.drop(columns=['clip_path', 'clip_name', 'label'])
        return merged_df


In [106]:
extractor = VideoExtractorFeature()

In [107]:
X, Y = extractor.process_train_df(train_df)

In [108]:
M = extractor.process_test_df(val_df)

In [109]:
X.head()

Unnamed: 0,moments_mean_h,moments_std_h,moments_skew_h,moments_mean_s,moments_std_s,moments_skew_s,moments_mean_v,moments_std_v,moments_skew_v,avg_color_r,...,canny_2,canny_3,canny_4,canny_5,canny_6,canny_7,canny_8,canny_9,contour_0_area,contour_0_perimeter
0,0.437377,0.703766,0.469759,0.184177,0.320461,0.443086,0.672033,0.708757,0.265971,0.621076,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.417574,0.0,0.0
1,0.419381,0.691181,0.472115,0.173806,0.312256,0.452479,0.671253,0.7074,0.266933,0.624704,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.364412,0.0,0.0
2,0.445359,0.637138,0.472924,0.174393,0.315061,0.467496,0.670058,0.709306,0.268808,0.62434,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.364412,0.0,0.0
3,0.452045,0.706548,0.463693,0.168384,0.309778,0.46808,0.672741,0.704658,0.267192,0.63004,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.380345,0.0,0.0
4,0.458268,0.636646,0.467077,0.161103,0.303536,0.48331,0.673845,0.710806,0.268158,0.634046,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.342465,0.0,0.0


In [110]:
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: encoded_label, dtype: int64

In [111]:
feature_names = [col for col in M.columns if col not in ['index', 'encoded_label']]

In [112]:
import optuna.visualization as vis
import cupy as cp
import optuna
from cuml.svm import SVC
import cudf

def validation_pipeline_score(model, merged_df):
    result = 0
    for video_id, group in merged_df.groupby('index'):
        df = group[feature_names]
        output = model.predict(df)
        final_ans = output.mode().iloc[0]
        expected = group['encoded_label'].values[0]
        # print(f"final_ans: {final_ans}, expected: {expected}, type of expected: {type(expected)}, type of final_ans: {type(final_ans)}, video_id: {video_id}")
        if final_ans == expected:
            result = result + 1
    return result


def objective_function(trial):
    params = {
        'C': trial.suggest_float('C', 1e-3, 1e3, log=True),
        'gamma': trial.suggest_float('gamma', 1e-4, 1e1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly'])
    }
    model = SVC(**params)
    model.fit(X, Y)
    return validation_pipeline_score(model, M)


study = optuna.create_study(direction="maximize")
study.optimize(objective_function, n_trials=100, show_progress_bar=True, n_jobs=10)


[32m[I 2026-02-01 15:16:25,303][0m A new study created in memory with name: no-name-8827c52e-27f6-4102-992f-cab118e5a027[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2026-02-01 15:16:35,453][0m Trial 1 finished with value: 10.0 and parameters: {'C': 239.5408128102856, 'gamma': 0.0033334061162758814, 'kernel': 'linear'}. Best is trial 1 with value: 10.0.[0m
[32m[I 2026-02-01 15:16:35,481][0m Trial 7 finished with value: 10.0 and parameters: {'C': 0.4492096892433782, 'gamma': 0.034517470474381036, 'kernel': 'rbf'}. Best is trial 1 with value: 10.0.[0m
[32m[I 2026-02-01 15:16:35,548][0m Trial 2 finished with value: 11.0 and parameters: {'C': 0.04333385220494901, 'gamma': 0.07857920742158385, 'kernel': 'linear'}. Best is trial 2 with value: 11.0.[0m
[32m[I 2026-02-01 15:16:35,708][0m Trial 3 finished with value: 10.0 and parameters: {'C': 52.78414320632339, 'gamma': 0.023601683562697522, 'kernel': 'linear'}. Best is trial 2 with value: 11.0.[0m
[32m[I 2026-02-01 15:16:35,843][0m Trial 0 finished with value: 10.0 and parameters: {'C': 96.89010322284558, 'gamma': 0.14438067566458407, 'kernel': 'linear'}. Best is trial 2 with value: 1

In [113]:
vis.plot_param_importances(study)

In [114]:
vis.plot_optimization_history(study)