In [1]:

import os
import numpy as np



def load_video_features(base_dir):
    data = []
    labels = []
    class_names = sorted(os.listdir(base_dir))

    for class_idx,class_name in enumerate(class_names):
        class_dir = os.path.join(base_dir, class_name)
        video_files = sorted(os.listdir(class_dir))
        print (f'processing class={class_idx}:{class_name}')

        for video_file in video_files:
            video_path = os.path.join(class_dir, video_file)
            features = np.load(video_path)
            features = features.reshape(features.shape[0], -1) 
            # video_features = np.load(video_path, allow_pickle=True)
            # features = video_features.item()['resnet'].reshape(-1)
            data.append(features)
            labels.append(class_idx)

    return data, labels,class_names

# Example usage
base_dir ='F:\Downloads\RIP_dataset\VGG16_Training_Features\\frontal_view'
video_features_train, video_labels_train, class_names = load_video_features(base_dir)
video_labels_train=np.array(video_labels_train)
print(video_features_train[0].shape)


processing class=0:Left Lane Change
processing class=1:Left Turn
processing class=2:Right Lane Change
processing class=3:Right Turn
processing class=4:Slow-Stop
processing class=5:Straight
(300, 512)


In [2]:
def pad_videos(video_features):
    max_frames = max(video.shape[0] for video in video_features)
    max_frames=300
    padded_videos = []

    for video in video_features:
        num_frames = video.shape[0]
        if num_frames < max_frames:
            padding = np.zeros((max_frames - num_frames, video.shape[1]))
            #padding = np.zeros((max_frames - num_frames))
            padded_video = np.concatenate((video, padding), axis=0)
        elif num_frames > max_frames:
            selected_indices =sorted(np.random.choice(num_frames, max_frames, replace=False))
            #selected_indices = np.linspace(0, num_frames - 1, max_frames).astype(int)
            padded_video = video[selected_indices]
        else:
            padded_video = video
        padded_videos.append(padded_video)

    return np.array(padded_videos)


In [3]:
X_vgg_train= pad_videos(video_features_train)
print("trin padded feature: ",X_vgg_train.shape)

trin padded feature:  (500, 300, 512)


In [6]:
X_train_flat = X_vgg_train.reshape(X_vgg_train.shape[0], -1)
print("X_train:",X_train_flat.shape)

X_train: (500, 153600)


In [8]:
y_train=video_labels_train

In [9]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_flat, y_train)
print("Original shape:", X_train_flat.shape)
print("Resampled shape:", X_train_resampled.shape)
print("Original labels distribution:", np.bincount(y_train))
print("Resampled labels distribution:", np.bincount(y_train_resampled))

Original shape: (500, 153600)
Resampled shape: (960, 153600)
Original labels distribution: [ 29 160  39 154  44  74]
Resampled labels distribution: [160 160 160 160 160 160]


In [11]:
y_train=y_train_resampled

In [12]:
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.calibration import CalibratedClassifierCV

class_weights = {0: 6.0, 1: 1.5, 2: 2.5, 3: 0.8, 4: 3.0, 5: 1}
ova_clf = OneVsRestClassifier(svm.SVC(kernel='rbf', probability=True, class_weight=class_weights,random_state=42))
ova_clf.fit(X_train_resampled, y_train)


In [None]:
import joblib
model=joblib.load('ova_clf_model71.pkl')

<h2> setting up validation data

In [13]:
import os
import numpy as np
def load_video_features(base_dir):
    data = []
    labels = []
    files=[]
    class_names = sorted(os.listdir(base_dir))

    for class_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(base_dir, class_name)
        video_files = sorted(os.listdir(class_dir))
        print (f'processing class={class_idx}:{class_name}')

        for video_file in video_files:
            video_path = os.path.join(class_dir, video_file)
            features = np.load(video_path)
            features = features.reshape(features.shape[0], -1)
            # video_features = np.load(video_path, allow_pickle=True)
            # features = video_features.item()['resnet']  # Assuming features are stored in .npy format
            data.append(features)
            labels.append(class_idx)
            files.append(os.path.splitext(video_file)[0])

    return data, labels,files

# Example usage
base_dir ='F:\Downloads\RIP_dataset\VGG16_val_features\\frontal_view'
video_features_val, video_labels_val, files = load_video_features(base_dir)
video_labels_val=np.array(video_labels_val)
print(video_features_val[0].shape)
print(list(files[i] for i in range(5)))

processing class=0:Left Lane Change
processing class=1:Left Turn
processing class=2:Right Lane Change
processing class=3:Right Turn
processing class=4:Slow-Stop
processing class=5:Straight
(210, 512)
['2d0536d0-b58e-4a83-a72a-bd0448b59d57', '2e1c1e90-7004-4303-b476-6c8d28d21510', '3a618b49-0021-41ef-9c98-ba089a30a597', '5a794d03-309b-4620-a96e-b4d3c2c8e8e2', '626c3338-70dd-40db-826f-3b7f9bfb11fa']


In [14]:
X_vgg_val= pad_videos(video_features_val)
print("val padded feature: ",X_vgg_val.shape)


val padded feature:  (200, 300, 512)


In [15]:
del(video_features_val)
X_val_flat = X_vgg_val.reshape(X_vgg_val.shape[0], -1)
print("X_val:",X_val_flat.shape)

X_val: (200, 153600)


In [17]:
y_val=video_labels_val

In [18]:
from sklearn.metrics import accuracy_score

proba = ova_clf.predict_proba(X_val_flat)  # Shape: (200*810, num_classes)
# Get the class with the highest probability for each frame
pred = np.argmax(proba, axis=1)
print(pred)


accuracy = accuracy_score(y_val,pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


[0 1 1 2 2 0 5 0 2 4 2 2 2 0 1 2 2 1 2 3 1 1 1 1 3 1 0 1 3 0 1 1 1 3 3 1 1
 3 1 3 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1 3 4 2 2 2 5 2 4 4 1 0 2 4 0 1 2 2 3
 3 3 3 3 3 1 3 3 3 1 3 3 3 5 3 3 1 2 3 3 5 3 3 3 3 3 3 3 3 1 3 3 3 3 1 3 1
 3 3 3 3 3 3 3 3 3 2 3 3 4 4 4 4 4 1 4 2 4 1 2 4 2 4 3 2 3 3 3 4 4 3 4 3 4
 1 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 4 5 5 5 5 5 5 5 5
 5 5 5 5 5 4 5 5 5 5 5 5 5 5 5]
Accuracy: 69.50%


In [21]:
from sklearn.metrics import f1_score
f1 = f1_score(y_val, pred, average='micro')
print(f'F1 Score: {f1:.2f}')

F1 Score: 0.70


In [39]:
import numpy as np
import pandas as pd


num_classes = 6

# Convert class indices to one-hot encoding
one_hot_pred = np.zeros((len(pred), num_classes))
one_hot_pred[np.arange(len(pred)), pred] = 1

# Column names
columns = ['frontal_view_video_name', 'Left Lane Change', 'Left Turn', 'Right Lane Change', 'Right Turn', 'Slow-Stop', 'Straight']

# Create a DataFrame
df = pd.DataFrame(one_hot_pred, columns=columns[1:])
df.insert(0, 'frontal_view_video_name', files)

# Save to CSV
csv_file_path = 'task1_val_predictions.csv'
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved to {csv_file_path}")


CSV file saved to task1_val_predictions.csv


In [22]:
import joblib

# Save the model
joblib.dump(ova_clf, 'ova_clf_model70_.pkl')

['ova_clf_model70_.pkl']

<h2> Test result

In [31]:
import os
import numpy as np
def load_video_features(base_dir):
    data = []
    labels = []
    files=[]
    video_files = sorted(os.listdir(base_dir))
    for video_file in video_files:
        video_path = os.path.join(base_dir, video_file)
        features = np.load(video_path)
        features = features.reshape(features.shape[0], -1)
        # video_features = np.load(video_path, allow_pickle=True)
        #features = video_features.item()['resnet']  # Assuming features are stored in .npy format
        data.append(features)
        files.append(os.path.splitext(video_file)[0])

    return data,files

# Example usage
base_dir ='F:\Downloads\RIP_dataset\VGG16_test_features\\frontal_view\\VGG16_features'
video_features_test, files = load_video_features(base_dir)
print(video_features_test[0].shape)
print(list(files[i] for i in range(5)))

(82, 512)
['004ed8d4-5be9-4963-bdb3-8aace317cd74', '00d5a64d-7e78-4662-9f00-c2a7c69cace8', '01bd1017-7748-4e0b-bf57-d9012563695a', '02d9a67f-cd75-4880-9e41-08b9369e5ded(1)', '02d9a67f-cd75-4880-9e41-08b9369e5ded']


In [32]:
X_vgg_test= pad_videos(video_features_test)
print("val padded feature: ",X_vgg_test.shape)


val padded feature:  (299, 300, 512)


In [33]:
del(video_features_test)
X_test_flat = X_vgg_test.reshape(X_vgg_test.shape[0], -1)
print("X_val:",X_test_flat.shape)

X_val: (299, 153600)


In [36]:
from sklearn.metrics import accuracy_score

proba = ova_clf.predict_proba(X_test_flat)  
test_pred = np.argmax(proba, axis=1)
print(test_pred)


[1 3 5 2 2 1 1 3 0 1 4 4 2 4 2 3 1 3 5 5 5 3 5 3 3 1 3 4 1 1 3 4 5 4 1 5 3
 1 2 5 1 5 5 2 2 5 2 4 0 1 1 2 2 2 5 3 3 1 2 2 5 2 5 1 1 5 5 1 1 1 5 1 1 1
 4 3 1 1 1 5 0 5 5 1 4 3 0 4 2 5 4 3 1 4 5 4 1 3 5 5 1 5 1 1 5 1 5 2 5 0 4
 0 1 4 0 1 1 3 3 3 5 5 2 1 0 5 5 2 1 4 2 5 5 1 1 1 2 2 1 1 1 4 1 2 1 4 1 1
 4 3 1 5 5 3 1 1 3 4 5 5 3 1 1 1 2 3 5 2 4 4 2 5 3 1 4 5 5 4 2 1 1 4 1 3 3
 5 4 4 3 3 1 3 2 1 3 3 1 4 1 1 5 1 1 3 1 5 5 1 2 1 3 1 3 4 2 1 5 2 5 1 5 1
 1 5 5 3 4 2 5 5 1 5 1 2 5 4 5 1 5 1 2 1 4 5 1 4 2 1 5 1 1 5 1 3 3 4 2 2 5
 5 5 5 5 1 5 1 5 1 5 1 0 5 1 1 5 4 2 5 4 1 5 5 3 5 1 2 3 1 2 1 5 4 5 1 4 2
 1 1 2]


In [37]:
import numpy as np
import pandas as pd


num_classes = 6

# Convert class indices to one-hot encoding
one_hot_pred = np.zeros((len(test_pred), num_classes))
one_hot_pred[np.arange(len(test_pred)), test_pred] = 1

# Column names
columns = ['frontal_view_video_name', 'Left Lane Change', 'Left Turn', 'Right Lane Change', 'Right Turn', 'Slow-Stop', 'Straight']

# Create a DataFrame
df = pd.DataFrame(one_hot_pred, columns=columns[1:])
df.insert(0, 'frontal_view_video_name', files)

# Save to CSV
csv_file_path = 'task1_test_predictions.csv'
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved to {csv_file_path}")


CSV file saved to task1_test_predictions.csv
