In [None]:
!pip install -q pytorchvideo evaluate

In [None]:
pip install -q pyarrow==14.0.1

In [None]:
pip install -q transformers --upgrade

In [1]:
pip install -q torch==2.0.1 torchvision==0.15.2 --extra-index-url https://download.pytorch.org/whl/cu118 xformers==0.0.21

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.1.2+cpu requires torch==2.1.2, but you have torch 2.0.1+cu118 which is incompatible.
torchtext 0.16.2+cpu requires torch==2.1.2, but you have torch 2.0.1+cu118 which is incompatible.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import os
import pandas as pd

root_path = "/kaggle/input/my-data"
folder_list = os.listdir(root_path)
label_list = [path for path in folder_list if not path.endswith((".csv"))]
total_df = pd.read_csv(os.path.join(root_path,"train.csv"))

total_df.reset_index(drop = True, inplace = True)
total_df['label'].value_counts()

label
normal      222
theft       156
fight       101
accident     97
Name: count, dtype: int64

In [2]:
from sklearn.model_selection import train_test_split

def correct_file_path(file_name: str, root_path: str):
    return os.path.join(root_path, file_name.replace('\\', '/'))

def preprocess_meta_df(df, root_path, label2id):
    df.rename(columns={"video_name": "video_path"}, inplace=True)
    df['video_path'] = df['video_path'].apply(lambda x: correct_file_path(x, root_path))
    df['label'] = df['label'].apply(lambda x: label2id[x])
    return df

train_meta_df, test_meta_df = train_test_split(total_df, test_size=0.2, random_state=42)

label_list = list(set(train_meta_df['label']))
class_labels = sorted(label_list)
label2id = {label: i for i, label in enumerate(class_labels)}
id2label = {i: label for label, i in label2id.items()}

print(f"Unique classes: {list(label2id.keys())}.")

train_meta_df = preprocess_meta_df(train_meta_df, root_path, label2id)
test_meta_df = preprocess_meta_df(test_meta_df, root_path, label2id)

print("Splitted data:", len(train_meta_df), len(test_meta_df))

Unique classes: ['accident', 'fight', 'normal', 'theft'].
Splitted data: 460 116


In [3]:
import torch
import pytorchvideo.data
from torch.utils.data import Dataset
from transformers import VideoMAEImageProcessor, VideoMAEForVideoClassification, VivitConfig
from transformers import VivitImageProcessor, VivitModel, VivitForVideoClassification
import torch.nn as nn

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    RemoveKey,
    ShortSideScale,
    UniformTemporalSubsample,
)

from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize,
)

model_checkpoint = "google/vivit-b-16x2-kinetics400"

model1 = VivitForVideoClassification.from_pretrained(model_checkpoint, label2id=label2id, id2label=id2label, ignore_mismatched_sizes=True)
conf = model1.config
conf.hidden_size = 492
conf.num_hidden_layers = 10
model = VivitForVideoClassification.from_pretrained(model_checkpoint, config = conf, ignore_mismatched_sizes=True)
image_processor = VivitImageProcessor.from_pretrained(model_checkpoint)

2024-08-26 06:31:18.034918: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-26 06:31:18.034974: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-26 06:31:18.039913: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  return self.fget.__get__(instance, owner)()
Some weights of VivitForVideoClassification were not initialized from the model checkpoint at google/vivit-b-16x2-kinetics400 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([400, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classi

In [4]:
class CustomVideoDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        video_path = row['video_path']
        label = row['label']
        return video_path, label

mean = image_processor.image_mean
std = image_processor.image_std

if "shortest_edge" in image_processor.size:
    height = width = image_processor.size["shortest_edge"]
else:
    height = image_processor.size["height"]
    width = image_processor.size["width"]

resize_to = (model.config.image_size, model.config.image_size)

num_frames_to_sample = model.config.num_frames
clip_duration = 8

train_transform = Compose(
    [
        ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(num_frames_to_sample),
                    Lambda(lambda x: x / 255.0),
                    Normalize(mean, std),
                    RandomShortSideScale(min_size=256, max_size=320),
                    Resize(resize_to),
                    RandomHorizontalFlip(p=0.5),
                ]
            ),
        ),
    ]
)

val_transform = Compose(
    [
        ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(num_frames_to_sample),
                    Lambda(lambda x: x / 255.0),
                    Normalize(mean, std),
                    Resize(resize_to),
                ]
            ),
        ),
    ]
)

train_custom_dataset = CustomVideoDataset(train_meta_df)
train_labeled_video_paths = [(video_path, {'label': label}) for video_path, label in train_custom_dataset]

test_custom_dataset = CustomVideoDataset(test_meta_df)
test_labeled_video_paths = [(video_path, {'label': label}) for video_path, label in test_custom_dataset]

In [5]:
import imageio
import numpy as np
from IPython.display import Image

train_dataset = pytorchvideo.data.LabeledVideoDataset(
    labeled_video_paths =train_labeled_video_paths,
    clip_sampler=pytorchvideo.data.make_clip_sampler("random", clip_duration),
    decode_audio=False,
    transform=train_transform,
)

test_dataset = pytorchvideo.data.LabeledVideoDataset(
    labeled_video_paths =test_labeled_video_paths,
    clip_sampler=pytorchvideo.data.make_clip_sampler("uniform", clip_duration),
    decode_audio=False,
    transform=val_transform,
)

def unnormalize_img(img):
    img = (img * std) + mean
    img = (img * 255).astype("uint8")
    return img.clip(0, 255)

In [6]:
class CustomVideoDataset1(Dataset):

    def __init__(self, data):
        super().__init__()
        self.train_dataset = data

    def __len__(self):
        return self.train_dataset.num_videos

    def __getitem__(self, idx):
        video=next(iter(self.train_dataset))
        
        return {"video": video['video'],
                "label": video['label']}

In [7]:
custom_train_data = CustomVideoDataset1(train_dataset)
custom_test_data = CustomVideoDataset1(test_dataset)

In [8]:
from torch.utils.data import DataLoader

In [9]:
train_loader = DataLoader(custom_train_data, batch_size = 1)
test_loader = DataLoader(custom_test_data, batch_size = 1)

In [10]:
import torch
import torch.nn.functional as F
import torch.optim as optim

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)

In [12]:
epoch = 2

In [13]:
final_metrics = []

In [None]:
# loss_train = []
# loss_eval = []
# acc_train = []
# acc_eval = []

In [14]:
from tqdm import tqdm
import torch
import json

file_path = 'metrics.json'

In [None]:
checkpoint = torch.load('/kaggle/input/model3/pytorch/default/1/model-3.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [None]:
with open('/kaggle/input/metrics/metrics.json', 'r') as json_file:
    final_metrics = json.load(json_file)

In [None]:
file_path = '/kaggle/working/metrics.json'
with open(file_path, 'r') as json_file:
    final_metrics = json.load(json_file)

In [15]:
final_metrics

[]

In [None]:
for i in range(epoch):
    metrics = {}
    model.train()
    train_losses = 0.0
    train_sum = 0.0
    batch_iterator=tqdm(train_loader, desc=f"Epoch {i+1:02d}")
    for sample in batch_iterator:
        video, label = sample['video'], sample['label']
        
        video = video.to(device)
        label = torch.tensor([label], device=device)

        optimizer.zero_grad() 
        train_logits = model(video.permute(0, 2, 1, 3, 4))['logits']
        
        if train_logits.argmax() == label:
            train_sum +=1
                
        train_loss = F.cross_entropy(train_logits, label)
        train_losses += train_loss.item()
        
        
        train_loss.backward() 
        optimizer.step() 
    
    avg_train_loss = train_losses / train_dataset.num_videos
    train_acc = train_sum / train_dataset.num_videos
#     loss_train.append(float(avg_train_loss))
#     acc_train.append(float(train_acc))
    print(f'train loss: {avg_train_loss}')
    print(f'train acc: {train_acc}')
              
    model.eval()
    eval_losses = 0.0
    eval_sum = 0.0
    for sample in test_loader: 
        video, label = sample['video'], sample['label']
        
        video = video.to(device)
        label = torch.tensor([label], device=device)
        
        with torch.no_grad():
            eval_logits = model(video.permute(0, 2, 1, 3, 4))['logits']
            eval_loss = F.cross_entropy(eval_logits, label).item()
            eval_losses += eval_loss
            
            if eval_logits.argmax() == label:
                eval_sum +=1
    with torch.no_grad():        
        avg_eval_loss = eval_losses / test_dataset.num_videos
        eval_acc = eval_sum / test_dataset.num_videos
    #     loss_eval.append(float(avg_eval_loss)) 
    #     acc_eval.append(float(eval_acc))
        print(f'val loss: {avg_eval_loss}')
        print(f'val acc: {eval_acc}')

        metrics['acc_train'] = train_acc
        metrics['loss_train'] = avg_train_loss
        metrics['acc_eval'] = eval_acc
        metrics['loss_eval'] = avg_eval_loss
        final_metrics.append(metrics)

        torch.cuda.empty_cache()

        PATH = f"model-{i+1}.pt"
        torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                }, PATH)

        with open(file_path, 'w') as json_file:
            json.dump(final_metrics, json_file)


Epoch 01: 100%|██████████| 460/460 [15:24<00:00,  2.01s/it]


train loss: 1.479778262519318
train acc: 0.34130434782608693
val loss: 1.4058843393777978
val acc: 0.3793103448275862


Epoch 02:  20%|██        | 93/460 [03:29<13:37,  2.23s/it]

In [None]:
torch.save(model, 'model3.pt')

In [None]:
final_metrics

In [None]:
label

In [None]:
import os
os.remove(f'/kaggle/working/metrics.json')

In [None]:
os.rename('/kaggle/working/model-2.pt', '/kaggle/working/model-1.pt')