In [3]:
# First, create the shell script content
shell_script = '''set -e
LOGFILE=test.log
(
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
pushd "${SCRIPT_DIR}/.." > /dev/null

videos_folder_path="/data/videos/utterances_final"
frames_folder_path="/data/frames/utterances_final"
ext=mp4

mkdir -p "${frames_folder_path}"

for video_file_path in "${videos_folder_path}"/*."${ext}"; do
    slash_and_video_file_name="${video_file_path:${#videos_folder_path}}"
    slash_and_video_file_name_without_extension="${slash_and_video_file_name%.${ext}}"
    video_frames_folder_path="${frames_folder_path}${slash_and_video_file_name_without_extension}"
    mkdir -p "${video_frames_folder_path}"
    ffmpeg -i "${video_file_path}" "${video_frames_folder_path}/%05d.jpg"
done

popd > /dev/null) >& $LOGFILE
'''

# Write the script to a file
with open('extract_frames.sh', 'w') as f:
    f.write(shell_script)

# Make the script executable
!chmod +x extract_frames.sh

# Install ffmpeg if not already installed
!apt-get update
!apt-get install -y ffmpeg

In [None]:
import requests
import zipfile
import os

# Create the data/videos directory if it doesn't exist
os.makedirs('data/videos', exist_ok=True)

# URL of the dataset
# NOTE(review): this cell used to re-assign `url` to the GloVe archive
# (http://nlp.stanford.edu/data/glove.840B.300d.zip) right after this line, which made it unpack
# word vectors into data/videos instead of the videos. If GloVe is still needed, download it
# separately into its own folder.
url = "https://huggingface.co/datasets/MichiganNLP/MUStARD/resolve/main/mmsd_raw_data.zip"

# The zip file is stored temporarily next to the extraction folder
zip_path = 'data/' + url.split('/')[-1]

try:
    # Download the zip file, streaming it to disk so the archive never has to fit in memory
    print("Downloading zip file...")
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail here on an HTTP error instead of saving an error page and failing at unzip time
        response.raise_for_status()
        with open(zip_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

    # Extract the contents to data/videos
    print("Extracting files...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('data/videos')
finally:
    # Remove the temporary zip file, also when the download or the extraction failed
    if os.path.exists(zip_path):
        os.remove(zip_path)

print("Download and extraction complete!")

Downloading zip file...


In [None]:
# Bare string expression: running this cell only echoes the URL below; nothing is downloaded.
# NOTE(review): looks like a leftover pointer to the BERT text-features archive - confirm
# whether it is still needed or should be removed.
'https://huggingface.co/datasets/MichiganNLP/MUStARD/resolve/main/BERT_text_features.zip'

In [5]:
# Run the frame-extraction script that the first cell wrote to extract_frames.sh.
# The script redirects its own output to test.log, so ffmpeg's messages end up in that file
# rather than in the notebook.
!bash extract_frames.sh

In [8]:
import json
import os


def create_sarcasm_data_json(frames_dir: str = "data/frames/utterances_final", output_path: str = "data/sarcasm_data.json") -> dict:
    """
    Create a sarcasm_data.json file by scanning the frames directory.

    Every sub-directory of ``frames_dir`` is treated as one video; plain files are ignored.
    The result maps each video ID (the sub-directory name) to an empty dictionary. IDs are
    sorted so that the generated file does not depend on the arbitrary order in which the
    operating system lists directory entries.

    Args:
        frames_dir: Directory that contains one sub-directory of frames per video.
        output_path: Where to write the JSON file; its parent directory is created if missing.

    Returns:
        The dictionary that was written to ``output_path``.

    Raises:
        FileNotFoundError: If ``frames_dir`` does not exist.
    """
    # Collect the sub-directories in a deterministic order.
    with os.scandir(frames_dir) as entries:
        video_ids = sorted(entry.name for entry in entries if entry.is_dir())

    # Create dictionary with video IDs as keys
    sarcasm_data = {video_id: {} for video_id in video_ids}

    # Make sure the target directory exists before writing (dirname is "" for a bare file name).
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save to JSON file
    with open(output_path, "w") as f:
        json.dump(sarcasm_data, f, indent=4)

    print(f"Created {output_path} with {len(video_ids)} video entries")
    return sarcasm_data


# Inside the notebook kernel this guard is true, so running the cell builds
# data/sarcasm_data.json from the function's default paths.
if __name__ == "__main__":
    create_sarcasm_data_json()


Created data/sarcasm_data.json with 690 video entries


In [1]:
!pip install torch pillow torchvision h5py tqdm -q

[notice] A new release of pip is available: 23.3.1 -> 24.3.1
[notice] To update, run: python -m pip install --upgrade pip


In [2]:
import json
import os
from typing import Callable, Dict

import PIL.Image
import torch
import torch.utils.data


class SarcasmDataset(torch.utils.data.Dataset):
    """Dataset of Sarcasm videos.

    Each item is one video, represented by the frames stored as image files in
    ``FRAMES_DIR_PATH/<video_id>/``. The video IDs are the top-level keys of a JSON file.
    """

    # Folder that holds one sub-folder of extracted frames per video ID.
    FRAMES_DIR_PATH = "data/frames/utterances_final"

    def __init__(self, transform: Callable = None, videos_data_path: str = "data/sarcasm_data.json", check_missing_videos: bool = True) -> None:
        """Read the video IDs and count the frames available for each of them.

        Args:
            transform: Callable applied to every opened frame image. ``__getitem__`` needs it to
                return a ``torch.Tensor``; all frames of one video must share the same shape.
            videos_data_path: JSON file whose top-level keys are the video IDs.
            check_missing_videos: If true, a video ID without a frames folder raises; if false,
                such IDs are silently dropped.

        Raises:
            FileNotFoundError: If ``check_missing_videos`` is true and a frames folder is missing.
        """
        self.transform = transform

        with open(videos_data_path) as file:
            videos_data_dict = json.load(file)

        # Keep the IDs in the order of the JSON file, skipping (or rejecting) missing folders.
        self.video_ids = []
        for video_id in videos_data_dict:
            video_folder_path = self._video_folder_path(video_id)
            if os.path.exists(video_folder_path):
                self.video_ids.append(video_id)
            elif check_missing_videos:
                raise FileNotFoundError(f"Directory {video_folder_path} not found, which was referenced in {videos_data_path}")

        self.frame_count_by_video_id = {video_id: len(os.listdir(self._video_folder_path(video_id))) for video_id in self.video_ids}

    @staticmethod
    def _video_folder_path(video_id: str) -> str:
        """Return the folder that holds the frames of ``video_id``."""
        return os.path.join(SarcasmDataset.FRAMES_DIR_PATH, video_id)

    @staticmethod
    def features_file_path(model_name: str, layer_name: str) -> str:
        """Return the HDF5 path used to store the features of ``layer_name`` of ``model_name``."""
        return f"data/features/utterances_final/{model_name}_{layer_name}.hdf5"

    def __getitem__(self, index) -> Dict[str, object]:
        """Load all frames of the video at ``index``.

        Returns:
            A dict with the video ``"id"`` and ``"frames"``, a tensor of shape
            ``(frame_count, *frame_shape)`` whose rows follow the sorted frame file names.
            A video without any frame file yields an empty tensor of shape ``(0,)``.

        Raises:
            TypeError: If a frame is not a ``torch.Tensor`` after applying ``transform``.
        """
        video_id = self.video_ids[index]
        video_folder_path = self._video_folder_path(video_id)

        # os.listdir returns entries in arbitrary order; sort the names so that row i of the
        # result is always the i-th frame by file name.
        frame_file_names = sorted(os.listdir(video_folder_path))

        frames = None
        for i, frame_file_name in enumerate(frame_file_names):
            # Transform while the file is still open, then close it right away so that long
            # runs do not accumulate open file handles.
            with PIL.Image.open(os.path.join(video_folder_path, frame_file_name)) as image:
                frame = self.transform(image) if self.transform else image

            if not isinstance(frame, torch.Tensor):
                raise TypeError(
                    f"Frames must be torch.Tensor objects to be stacked, got {type(frame).__name__}; "
                    "pass a `transform` that converts the image to a tensor"
                )

            if frames is None:
                # Allocate once, as soon as the shape of a single frame is known.
                frames = torch.empty((self.frame_count_by_video_id[video_id], *frame.size()))  # noqa

            frames[i] = frame  # noqa

        if frames is None:
            # No frame files: return an empty tensor, which a DataLoader can collate (None cannot).
            frames = torch.empty((0,))

        return {"id": video_id, "frames": frames}

    def __len__(self) -> int:
        """Return the number of videos."""
        return len(self.video_ids)


In [3]:
from typing import Any, Tuple

import h5py
import torch
import torch.nn
import torch.utils.data
import torchvision
from overrides import overrides
from tqdm import tqdm
# 
# from dataset import SarcasmDataset

# Device on which the model and the frame batches are placed: the GPU when CUDA is available,
# otherwise the CPU.
# noinspection PyUnresolvedReferences
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def pretrained_resnet152() -> torch.nn.Module:
    """Return torchvision's pretrained ResNet-152, in eval mode and with all parameters frozen."""
    model = torchvision.models.resnet152(pretrained=True).eval()
    # The network is only used as a fixed feature extractor, so no parameter needs gradients.
    for weight in model.parameters():
        weight.requires_grad = False
    return model


def save_resnet_features(batch_size: int = 32) -> None:
    """Extract ResNet-152 features for every frame of every video and store them in HDF5 files.

    Two files are written, at the paths given by ``SarcasmDataset.features_file_path``. Each
    holds one dataset per video ID:

    * ``res5c``: the tensor fed into the network's ``avgpool`` layer, stored with shape
      ``(frame_count, 2048, 7, 7)``.
    * ``pool5``: the network output once ``fc`` is replaced by an identity, stored with shape
      ``(frame_count, 2048)``.

    Args:
        batch_size: Number of frames sent through the network per forward pass.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    import os  # Local import: the import list of this cell does not bring ``os`` into scope.

    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    transforms = torchvision.transforms.Compose(
        [
            torchvision.transforms.Resize(256),
            torchvision.transforms.CenterCrop(224),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    dataset = SarcasmDataset(transform=transforms)

    resnet = pretrained_resnet152().to(DEVICE)
    resnet.fc = torch.nn.Identity()  # Trick to avoid computing the fc1000 layer, as we don't need it here.

    res5c_path = SarcasmDataset.features_file_path("resnet", "res5c")
    pool5_path = SarcasmDataset.features_file_path("resnet", "pool5")
    # h5py does not create missing parent folders, so make sure they exist before opening.
    for features_path in (res5c_path, pool5_path):
        os.makedirs(os.path.dirname(features_path) or ".", exist_ok=True)

    res5c_output = None

    def avg_pool_hook(_module: torch.nn.Module, input_: Tuple[torch.Tensor], _output: Any) -> None:
        # The input of the average pooling layer is the feature map we want to keep as "res5c".
        nonlocal res5c_output
        res5c_output = input_[0]

    hook_handle = resnet.avgpool.register_forward_hook(avg_pool_hook)

    try:
        with (
            h5py.File(res5c_path, "w") as res5c_features_file,
            h5py.File(pool5_path, "w") as pool5_features_file,
        ):
            # Pre-allocate one dataset per video so batches can be written in place.
            for video_id in dataset.video_ids:
                video_frame_count = dataset.frame_count_by_video_id[video_id]
                res5c_features_file.create_dataset(video_id, shape=(video_frame_count, 2048, 7, 7))
                pool5_features_file.create_dataset(video_id, shape=(video_frame_count, 2048))

            total_frame_count = sum(dataset.frame_count_by_video_id[video_id] for video_id in dataset.video_ids)

            # Gradients are never needed here; no_grad skips the autograd bookkeeping.
            with torch.no_grad(), tqdm(total=total_frame_count, desc="Extracting ResNet features") as progress_bar:
                # The DataLoader uses its default batch size of 1, i.e. one video per iteration.
                for instance in torch.utils.data.DataLoader(dataset):
                    video_id = instance["id"][0]
                    frames = instance["frames"][0]

                    for start_index in range(0, len(frames), batch_size):
                        end_index = min(start_index + batch_size, len(frames))

                        # Only the current batch is moved to the device, so a long video does
                        # not have to fit in GPU memory as a whole.
                        frame_batch = frames[start_index:end_index].to(DEVICE)

                        avg_pool_value = resnet(frame_batch)

                        res5c_features_file[video_id][start_index:end_index] = res5c_output.cpu().numpy()  # noqa
                        pool5_features_file[video_id][start_index:end_index] = avg_pool_value.cpu().numpy()

                        progress_bar.update(end_index - start_index)
    finally:
        # Detach the hook so it does not outlive this call.
        hook_handle.remove()

In [4]:
# Run the feature extraction; it writes the resnet_res5c and resnet_pool5 HDF5 files under
# data/features/utterances_final.
save_resnet_features()

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:03<00:00, 79.5MB/s] 
Extracting ResNet features: 100%|██████████| 89066/89066 [09:25<00:00, 157.61it/s]
