# RESNET 50 (Visual Analysis)

For the linear-regression part of the video analysis, we'll be using ResNet-50.

Essentially, for vision models we compute frame-level embeddings, pad them by some amount, and then perform dimensionality reduction until we are left with the two scalar values we're outputting.

## Part 1) Data Preparation

Loading in the training and validation sets that the models will use.

In [1]:
import pandas as pd

# Read the training and validation tables from CSV files in the working directory.
train_df, validation_df = (
    pd.read_csv(csv_name) for csv_name in ('train_set.csv', 'validation_set.csv')
)

# Divide both percentage score columns of the training table by 100.
# NOTE(review): validation_df is not rescaled in this cell — confirm that it
# is handled elsewhere before comparing against the training targets.
for score_column in ('virality_score_%', 'engagement_score_%'):
    train_df[score_column] = train_df[score_column] / 100

# Trailing expression so the notebook renders the table.
train_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,video_id,virality_score_%,engagement_score_%,transcript
0,2692,2692,SFArlklGleY,0.512236,0.797396,s none of them exploded [Music] okay hi hey M...
1,171,171,ftYQJVeOaYg,0.773242,0.773702,what has been like your biggest purchase prob...
2,3296,3296,Yn1H91Smzf0,0.502229,0.661009,Montero and who's Lon nzx I think Montero wa...
3,920,920,t_Nut4StXR0,0.532414,0.937185,the internet is the best thing since sliced b...
4,22,22,8XIAcr6Ph7A,0.816056,0.864501,why does r want me to call him do not tell me...
...,...,...,...,...,...,...
2437,768,768,-J3b9tY03UU,0.626388,0.926755,this is the fattest cat I've ever seen this i...
2438,2118,2118,BKN0lkc5VZ8,1.000000,0.646552,one of the best war zone players face swag re...
2439,3376,3376,z6uKRGliME0,0.502320,0.615980,I I just do feel like for very significant pe...
2440,3870,3870,XNtD1SEy1dA,0.509992,0.706151,you were saying that there's something about ...


# Extracting Embeddings

We use a pre-trained model to extract the feature embeddings for each frame and then store them in another location on the drive.

In [2]:
import cv2
from torchvision import transforms

def extract_frames(video_path):
    """Decode every frame of a video and return the preprocessed frames.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A list holding one ``preprocess_image`` result per decoded frame, in
        the order the frames were read. The list is empty when the capture
        cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read failure): stop decoding.
                break
            # Convert BGR (OpenCV default) to RGB before preprocessing.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(preprocess_image(frame))
    finally:
        # Release the capture even if conversion or preprocessing raises,
        # so a failing video does not leak the underlying handle.
        cap.release()
    return frames

# Transform pipeline shared by every call to preprocess_image. It is built on
# first use (not at import time) and then reused, because the pipeline is the
# same for every frame and rebuilding it per frame is wasted work.
_PREPROCESS_PIPELINE = None


def preprocess_image(frame):
    """Turn one RGB frame into a normalized tensor.

    The frame is converted to a PIL image, resized to 256, center-cropped to
    224, converted to a tensor and normalized per channel with the fixed
    mean/std values below.

    Args:
        frame: A single frame as accepted by ``transforms.ToPILImage``
            (``extract_frames`` passes the RGB array produced by OpenCV).

    Returns:
        The result of applying the transform pipeline to ``frame``.
    """
    global _PREPROCESS_PIPELINE
    if _PREPROCESS_PIPELINE is None:
        _PREPROCESS_PIPELINE = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            # NOTE(review): these look like the usual ImageNet channel
            # statistics expected by pretrained torchvision models — confirm.
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    return _PREPROCESS_PIPELINE(frame)


ModuleNotFoundError: No module named 'cv2'

In [None]:
import torchvision.models as models
from torch import nn

# Build the feature extractor: take the pre-trained ResNet-50, keep every
# child module except the last one (the classification layer), and chain the
# remaining modules back together.
feature_layers = list(models.resnet50(pretrained=True).children())[:-1]
model = nn.Sequential(*feature_layers)
# Switch to evaluation mode; as the cell's last expression this also makes the
# notebook display the module.
model.eval()

In [None]:
import torch
import h5py
import numpy as np

# Directory that get_video_path searches for the source clips ("video_<id>.mp4").
# NOTE(review): the /content/drive prefix looks like a Colab Google Drive mount — confirm.
SAVE_PATH = "/content/drive/MyDrive/DownloadedTranscriptVideos"
# Root directory under which get_output_path places "resnet50/embeddings_<id>.h5".
OUTPUT_PATH = "/content/drive/MyDrive/RESNETEmbeddings"


def process_video(video_id):
    """Compute and store one embedding per frame for a single video.

    The video is located with ``get_video_path``, decoded with
    ``extract_frames``, and each frame is passed through the module-level
    ``model``. Each result is flattened to shape ``(1, features)`` and the
    collected arrays are handed to ``save_embeddings``.

    Args:
        video_id: Identifier used to build both the input video path and the
            output embeddings path.

    Raises:
        ValueError: If no frame could be read from the video. Raising here,
            before anything is written, keeps an empty output file from
            being created and later mistaken for finished work.
    """
    video_path = get_video_path(video_id)
    frames = extract_frames(video_path)
    if not frames:
        raise ValueError(f"No frames could be read from {video_path}")

    # Run each frame on whatever device the model lives on; this is a no-op
    # when the model is on the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    embeddings = []
    with torch.no_grad():
        for frame in frames:
            batch = frame.unsqueeze(0).to(device)  # Add batch dimension
            features = model(batch)
            features = features.view(features.size(0), -1)  # Flatten the features
            # .cpu() is required before .numpy() when the model is on a GPU.
            embeddings.append(features.cpu().numpy())

    # Save embeddings
    save_embeddings(video_id, embeddings)

def get_output_path(video_id):
    """Return the ``.h5`` path under ``OUTPUT_PATH`` for a video's embeddings."""
    file_name = "embeddings_" + video_id + ".h5"
    return OUTPUT_PATH + "/resnet50/" + file_name

def save_embeddings(video_id, embeddings):
    """Write a video's per-frame embeddings to an HDF5 file.

    The arrays are stacked into a single array and stored as the dataset
    ``'embeddings'`` in the file named by ``get_output_path(video_id)``. Any
    existing file at that path is overwritten.

    Args:
        video_id: Identifier used to build the output path.
        embeddings: Sequence of equally shaped arrays, one per frame.

    Raises:
        ValueError: If ``embeddings`` is empty or the arrays cannot be
            stacked (raised by ``np.stack`` before any file is created).
    """
    import os  # local import: this cell does not import os itself

    # Stack first, so that a failure here cannot leave behind a freshly
    # truncated, empty output file.
    stacked = np.stack(embeddings)

    output_file = get_output_path(video_id)
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        # The output sub-directory is not created anywhere else.
        os.makedirs(parent_dir, exist_ok=True)

    with h5py.File(output_file, 'w') as h5f:
        h5f.create_dataset('embeddings', data=stacked)

def get_video_path(video_id):
    """Return the ``.mp4`` path under ``SAVE_PATH`` for the given video id."""
    file_name = "video_" + video_id + ".mp4"
    return SAVE_PATH + "/" + file_name

In [None]:
import os

# Go through every listed video and extract its embeddings, skipping videos
# that already have an embeddings file and videos whose source file is absent.
videos_and_scores_df = pd.read_csv("videos_and_scores.csv")
for video_id in videos_and_scores_df['video_id']:
    if os.path.exists(get_output_path(video_id)):
        print("Embeddings already extracted - continuing ")
    elif not os.path.exists(get_video_path(video_id)):
        print("Video file does not exist - continueing")
    else:
        print(f"Processing video - {video_id}")
        process_video(video_id)