In [None]:
import os

import torch
import pandas as pd
from torch.autograd import profiler
import whisper
from time import perf_counter

In [None]:
def get_max_memory_allocation():
    """Return ``torch.cuda.max_memory_allocated()`` converted from bytes to GiB.

    Returns:
        float: the reported peak byte count divided by 1024 ** 3.
    """
    bytes_per_gib = 1024 ** 3
    peak_bytes = torch.cuda.max_memory_allocated()
    return peak_bytes / bytes_per_gib
    
def get_video_category(filename: str):
    """Map a sample's file name to a category (1, 2 or 3) based on its number.

    The number is the run of digits that ends the part of the base name
    preceding its first dot, e.g. ``audios/video4.mp3`` -> 4 and
    ``audios/video10.mp3`` -> 10.

    Args:
        filename: File name or path; only the final path component is inspected.

    Returns:
        1 when the number is <= 2, 3 when it is >= 5, otherwise 2.

    Raises:
        ValueError: If the base name has no digits right before its first dot.
    """
    # Use the base name so dots in directory parts (e.g. './audios/...') do not
    # truncate the name before the number is reached.
    stem = os.path.basename(filename).split('.')[0]
    # Take the whole trailing digit run, not just the last character, so that
    # 'video10' is read as 10 rather than 0.
    digits = stem[len(stem.rstrip('0123456789')):]
    if not digits:
        raise ValueError(f"no trailing video number found in {filename!r}")

    video_number = int(digits)
    if video_number <= 2:
        return 1
    if video_number >= 5:
        return 3
    return 2

In [None]:
# Model size names passed to whisper.load_model.
model_sizes = ['tiny', 'base', 'small', 'medium', 'large']

# os.listdir() returns entries in arbitrary order, so sort them to keep the run
# order (and the row order of the saved results) reproducible. Hidden entries
# and sub-directories such as '.ipynb_checkpoints' are not audio samples.
audio_samples = [
    os.path.join('audios', entry)
    for entry in sorted(os.listdir('audios'))
    if not entry.startswith('.') and os.path.isfile(os.path.join('audios', entry))
]

# Device string handed to whisper.load_model.
device = 'cpu'

In [None]:
# NOTE(review): no visible cell reads this empty frame — confirm whether this
# placeholder can be removed.
results = pd.DataFrame()

In [None]:
# Benchmark every model size against every audio sample, collecting one dict
# per (model, sample) run. The list is rebuilt from scratch on each execution
# of this cell, so re-running it does not append duplicate rows.
all_results = []

# Peak-memory statistics only exist when the run actually targets a CUDA device.
track_cuda_peak = str(device).startswith('cuda') and torch.cuda.is_available()

for size in model_sizes:
    # Loading depends only on `size`, so do it once per model instead of once
    # per sample; only transcription is timed.
    model = whisper.load_model(size, device=device)

    for sample in audio_samples:
        if track_cuda_peak:
            # The peak counter is cumulative since the last reset; without this
            # every row would report the largest peak of any earlier run.
            torch.cuda.reset_peak_memory_stats()

        start_time = perf_counter()
        model.transcribe(sample)
        time_elapsed = perf_counter() - start_time

        record = {
            'video': sample,
            'category': get_video_category(filename=sample),
            'gpu_memory': get_max_memory_allocation(),
            'model': size,
            'time_elapsed': time_elapsed,
        }
        all_results.append(record)

        print(f"Ran {size=} with {sample=} taking {time_elapsed} seconds")

    # Drop the model before loading the next size and release cached GPU blocks.
    del model
    torch.cuda.empty_cache()

In [None]:
# `all_results` is a list of per-run dicts, which the DataFrame constructor
# accepts directly (one row per dict).
df = pd.DataFrame(all_results)

# Create the output directory first: to_csv does not create missing parent
# directories, and failing here would discard the whole benchmark run.
os.makedirs('analysis-data', exist_ok=True)
df.to_csv('analysis-data/memory_results_cpu.csv', index=False)