In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import random
import os
import numpy as np
import torch

In [None]:
# Per-event table; later cells read its 'label', 'number_of_frames' and
# 'event_path' columns.
events_csv_path = '../important_csvs/events_with_number_of_frames.csv'
df = pd.read_csv(events_csv_path)

In [None]:
# Show the freshly loaded table as the cell output for a quick sanity check.
df

In [None]:
SEED = 42  # single seed shared by every RNG configured in this notebook

def seed_everything(seed):
    """Seed the RNGs of ``random``, NumPy and PyTorch and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # NOTE: PYTHONHASHSEED is only read at interpreter start-up, so this
    # affects child processes spawned afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one; this call is
    # silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms and may select a
    # different one on each run, which defeats determinism — keep it off.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

In [None]:
# Shuffle within each stratum so fold membership does not depend on the row
# order of the CSV; random_state=SEED keeps the assignment reproducible.
# NOTE(review): this changes which rows land in which fold compared with the
# previous unshuffled split — regenerate any artefacts that rely on old folds.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

In [None]:
# Create the 'fold' column with a placeholder value of 0 for every row; the
# split loop further down writes each row's validation-fold number into it.
df['fold'] = 0

In [None]:
# Build a composite stratification key of the form '<label>_<bucket>', where
# bucket = number_of_frames // 76.
# NOTE(review): 76 is an unexplained bucket width — confirm its meaning and
# consider promoting it to a named constant.
label_strings = df['label'].values.astype(str)
frame_bucket_strings = (
    df['number_of_frames']
    .apply(lambda x: f'_{x // 76}')
    .values
    .astype(str)
)
df.loc[:, 'stratify_group'] = np.char.add(label_strings, frame_bucket_strings)

In [None]:
# Show the table again to inspect the newly added 'fold' and 'stratify_group' columns.
df

In [None]:
# Stratify on the composite key; only the validation positions of each split
# are needed to label rows with their fold number.
fold_splits = skf.split(X=df['event_path'], y=df['stratify_group'])
for fold_number, (train_index, val_index) in enumerate(fold_splits):
    # Translate positional indices into index labels for .loc assignment.
    val_row_labels = df.index[val_index]
    df.loc[val_row_labels, 'fold'] = fold_number

In [None]:
# Show the table with the per-row fold numbers filled in.
df

In [None]:
# Persist the table together with its 'stratify_group' and 'fold' columns;
# index=False keeps the row index out of the file.
stratified_csv_path = '../important_csvs/events_with_number_of_frames_stratified.csv'
df.to_csv(stratified_csv_path, index=False)

In [None]:
# Number of rows assigned to each fold, as a check that the folds are similar in size.
df['fold'].value_counts()