## Imports

In [1]:
import pandas as pd
import torch
import torch
import torch.nn as nn
from torchvision import models
import os
from utils import process_video_frames_tchw, count_predictions
import numpy as np
import matplotlib.pyplot as plt

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

#### Directory Setup

In [3]:
# Root directory for dataset
ROOT_DIR = '../../data/RepCount'
VIDEO_DIR = os.path.join(ROOT_DIR, 'video', 'test')

# Checkpoint path
CHECKPOINT_DIR = './'
TEST_ANNOTATION = os.path.join('./', 'test.csv')
ACTION_ANNOTATION = './all_action.csv'

#### Action Trigger Module Settings

In [4]:
ENTER_THRESHOLD = 0.78
EXIT_THRESHOLD = 0.4
MOMENTUM = 0.4

BATCH_SIZE = 1024

#### Model initialization

In [5]:
# Initialize model and optimizer
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 16)
model = model.to(device)



In [6]:
checkpoint_path = os.path.join(CHECKPOINT_DIR, 'best_resnet50_v1.pth')
model.load_state_dict(torch.load(checkpoint_path))

df = pd.read_csv(TEST_ANNOTATION)

  model.load_state_dict(torch.load(checkpoint_path))


In [7]:
df['type'].unique()

array(['situp', 'push_up', 'pull_up', 'bench_pressing', 'jump_jack',
       'squat', 'front_raise', 'pommelhorse'], dtype=object)

In [8]:

actions = pd.read_csv(ACTION_ANNOTATION)
action_mapping = dict(zip(actions['action'], actions['label']))
action_mapping

{'pommelhorse': 0,
 'bench_pressing': 1,
 'pull_up': 2,
 'jump_jack': 3,
 'situp': 4,
 'front_raise': 5,
 'squat': 6,
 'push_up': 7}

In [9]:
row = df.iloc[81]
row

Unnamed: 0             81
type          front_raise
name          stu4_12.mp4
count                   4
L1                   33.0
                 ...     
L298                  NaN
L299                  NaN
L300                  NaN
L301                  NaN
L302                  NaN
Name: 81, Length: 306, dtype: object

### Helper Function

In [10]:
def predict_video(row):
    """Returns prediction, mae, and obo"""
    video_path = os.path.join(VIDEO_DIR, row['name'])
    action_type = row['type']
    ground_truth = row['count']
    transformed_tchw_tensor = process_video_frames_tchw(video_path).to(device)
    batch_size = 1024

    Y = []

    model.eval() 
    with torch.no_grad(): 
        for i in range(0, len(transformed_tchw_tensor)+1, batch_size):
            batch = transformed_tchw_tensor[i:i+batch_size].cuda() 
            if batch.size(0) == 0:  # Check if the batch is empty
                continue
            output = model(batch)
            idx = action_mapping[action_type]
            y_hat = output.cpu().numpy()[0][idx:idx+2]
            confidences = softmax(y_hat)
            Y.append(confidences[1]) 

    # Y = np.concatenate(Y, axis=0)  
    # Y = Y.squeeze()

    # plt.plot(Y)

    return count_predictions(Y,
                             ground_truth,
                             ENTER_THRESHOLD,
                             EXIT_THRESHOLD,
                             MOMENTUM)

In [11]:
predict_video(row)

(0, 0.99999999975, 0)

### Cross Validation

In [12]:
oboas = []
maes = []

checkpoint_path = os.path.join(CHECKPOINT_DIR, 'best_resnet50_v1.pth')
model.load_state_dict(torch.load(checkpoint_path))

df = pd.read_csv(TEST_ANNOTATION)
df[['prediction', 'mae', 'obo']] = df.apply(lambda row: pd.Series(predict_video(row)), axis=1)
oboas.append(df['obo'].mean())
maes.append(df['mae'].mean())

save_file = os.path.join(CHECKPOINT_DIR, f'results_repcount_cowbytes.csv')
df.to_csv(save_file, index=False)

  model.load_state_dict(torch.load(checkpoint_path))


### Results

In [16]:
oboas

[0.026490066225165563]

In [17]:
maes

[0.9933774832866561]

In [18]:
df['obo'].mean()

0.026490066225165563

TODO:
1. Probably try weighted
2. Try density maps
3. Tune the action trigger module