In [1]:
!pip -q install huggingface_hub hf_transfer torch torchvision torchaudio pillow tqdm torchmetrics

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m109.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/983.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
from huggingface_hub import hf_hub_download

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # speeds up downloads in Colab

DATASET_ID = "cj-mills/hagrid-sample-500k-384p"
ZIP_NAME = "hagrid-sample-500k-384p.zip"

zip_path = hf_hub_download(
    repo_id=DATASET_ID,
    filename=ZIP_NAME,
    repo_type="dataset",
    local_dir="/content/hagrid_zip",
    local_dir_use_symlinks=False,
)
print("Downloaded to:", zip_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


hagrid-sample-500k-384p.zip:   0%|          | 0.00/13.4G [00:00<?, ?B/s]

Downloaded to: /content/hagrid_zip/hagrid-sample-500k-384p.zip


In [3]:
!mkdir -p /content/hagrid_500k
!unzip -q "/content/hagrid_zip/hagrid-sample-500k-384p.zip" -d /content/hagrid_500k
!ls -lah /content/hagrid_500k | head

total 12K
drwxr-xr-x 3 root root 4.0K Feb 27 09:45 .
drwxr-xr-x 1 root root 4.0K Feb 27 09:45 ..
drwxrwxrwx 4 root root 4.0K Jun 20  2023 hagrid-sample-500k-384p


In [4]:
import os

root = "/content/hagrid_500k"
for path, dirs, files in os.walk(root):
    print("DIR:", path)
    print("  subdirs:", dirs[:5])
    print("  files:", files[:10])
    break

DIR: /content/hagrid_500k
  subdirs: ['hagrid-sample-500k-384p']
  files: []


In [5]:
import torch, torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

CLASSES = ["stop", "stop_inverted", "two_up", "two_up_inverted", "one_up", "one_down", "fist", "call"]
class_to_idx = {c:i for i,c in enumerate(CLASSES)}

model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(CLASSES))
model = model.cuda()

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 238MB/s]


In [6]:
!find /content/hagrid_500k -maxdepth 3 -type f | sed -n '1,120p'

/content/hagrid_500k/hagrid-sample-500k-384p/annotations_df.parquet
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/peace.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/palm.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/fist.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/three2.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/call.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/peace_inverted.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/ok.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/stop_inverted.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/four.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/two_up_inverted.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/mute.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/one.json
/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val/stop.json
/cont

In [7]:
!pip -q install huggingface_hub hf_transfer pandas pyarrow pillow opencv-python tqdm torch torchvision torchmetrics

In [8]:
import os, glob
candidates = glob.glob("/content/hagrid_500k/**/ann_train_val", recursive=True)
print("ann_train_val folders:", candidates)

DATA_ROOT = os.path.dirname(candidates[0])  # parent of ann_train_val
print("DATA_ROOT =", DATA_ROOT)
print("Contents:", os.listdir(DATA_ROOT)[:20])

ann_train_val folders: ['/content/hagrid_500k/hagrid-sample-500k-384p/ann_train_val']
DATA_ROOT = /content/hagrid_500k/hagrid-sample-500k-384p
Contents: ['hagrid_500k', 'annotations_df.parquet', 'ann_train_val']


In [9]:
import pandas as pd

df = pd.read_parquet(os.path.join(DATA_ROOT, "annotations_df.parquet"))
print(df.columns)
print(df.head(2))

TARGET = ["stop", "stop_inverted", "two_up", "two_up_inverted", "one_up", "one_down", "fist", "call"]
df = df[df["labels"].apply(lambda xs: any(x in TARGET for x in xs))].reset_index(drop=True)

# Each row may have multiple bboxes/labels; we'll explode to one-hand-per-row below.
print("Rows after filter:", len(df))

Index(['bboxes', 'labels', 'leading_hand', 'leading_conf', 'user_id'], dtype='object')
                                                                                 bboxes  \
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9  [[0.23925175, 0.28595301, 0.25055143, 0.207776...   
0000738b-f640-448f-b697-a6b7d218f5db  [[0.3452406, 0.41348879, 0.16873308, 0.1271769...   

                                                  labels leading_hand  \
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9              [call]        right   
0000738b-f640-448f-b697-a6b7d218f5db  [call, no_gesture]        right   

                                      leading_conf  \
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9           1.0   
0000738b-f640-448f-b697-a6b7d218f5db           1.0   

                                                                                user_id  
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9  5a389ffe1bed6660a59f4586c7d8fe2770785e5bf79b09...  
0000738b-f640-448f-b697-a6b7d218f5db  7bc2b8ae46e76c547837aae519fd5af0389eee

In [10]:
import pandas as pd
import os

parquet_path = os.path.join(DATA_ROOT, "annotations_df.parquet")
df = pd.read_parquet(parquet_path)

print("Columns:")
print(df.columns)

print("\nExample rows:")
print(df.head(2))

Columns:
Index(['bboxes', 'labels', 'leading_hand', 'leading_conf', 'user_id'], dtype='object')

Example rows:
                                                                                 bboxes  \
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9  [[0.23925175, 0.28595301, 0.25055143, 0.207776...   
0000738b-f640-448f-b697-a6b7d218f5db  [[0.3452406, 0.41348879, 0.16873308, 0.1271769...   

                                                  labels leading_hand  \
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9              [call]        right   
0000738b-f640-448f-b697-a6b7d218f5db  [call, no_gesture]        right   

                                      leading_conf  \
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9           1.0   
0000738b-f640-448f-b697-a6b7d218f5db           1.0   

                                                                                user_id  
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9  5a389ffe1bed6660a59f4586c7d8fe2770785e5bf79b09...  
0000738b-f640-448f-b697-a6b7d218f5db  7bc2b8ae46e76c

In [11]:
# Make the index (image id) into a column called "image_id"
df2 = df.reset_index(names="image_id")

TARGET = ["stop", "stop_inverted", "two_up", "two_up_inverted", "one_up", "one_down", "fist", "call"]
df2 = df2[df2["labels"].apply(lambda xs: any(x in TARGET for x in xs))].reset_index(drop=True)

def explode_rows(df_in):
    rows = []
    for _, r in df_in.iterrows():
        image_id = r["image_id"]
        for bbox, label in zip(r["bboxes"], r["labels"]):
            if label in ["stop", "stop_inverted", "two_up", "two_up_inverted", "one", "fist", "call"]:
                rows.append({
                    "image_id": image_id,
                    "label": label,
                    "bbox": bbox,
                    "leading_hand": r["leading_hand"],       # optional
                    "leading_conf": float(r["leading_conf"])  # optional
                })
    return pd.DataFrame(rows)

samples_df = explode_rows(df2)

print(samples_df.head())
print("Total samples:", len(samples_df))
print(samples_df["label"].value_counts())

                               image_id label  \
0  00005c9c-3548-4a8f-9d0b-2dd4aff37fc9  call   
1  0000738b-f640-448f-b697-a6b7d218f5db  call   
2  0003d6d1-3489-4f57-ab7a-44744dba93fd  call   
3  000422c8-2e41-4611-9783-c6d219bea42e  call   
4  0005f16f-02c5-4017-87f0-448b760bbb50  call   

                                                bbox leading_hand  \
0   [0.23925175, 0.28595301, 0.25055143, 0.20777627]        right   
1    [0.3452406, 0.41348879, 0.16873308, 0.12717697]        right   
2  [0.41490118000000004, 0.36752657, 0.09329108, ...        right   
3  [0.62568722, 0.31751255, 0.09240559999999999, ...         left   
4     [0.646139, 0.42534042, 0.06807641, 0.05302235]        right   

   leading_conf  
0           1.0  
1           1.0  
2           1.0  
3           1.0  
4           1.0  
Total samples: 170686
label
two_up             29679
stop_inverted      28857
two_up_inverted    28236
call               28194
stop               27961
fist               27759
Name

In [12]:
import glob, os

DATA_ROOT = "/content/hagrid_500k/hagrid-sample-500k-384p"  # adjust if yours differs
jpgs = glob.glob(os.path.join(DATA_ROOT, "**", "*.jpg"), recursive=True)
print("Total jpgs found:", len(jpgs))
print("Example jpg paths:", jpgs[:5])

Total jpgs found: 509323
Example jpg paths: ['/content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_peace_inverted/79cd1eb1-7619-428a-b71f-c6c37f2a34cb.jpg', '/content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_peace_inverted/90a2e698-8035-4f76-aedb-d57d29689c47.jpg', '/content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_peace_inverted/ce269f0d-5df0-4988-a727-7a158c25bce9.jpg', '/content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_peace_inverted/ae46213c-c010-4501-b4cc-7c12bbec6e5f.jpg', '/content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_peace_inverted/b80f9ecc-2889-46e1-8588-c3b61302dc5c.jpg']


In [13]:
example_id = samples_df.iloc[0]["image_id"]
hits = [p for p in jpgs if os.path.basename(p) == example_id + ".jpg"]
print("Example id:", example_id)
print("Hits:", hits[:3])

Example id: 00005c9c-3548-4a8f-9d0b-2dd4aff37fc9
Hits: ['/content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_call/00005c9c-3548-4a8f-9d0b-2dd4aff37fc9.jpg']


In [14]:
import pandas as pd
import os

DATA_ROOT = "/content/hagrid_500k/hagrid-sample-500k-384p"  # your root
df = pd.read_parquet(os.path.join(DATA_ROOT, "annotations_df.parquet"))

TARGET = ["stop", "stop_inverted", "two_up", "two_up_inverted", "one_up", "one_down", "fist", "call"]

df2 = df.reset_index(names="image_id")
df2 = df2[df2["labels"].apply(lambda xs: any(x in ["stop", "stop_inverted", "two_up", "two_up_inverted", "one", "fist", "call"] for x in xs))].reset_index(drop=True)

def explode_rows(df_in):
    rows = []
    for _, r in df_in.iterrows():
        image_id = r["image_id"]
        for bbox, label in zip(r["bboxes"], r["labels"]):
            if label in ["stop", "stop_inverted", "two_up", "two_up_inverted", "one", "fist", "call"]:
                rows.append({
                    "image_id": image_id,
                    "label": label,
                    "bbox": bbox,
                })
    return pd.DataFrame(rows)

samples_df = explode_rows(df2)
print("Total crop samples:", len(samples_df))
print(samples_df["label"].value_counts())

Total crop samples: 199130
label
two_up             29679
stop_inverted      28857
one                28444
two_up_inverted    28236
call               28194
stop               27961
fist               27759
Name: count, dtype: int64


In [15]:
import glob, os

all_imgs = glob.glob(os.path.join(DATA_ROOT, "**", "*.jpg"), recursive=True)
print("Total jpgs:", len(all_imgs))

img_map = {os.path.splitext(os.path.basename(p))[0]: p for p in all_imgs}

# sanity check
ex_id = samples_df.iloc[0]["image_id"]
print("Example id:", ex_id)
print("Resolved path:", img_map.get(ex_id))

Total jpgs: 509323
Example id: 00005c9c-3548-4a8f-9d0b-2dd4aff37fc9
Resolved path: /content/hagrid_500k/hagrid-sample-500k-384p/hagrid_500k/train_val_call/00005c9c-3548-4a8f-9d0b-2dd4aff37fc9.jpg


In [16]:
!pip -q install scikit-learn
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(
    samples_df,
    test_size=0.1,
    random_state=42,
    stratify=samples_df["label"]
)

print(len(train_df), len(val_df))

179217 19913


In [17]:
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class_to_idx = {c:i for i,c in enumerate(TARGET)}

class HagridCropDataset(Dataset):
    def __init__(self, df, img_map, class_to_idx, transform=None, pad=0.15):
        self.df = df.reset_index(drop=True)
        self.img_map = img_map
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.pad = pad

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        r = self.df.iloc[idx]
        image_id = r["image_id"]
        label = r["label"]
        x, y, w, h = r["bbox"]  # normalized [0..1]

        path = self.img_map[image_id]
        img_bgr = cv2.imread(path)
        H, W = img_bgr.shape[:2]

        # normalized -> pixels
        x1 = int(x * W); y1 = int(y * H)
        x2 = int((x + w) * W); y2 = int((y + h) * H)

        # padding
        pad_px = int(self.pad * max(x2 - x1, y2 - y1))
        x1 = max(0, x1 - pad_px); y1 = max(0, y1 - pad_px)
        x2 = min(W, x2 + pad_px); y2 = min(H, y2 + pad_px)

        crop_bgr = img_bgr[y1:y2, x1:x2]
        crop_rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB)


        # ROTATION AUGMENTATION
        import random
        if label == "one":
            direction = random.choice(["up", "down"])

            if direction == "down":
                crop_rgb = cv2.rotate(crop_rgb, cv2.ROTATE_180)
            label = f"one_{direction}"

        if self.transform is not None:
            x_t = self.transform(crop_rgb)
        else:
            x_t = torch.from_numpy(crop_rgb).permute(2,0,1).float() / 255.0

        y_t = self.class_to_idx[label]
        return x_t, y_t

IMG_SIZE = 224
train_tfms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(0.2, 0.2, 0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

train_ds = HagridCropDataset(train_df, img_map, class_to_idx, transform=train_tfms)
val_ds   = HagridCropDataset(val_df,   img_map, class_to_idx, transform=val_tfms)

train_dl = DataLoader(train_ds, batch_size=64, shuffle=True,  num_workers=2, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

len(train_ds), len(val_ds)


(179217, 19913)

In [18]:
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights
from torchmetrics.classification import MulticlassAccuracy
from tqdm.auto import tqdm
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(TARGET))
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

acc = MulticlassAccuracy(num_classes=len(TARGET)).to(device)

def run_epoch(dl, train=True):
    model.train() if train else model.eval()
    total_loss = 0.0
    acc.reset()

    ctx = torch.enable_grad() if train else torch.no_grad()
    with ctx:
        for x, y in tqdm(dl, leave=False):
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            logits = model(x)
            loss = loss_fn(logits, y)

            if train:
                opt.zero_grad()
                loss.backward()
                opt.step()

            total_loss += loss.item() * x.size(0)
            acc.update(logits.softmax(dim=1), y)

    return total_loss / len(dl.dataset), acc.compute().item()

for epoch in range(1, 6):
    tr_loss, tr_acc = run_epoch(train_dl, train=True)
    va_loss, va_acc = run_epoch(val_dl,   train=False)
    print(f"Epoch {epoch}: train loss {tr_loss:.4f} acc {tr_acc:.3f} | val loss {va_loss:.4f} acc {va_acc:.3f}")

  0%|          | 0/2801 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Epoch 1: train loss 0.0354 acc 0.988 | val loss 0.0099 acc 0.997


  0%|          | 0/2801 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionErrorException ignored in: : <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>can only test a child process

Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 2: train loss 0.0092 acc 0.997 | val loss 0.0082 acc 0.998


  0%|          | 0/2801 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 3: train loss 0.0068 acc 0.998 | val loss 0.0066 acc 0.998


  0%|          | 0/2801 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 4: train loss 0.0070 acc 0.998 | val loss 0.0092 acc 0.998


  0%|          | 0/2801 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^Exception ignored in: ^<function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>^
^^Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
^^    ^self._shutdown_workers()^
^  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
^    ^if w.is_alive():^
^ ^ ^ ^ ^ ^

  0%|          | 0/312 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1690, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd0c95b45e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1707, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Epoch 5: train loss 0.0067 acc 0.998 | val loss 0.0126 acc 0.996


In [19]:
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True,  num_workers=0, pin_memory=True, persistent_workers=False)
val_dl   = DataLoader(val_ds,   batch_size=64, shuffle=False, num_workers=0, pin_memory=True, persistent_workers=False)


In [20]:
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights
from torchmetrics.classification import MulticlassAccuracy
from tqdm.auto import tqdm
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(TARGET))
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

acc = MulticlassAccuracy(num_classes=len(TARGET)).to(device)

def run_epoch(dl, train=True):
    model.train() if train else model.eval()
    total_loss = 0.0
    acc.reset()

    ctx = torch.enable_grad() if train else torch.no_grad()
    with ctx:
        for x, y in tqdm(dl, leave=False):
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            logits = model(x)
            loss = loss_fn(logits, y)

            if train:
                opt.zero_grad()
                loss.backward()
                opt.step()

            total_loss += loss.item() * x.size(0)
            acc.update(logits.softmax(dim=1), y)

    return total_loss / len(dl.dataset), acc.compute().item()

for epoch in range(1, 6):
    tr_loss, tr_acc = run_epoch(train_dl, train=True)
    va_loss, va_acc = run_epoch(val_dl,   train=False)
    print(f"Epoch {epoch}: train loss {tr_loss:.4f} acc {tr_acc:.3f} | val loss {va_loss:.4f} acc {va_acc:.3f}")

  0%|          | 0/2801 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 1: train loss 0.0356 acc 0.988 | val loss 0.0118 acc 0.996


  0%|          | 0/2801 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 2: train loss 0.0089 acc 0.997 | val loss 0.0089 acc 0.997


  0%|          | 0/2801 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 3: train loss 0.0081 acc 0.997 | val loss 0.0076 acc 0.998


  0%|          | 0/2801 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 4: train loss 0.0070 acc 0.998 | val loss 0.0064 acc 0.998


  0%|          | 0/2801 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch 5: train loss 0.0065 acc 0.998 | val loss 0.0087 acc 0.997


In [21]:
torch.save({
    "state_dict": model.state_dict(),
    "classes": CLASSES,
    "img_size": IMG_SIZE
}, "/content/gesture_resnet18.pt")

In [22]:
from google.colab import files
files.download("/content/gesture_resnet18.pt")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>