In [1]:
from google.colab import drive
import os

# Mount Google Drive into the Colab filesystem, then make the HMDB51 project
# folder the working directory so the relative paths used further down
# (video data, split files, pickled loaders) resolve against it.
drive_root = "/content/drive/MyDrive"
project_dir = "/content/drive/MyDrive/hmdb51"

drive.mount('/content/drive')
os.listdir(drive_root)  # result is discarded; raises if the Drive root is not reachable
os.chdir(project_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install spikingjelly



In [3]:
! pip install av
#! wget https://raw.githubusercontent.com/pytorch/vision/6de158c473b83cf43344a0651d7c01128c7850e6/references/video_classification/transforms.py



In [4]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import random_split, DataLoader
from torch.optim.lr_scheduler import StepLR
import torchvision
from torchvision import get_video_backend
from torchvision.models.video import r3d_18
from torchvision import transforms
import os
import av
from tqdm.auto import tqdm
import numpy as np
import time
import datetime
import random
import transforms as T
import matplotlib.pyplot as plt


from spikingjelly.activation_based import layer, neuron, surrogate, encoding, functional

In [5]:
class HMDB51CSNN(nn.Module):
    """Convolutional spiking network for HMDB51 clips.

    Five identical stages (3x3 convolution -> batch norm -> IF neuron ->
    2x2 max pooling) are followed by a two-layer fully connected head whose
    510 outputs are reduced by a voting layer with group size 10.

    The whole module is switched to SpikingJelly's multi-step mode, so the
    input is expected to carry the time dimension first (the training code in
    this notebook feeds [T, N, C, H, W]).

    Args:
        channels: number of feature maps produced by every convolution stage.
    """

    def __init__(self, channels=128):
        super().__init__()

        # Feature extractor: only the first stage sees the 3 input channels,
        # every later stage maps channels -> channels.
        stages = []
        in_channels = 3
        for _ in range(5):
            stages.extend([
                layer.Conv2d(in_channels, channels, kernel_size=3, padding=1, bias=False),
                layer.BatchNorm2d(channels),
                neuron.IFNode(surrogate_function=surrogate.ATan()),
                layer.MaxPool2d(2, 2),  # spatial size: 112->56->28->14->7->3
            ])
            in_channels = channels

        # Classifier head: the 3x3 spatial map left after five poolings is
        # flattened, projected to 512 and then to 510 spiking outputs.
        head = [
            layer.Flatten(),
            layer.Dropout(0.5),
            layer.Linear(channels * 3 * 3, 512),
            layer.Dropout(0.5),
            layer.Linear(512, 510),
            neuron.IFNode(surrogate_function=surrogate.ATan()),
            layer.VotingLayer(10),
        ]

        self.conv_fc = nn.Sequential(*stages, *head)

        # Run every layer in multi-step mode (whole time sequence per call).
        functional.set_step_mode(self, step_mode='m')

    def forward(self, x: torch.Tensor):
        """Pass the input through the convolutional stages and the head."""
        return self.conv_fc(x)


In [7]:
# Build the HMDB51 train/test datasets (fold 1) and report their sizes.

# Clip extraction and split settings.
val_split = 0.05
num_frames = 20
clip_steps = 50
num_workers = 8
pin_memory = True

# Per-channel normalization statistics shared by both pipelines.
norm_mean = [0.43216, 0.394666, 0.37645]
norm_std = [0.22803, 0.22145, 0.216989]

# Training pipeline: resize, random flip, normalize, random 112x112 crop.
train_tfms = torchvision.transforms.Compose([
    T.ToFloatTensorInZeroOne(),
    T.Resize((128, 171)),
    T.RandomHorizontalFlip(),
    T.Normalize(mean=norm_mean, std=norm_std),
    T.RandomCrop((112, 112)),
])

# Test pipeline: same resize/normalize, but a deterministic center crop.
test_tfms = torchvision.transforms.Compose([
    T.ToFloatTensorInZeroOne(),
    T.Resize((128, 171)),
    T.Normalize(mean=norm_mean, std=norm_std),
    T.CenterCrop((112, 112)),
])

# Arguments common to the train and test dataset objects.
hmdb51_shared_kwargs = dict(step_between_clips=clip_steps, fold=1, num_workers=num_workers)

hmdb51_train = torchvision.datasets.HMDB51(
    'video_data/', 'test_train_splits/', num_frames,
    train=True, transform=train_tfms, **hmdb51_shared_kwargs,
)
hmdb51_test = torchvision.datasets.HMDB51(
    'video_data/', 'test_train_splits/', num_frames,
    train=False, transform=test_tfms, **hmdb51_shared_kwargs,
)

# The validation subset is carved out of the training clips later on.
total_train_samples = len(hmdb51_train)
total_val_samples = round(val_split * total_train_samples)

for summary_line in (
    f"number of train samples {total_train_samples}",
    f"number of validation samples {total_val_samples}",
    f"number of test samples {len(hmdb51_test)}",
):
    print(summary_line)

KeyboardInterrupt: 

In [None]:
# Split the training clips into train/validation subsets and wrap all three
# datasets in DataLoaders.
batch_size = 32
num_workers = 0
split_seed = 0  # fixed so the train/validation membership is the same on every run

# Pinned host memory only makes sense when batches are copied to a GPU.
kwargs = {'num_workers':num_workers, 'pin_memory':True} if torch.cuda.is_available() else {'num_workers':num_workers}

# A seeded generator makes the split reproducible; without it, every rerun of
# this cell moves different clips into the validation subset.
hmdb51_train_v1, hmdb51_val_v1 = random_split(
    hmdb51_train,
    [total_train_samples - total_val_samples, total_val_samples],
    generator=torch.Generator().manual_seed(split_seed),
)

train_loader = DataLoader(hmdb51_train_v1, batch_size=batch_size, shuffle=True, **kwargs)
# Evaluation loaders are not shuffled: order does not affect the metrics and a
# fixed order keeps evaluation runs comparable.
val_loader   = DataLoader(hmdb51_val_v1, batch_size=batch_size, shuffle=False, **kwargs)
test_loader  = DataLoader(hmdb51_test, batch_size=batch_size, shuffle=False, **kwargs)

In [None]:
# Sanity-check one training batch. The batch is fetched once and reused:
# every next(iter(train_loader)) builds a new iterator and loads a whole
# batch of video clips again.
batch = next(iter(train_loader))
print(f"Batch type: {type(batch)}")
print(f"Batch length: {len(batch)}")

video, audio, label = batch
print(video.shape) # (batch size, channels, frames, height, width)
print(audio.shape)
print(label.shape) # (batch size)



Batch type: <class 'list'>
Batch length: 3
torch.Size([32, 3, 20, 112, 112])
torch.Size([32, 1, 0])
torch.Size([32])


In [None]:
import joblib

# Persist the train and test DataLoader objects in the working directory so a
# later session can reload them instead of rebuilding the datasets.
train_pkl = "hmdb51_train.pkl"
test_pkl = "hmdb51_test.pkl"

joblib.dump(train_loader, train_pkl)
joblib.dump(test_loader, test_pkl)

['hmdb51_test.pkl']

아래 코드로 train_loader, test_loader 바로 가져오기

In [6]:
import joblib

# Restore the pickled DataLoaders written by the dump cell.
# NOTE: joblib.load unpickles arbitrary objects; only load files you created.
train_loader, test_loader = (
    joblib.load(pkl_name) for pkl_name in ("hmdb51_train.pkl", "hmdb51_test.pkl")
)



In [18]:
# Report dataset sizes and the batch size of the restored loaders.
for summary_line in (
    f"train_loader의 총 sample 개수: {len(train_loader.dataset)}",
    f"test_loader의 총 sample 개수: {len(test_loader.dataset)}",
    f"batch 당 sample 수 (batch size): {train_loader.batch_size}",
):
    print(summary_line)

train_loader의 총 sample 개수: 7059
test_loader의 총 sample 개수: 3101
batch 당 sample 수 (batch size): 32


In [7]:
# Training configuration: device, hyperparameters, model, optimizer and helpers.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seed = 0
lr = 0.001
gamma = 0.7
epochs = 30
config = {}

# Seed every RNG in use before the model is built, so weight initialisation
# and loader shuffling are repeatable from a fresh kernel.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

net = HMDB51CSNN().to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
encoder = encoding.PoissonEncoder()

# torch.cuda.amp.GradScaler() is deprecated and emits a FutureWarning on recent
# PyTorch; prefer torch.amp.GradScaler when it exists and fall back otherwise.
# Scaling is disabled when no GPU is present.
use_amp = device.type == "cuda"
_grad_scaler_cls = getattr(getattr(torch, "amp", None), "GradScaler", None)
if _grad_scaler_cls is not None:
    scaler = _grad_scaler_cls("cuda", enabled=use_amp)
else:
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

  scaler = torch.cuda.amp.GradScaler()


In [8]:
# Resume-able training loop: restores the latest checkpoint if present, then for
# every remaining epoch trains on train_loader, evaluates on test_loader, and
# writes a new checkpoint containing model, optimizer and metric history.
checkpoint_path = "/content/drive/MyDrive/commit/graduate/hmdb51csnn_checkpoint.pth"
num_classes = 51  # width of the one-hot targets
start_epoch = 0
max_test_acc = -1
train_losses = []
train_accs = []
test_losses = []
test_accs = []

# Create the checkpoint folder up front, so a missing directory cannot make
# torch.save fail only after a full epoch has been computed.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Load the checkpoint, if any.
if os.path.exists(checkpoint_path):
    # map_location puts stored tensors on the device in use now, whatever
    # device they were saved from.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    net.load_state_dict(checkpoint["model_state"])
    optimizer.load_state_dict(checkpoint["optimizer_state"])
    start_epoch = checkpoint["epoch"]
    max_test_acc = checkpoint["max_test_acc"]
    train_losses = checkpoint.get("train_losses", [])
    train_accs = checkpoint.get("train_accs", [])
    test_losses = checkpoint.get("test_losses", [])
    test_accs = checkpoint.get("test_accs", [])
    print(f"체크포인트 불러옴: epoch {start_epoch}, max_test_acc={max_test_acc*100:.2f}%")
else:
    print("새로운 학습을 시작합니다.")

# If an earlier run of this cell was interrupted mid-batch (e.g. by an
# out-of-memory error), the neurons still hold state from that batch.
# Reset it before starting again.
functional.reset_net(net)


def _forward_batch(frame, label):
    """Run one batch through the network.

    Returns the time-averaged output (firing rate), the labels moved to
    `device`, and the MSE loss against the one-hot labels.
    """
    frame = frame.to(device).permute(2, 0, 1, 3, 4)  # [N, C, T, H, W] -> [T, N, C, H, W]
    label = label.to(device)
    out_fr = net(frame).mean(0)  # average over the time dimension
    loss = F.mse_loss(out_fr, F.one_hot(label, num_classes).float())
    return out_fr, label, loss


# train & test
for epoch in range(start_epoch, epochs):
    start_time = time.time()
    net.train()
    train_loss = 0
    train_acc = 0
    train_samples = 0

    for frame, _, label in train_loader:
        optimizer.zero_grad()
        out_fr, label, loss = _forward_batch(frame, label)
        loss.backward()
        optimizer.step()

        train_samples += label.numel()
        train_loss += loss.item() * label.numel()
        train_acc += (out_fr.argmax(1) == label).float().sum().item()

        # Neuron state must be cleared between independent batches.
        functional.reset_net(net)

    train_time = time.time()
    train_speed = train_samples / (train_time - start_time)
    train_loss /= train_samples
    train_acc /= train_samples
    print(f'epoch {epoch}: train_loss={train_loss*100:.2f}%, train_acc={train_acc*100:.2f}%, train_speed={train_speed:.4f}images/s')

    net.eval()
    test_loss = 0
    test_acc = 0
    test_samples = 0
    with torch.no_grad():
        for frame, _, label in test_loader:
            out_fr, label, loss = _forward_batch(frame, label)
            test_samples += label.numel()
            test_loss += loss.item() * label.numel()
            test_acc += (out_fr.argmax(1) == label).float().sum().item()
            functional.reset_net(net)

    test_time = time.time()
    test_speed = test_samples / (test_time - train_time)
    test_loss /= test_samples
    test_acc /= test_samples

    if test_acc > max_test_acc:
        max_test_acc = test_acc

    print(f'epoch = {epoch}, test_loss={test_loss*100:.2f}%, test_acc={test_acc*100:.2f}%, max_test_acc={max_test_acc*100:.2f}%, test_speed={test_speed:.4f}images/s')
    # Estimated finish time: this epoch's duration times the epochs still to
    # run. The epoch that just finished is not counted again.
    print(f'escape time = {(datetime.datetime.now() + datetime.timedelta(seconds=(time.time() - start_time) * (epochs - epoch - 1))).strftime("%Y-%m-%d %H:%M:%S")}\n')

    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)

    checkpoint = {
        "epoch": epoch + 1,  # training resumes from the next epoch
        # Store CPU copies of the weights without moving the live model off
        # the device and back every epoch.
        "model_state": {name: tensor.detach().cpu() for name, tensor in net.state_dict().items()},
        "optimizer_state": optimizer.state_dict(),
        "max_test_acc": max_test_acc,
        "train_losses": train_losses,
        "train_accs": train_accs,
        "test_losses": test_losses,
        "test_accs": test_accs
    }
    # Write to a temporary file and swap it into place, so an interrupted save
    # cannot leave a truncated checkpoint behind.
    tmp_checkpoint_path = checkpoint_path + ".tmp"
    torch.save(checkpoint, tmp_checkpoint_path)
    os.replace(tmp_checkpoint_path, checkpoint_path)


  checkpoint = torch.load(checkpoint_path)


체크포인트 불러옴: epoch 10, max_test_acc=13.83%




RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
RuntimeError: CUDA out of memory. Tried to allocate 196.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 56.12 MiB is free. Process 104767 has 14.68 GiB memory in use. Of the allocated memory 14.55 GiB is allocated by PyTorch, and 15.43 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)



In [None]:
# Ask PyTorch's CUDA caching allocator to hand its unused cached blocks back to
# the GPU. Memory still referenced by live tensors is not released by this call.
torch.cuda.empty_cache()

In [None]:
# Plot the loss and accuracy history stored in the training checkpoint.
checkpoint_path = "/content/drive/MyDrive/commit/graduate/hmdb51csnn_checkpoint.pth"
# Plotting needs no GPU: map every stored tensor to the CPU so the cell also
# works in a session without CUDA.
checkpoint = torch.load(checkpoint_path, map_location="cpu")

# Read the stored history. The checkpoint's "epoch" value is deliberately NOT
# assigned to `epochs`: that name is the training-length hyperparameter, and
# overwriting it would make a rerun of the training cell stop immediately.
max_test_acc = checkpoint["max_test_acc"]  # best test accuracy so far
train_losses = checkpoint.get("train_losses", [])
train_accs = checkpoint.get("train_accs", [])
test_losses = checkpoint.get("test_losses", [])
test_accs = checkpoint.get("test_accs", [])


def _plot_history(ax, values, **plot_kwargs):
    """Plot one metric against 1-based epoch numbers derived from its own length."""
    ax.plot(range(1, len(values) + 1), values, marker="o", **plot_kwargs)


fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Loss curves
_plot_history(ax_loss, train_losses, label="Train Loss")
_plot_history(ax_loss, test_losses, label="Test Loss")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.set_title("Epoch vs Loss")
ax_loss.legend()
ax_loss.grid()

# Accuracy curves
_plot_history(ax_acc, train_accs, label="Train Accuracy")
_plot_history(ax_acc, test_accs, label="Test Accuracy", color="r")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_title("Epoch vs Accuracy")
ax_acc.legend()
ax_acc.grid()

# Show the figure
plt.show()