In [5]:
# Switch the working directory so the relative paths used by later cells
# ('./data' for the dataset, 'checkpoints/' for saved weights) resolve inside this folder.
# NOTE(review): the path looks like a mounted Google Drive folder; presumably Drive has
# already been mounted in this runtime -- confirm before running on a fresh kernel.
%cd /content/drive/MyDrive/DL_Homework/DL_2st

/content/drive/MyDrive/DL_Homework/DL_2st


In [7]:
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification, ViTFeatureExtractor
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm

# Fine-tune a pretrained ViT on CIFAR-10 with mixed precision, evaluating on the
# test split and saving a checkpoint after every epoch.
import os  # local import: this cell's import block does not bring in `os`

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only meaningful on CUDA; on CPU everything stays in fp32.
use_amp = device.type == "cuda"

# Preprocessing: resize to 224x224 (the resolution named in the checkpoint id),
# convert to a [0, 1] float tensor, then shift/scale each channel to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load the CIFAR-10 train/test splits (downloaded into ./data on first use).
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Data loaders: shuffle only the training split.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Pretrained ViT backbone with a freshly initialised 10-class classification head.
# NOTE(review): `feature_extractor` is not used anywhere in this cell (preprocessing is
# done by `transform` above); it is kept only in case another cell relies on it.
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=10)

# Move the model to the selected device (GPU or CPU).
model.to(device)

# Optimiser and loss; a small learning rate because the whole network is fine-tuned.
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = torch.nn.CrossEntropyLoss()

# Gradient scaler for mixed-precision training; a disabled scaler is a pass-through.
scaler = GradScaler(enabled=use_amp)

# Training schedule and checkpointing configuration.
num_epochs = 10
save_every = 1  # save a checkpoint every `save_every` epochs
save_dir = "checkpoints"
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    train_pbar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs} Training', unit='batch')
    # Count batches explicitly: tqdm only refreshes its `.n` attribute lazily, so it is
    # not a reliable denominator for the running-average loss.
    for step, (images, labels) in enumerate(train_pbar, start=1):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass and loss under autocast (no-op when use_amp is False).
        with torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images).logits
            loss = criterion(outputs, labels)

        # Scaled backward pass and optimiser step.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running training statistics.
        preds = outputs.detach().argmax(dim=1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)
        running_loss += loss.item()

        # No per-batch torch.cuda.empty_cache(): the batch tensors are rebound on the
        # next iteration, and flushing the caching allocator every step only slows
        # training without lowering peak memory.

        # Refresh the progress-bar summary.
        train_pbar.set_postfix({'Loss': running_loss / step, 'Accuracy': correct_train / total_train * 100})

    # Accuracy over the whole training epoch.
    train_acc = correct_train / total_train * 100

    # ---- Evaluation phase ----
    model.eval()
    correct_test = 0
    total_test = 0
    test_pbar = tqdm(test_loader, desc=f'Epoch {epoch + 1}/{num_epochs} Testing', unit='batch')
    with torch.no_grad():
        for images, labels in test_pbar:
            images = images.to(device)
            labels = labels.to(device)
            # Use the same precision policy as training so evaluation is not stuck in fp32.
            with torch.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images).logits
            preds = outputs.argmax(dim=1)
            correct_test += (preds == labels).sum().item()
            total_test += labels.size(0)

            # Refresh the progress-bar summary.
            test_pbar.set_postfix({'Accuracy': correct_test / total_test * 100})

    # Accuracy over the whole test split.
    test_acc = correct_test / total_test * 100

    print(f"Epoch {epoch + 1}/{num_epochs} | Train Accuracy: {train_acc:.2f}% | Test Accuracy: {test_acc:.2f}%")

    # Save the weights every `save_every` epochs (currently: every epoch).
    if (epoch + 1) % save_every == 0:
        save_path = f"{save_dir}/vit_epoch_{epoch + 1}.pth"
        torch.save(model.state_dict(), save_path)
        print(f"Model saved at epoch {epoch + 1} as {save_path}")


Files already downloaded and verified
Files already downloaded and verified


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 5020 has 14.74 GiB memory in use. Of the allocated memory 14.23 GiB is allocated by PyTorch, and 378.68 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification, ViTFeatureExtractor
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
import os

# Fine-tune a pretrained ViT on CIFAR-10 with mixed precision, evaluating on the
# test split and saving a checkpoint after every epoch.

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only meaningful on CUDA; on CPU everything stays in fp32.
use_amp = device.type == "cuda"

# Preprocessing: resize to 224x224 (the resolution named in the checkpoint id),
# convert to a [0, 1] float tensor, then shift/scale each channel to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load the CIFAR-10 train/test splits (downloaded into ./data on first use).
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Data loaders: shuffle only the training split.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Pretrained ViT backbone with a freshly initialised 10-class classification head.
# NOTE(review): `feature_extractor` is not used anywhere in this cell (preprocessing is
# done by `transform` above); it is kept only in case another cell relies on it.
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=10)

# Move the model to the selected device (GPU or CPU).
model.to(device)

# Optimiser and loss; a small learning rate because the whole network is fine-tuned.
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = torch.nn.CrossEntropyLoss()

# Gradient scaler for mixed-precision training; a disabled scaler is a pass-through.
scaler = GradScaler(enabled=use_amp)

# Training schedule and checkpointing configuration.
num_epochs = 10
save_every = 1  # save a checkpoint every `save_every` epochs
save_dir = "checkpoints"
# Create the checkpoint directory once, up front; exist_ok avoids the
# check-then-create pattern and makes the cell safe to re-run.
os.makedirs(save_dir, exist_ok=True)

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    train_pbar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs} Training', unit='batch')
    # Count batches explicitly: tqdm only refreshes its `.n` attribute lazily, so it is
    # not a reliable denominator for the running-average loss.
    for step, (images, labels) in enumerate(train_pbar, start=1):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass and loss under autocast (no-op when use_amp is False).
        with torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images).logits
            loss = criterion(outputs, labels)

        # Scaled backward pass and optimiser step.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running training statistics.
        preds = outputs.detach().argmax(dim=1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)
        running_loss += loss.item()

        # No per-batch torch.cuda.empty_cache(): the batch tensors are rebound on the
        # next iteration, and flushing the caching allocator every step only slows
        # training without lowering peak memory.

        # Refresh the progress-bar summary.
        train_pbar.set_postfix({'Loss': running_loss / step, 'Accuracy': correct_train / total_train * 100})

    # Accuracy over the whole training epoch.
    train_acc = correct_train / total_train * 100

    # ---- Evaluation phase ----
    model.eval()
    correct_test = 0
    total_test = 0
    test_pbar = tqdm(test_loader, desc=f'Epoch {epoch + 1}/{num_epochs} Testing', unit='batch')
    with torch.no_grad():
        for images, labels in test_pbar:
            images = images.to(device)
            labels = labels.to(device)
            # Use the same precision policy as training so evaluation is not stuck in fp32.
            with torch.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images).logits
            preds = outputs.argmax(dim=1)
            correct_test += (preds == labels).sum().item()
            total_test += labels.size(0)

            # Refresh the progress-bar summary.
            test_pbar.set_postfix({'Accuracy': correct_test / total_test * 100})

    # Accuracy over the whole test split.
    test_acc = correct_test / total_test * 100

    print(f"Epoch {epoch + 1}/{num_epochs} | Train Accuracy: {train_acc:.2f}% | Test Accuracy: {test_acc:.2f}%")

    # Save the weights every `save_every` epochs (currently: every epoch).
    if (epoch + 1) % save_every == 0:
        save_path = f"{save_dir}/vit_epoch_{epoch + 1}.pth"
        torch.save(model.state_dict(), save_path)
        print(f"Model saved at epoch {epoch + 1} as {save_path}")

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()
  with autocast():
Epoch 1/10 Training: 100%|██████████| 782/782 [09:13<00:00,  1.41batch/s, Loss=0.768, Accuracy=93.1]
Epoch 1/10 Testing: 100%|██████████| 157/157 [02:01<00:00,  1.30batch/s, Accuracy=97.6]


Epoch 1/10 | Train Accuracy: 93.06% | Test Accuracy: 97.56%
Model saved at epoch 1 as checkpoints/vit_epoch_1.pth


Epoch 2/10 Training: 100%|██████████| 782/782 [09:11<00:00,  1.42batch/s, Loss=0.171, Accuracy=98.8]
Epoch 2/10 Testing: 100%|██████████| 157/157 [02:01<00:00,  1.30batch/s, Accuracy=97.9]


Epoch 2/10 | Train Accuracy: 98.83% | Test Accuracy: 97.90%
Model saved at epoch 2 as checkpoints/vit_epoch_2.pth


Epoch 3/10 Training: 100%|██████████| 782/782 [09:14<00:00,  1.41batch/s, Loss=0.0838, Accuracy=99.6]
Epoch 3/10 Testing: 100%|██████████| 157/157 [02:01<00:00,  1.29batch/s, Accuracy=98]


Epoch 3/10 | Train Accuracy: 99.55% | Test Accuracy: 98.01%
Model saved at epoch 3 as checkpoints/vit_epoch_3.pth


Epoch 4/10 Training: 100%|██████████| 782/782 [09:14<00:00,  1.41batch/s, Loss=0.0482, Accuracy=99.8]
Epoch 4/10 Testing: 100%|██████████| 157/157 [02:00<00:00,  1.30batch/s, Accuracy=98.1]


Epoch 4/10 | Train Accuracy: 99.75% | Test Accuracy: 98.11%
Model saved at epoch 4 as checkpoints/vit_epoch_4.pth


Epoch 5/10 Training:  88%|████████▊ | 685/782 [08:06<01:08,  1.43batch/s, Loss=0.0311, Accuracy=99.8]

In [2]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize


Collecting gputil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-py3-none-any.whl size=7392 sha256=e67deb4f6a764b235d0c1682ad8941e755f7d4900ae0d99be7be1c4f3fec151b
  Stored in directory: /root/.cache/pip/wheels/2b/4d/8f/55fb4f7b9b591891e8d3f72977c4ec6c7763b39c19f0861595
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0


In [12]:
import psutil
import humanize
import os
import GPUtil as GPU

GPUs = GPU.getGPUs()
# Colab exposes at most one GPU and does not guarantee one, so tolerate an empty list
# instead of failing with IndexError on a CPU-only runtime.
gpu = GPUs[0] if GPUs else None
def printm():
    """Print free host RAM, this process's resident size, and current GPU memory usage.

    The GPU list is queried again on every call: the objects returned by
    GPU.getGPUs() hold the values read at query time, so reusing the
    module-level `gpu` would keep printing the numbers from when the cell first ran.
    When no GPU is visible, a short notice is printed instead of the GPU line.
    """
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize(psutil.virtual_memory().available), " |     Proc size: " + humanize.naturalsize(process.memory_info().rss))
    current_gpus = GPU.getGPUs()
    if not current_gpus:
        print("GPU RAM Free: n/a (no GPU detected)")
        return
    current = current_gpus[0]
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total     {3:.0f}MB".format(current.memoryFree, current.memoryUsed, current.memoryUtil*100, current.memoryTotal))
printm()


Gen RAM Free: 10.3 GB  |     Proc size: 2.3 GB
GPU RAM Free: 13046MB | Used: 2048MB | Util  13% | Total     15360MB


In [14]:
import gc

import torch  # imported here so the cell does not depend on an earlier cell having run

# Collect unreachable Python objects first, so any tensors they still reference are
# released back to PyTorch's allocator.
gc.collect()
# Then return PyTorch's cached-but-unused GPU blocks to the driver. This does not
# touch autograd, so no torch.no_grad() context is needed around it.
if torch.cuda.is_available():
    torch.cuda.empty_cache()