In [1]:
!pip install einops

Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.7.0


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange

In [3]:
print(torch.__version__)

2.1.0+cu118


In [4]:
# Mount the Google Cloud Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
class Embedding(nn.Module):  # Patch Embedding + Position Embedding + Class Embedding
    def __init__(self, image_channels=3, image_size=224, patch_size=16, dim=768, drop_ratio=0.):
        super(Embedding, self).__init__()
        self.num_patches = (image_size // patch_size) ** 2  # Patch nums

        self.patch_conv = nn.Conv2d(image_channels, dim, patch_size, patch_size)  # Use convolution to divide the image
        self.cls_token = nn.Parameter(torch.zeros(1, 1, dim))            # class embedding
        self.pos_emb = nn.Parameter(torch.zeros(1, self.num_patches + 1, dim))  # position embedding
        self.dropout = nn.Dropout(drop_ratio)

    def forward(self, x):
        x = self.patch_conv(x)
        x = rearrange(x, "B C H W -> B (H W) C")
        cls_token = torch.repeat_interleave(self.cls_token, x.shape[0], dim=0)  #Dimensional extension (1,1,dim) -> (B,1,dim)
        x = torch.cat([cls_token, x], dim=1)  # (B,1,dim) cat (B,num_patches,dim) --> (B,num_patches+1,dim)
        x = x + self.pos_emb  # Add location code
        return self.dropout(x)  # token

In [6]:
class MultiHeadAttention(nn.Module):  # Multi-Head Attention
    def __init__(self, dim, num_heads=8, drop_ratio=0.):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads

        self.qkv = nn.Linear(dim, dim * 3, bias=False)  # Using a linear network layer, the qkv matrix is calculated
        self.dropout = nn.Dropout(drop_ratio)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):
        # B: Batch Size / P: Num of Patches / H: Num of Heads / d: Dim of Head
        qkv = self.qkv(x)
        qkv = rearrange(qkv, "B P (C H d) -> C B H P d", C=3, H=self.num_heads, d=self.head_dim)
        q, k, v = qkv[0], qkv[1], qkv[2]  # split q，k，v
        k = rearrange(k, "B H P d -> B H d P")
        # Attention(Q, K, V ) = softmax(QKT/dk)V （T stands for transpose)
        attn = torch.matmul(q, k) * self.head_dim ** -0.5  # QKT/dk
        attn = F.softmax(attn, dim=-1)  # softmax(QKT/dk)
        attn = self.dropout(attn)
        x = torch.matmul(attn, v)  # softmax(QKT/dk)V
        x = rearrange(x, "B H P d -> B P (H d)")
        x = self.proj(x)
        x = self.dropout(x)
        return x

In [7]:

class MLP(nn.Module):  # MLP
    def __init__(self, in_dims, hidden_dims=None, drop_ratio=0.):
        super(MLP, self).__init__()
        if hidden_dims is None:
            hidden_dims = in_dims * 4

        self.fc1 = nn.Linear(in_dims, hidden_dims)
        self.fc2 = nn.Linear(hidden_dims, in_dims)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(drop_ratio)

    def forward(self, x):
        # Linear + GELU + Dropout + Linear + Dropout
        x = self.fc1(x)
        x = self.gelu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        return x

In [8]:

class EncoderBlock(nn.Module):  # Transformer Encoder Block
    def __init__(self, dim, num_heads=8, drop_ratio=0.):
        super(EncoderBlock, self).__init__()

        self.layernorm1 = nn.LayerNorm(dim)
        self.multiheadattn = MultiHeadAttention(dim, num_heads)
        self.dropout = nn.Dropout(drop_ratio)
        self.layernorm2 = nn.LayerNorm(dim)
        self.mlp = MLP(dim)

    def forward(self, x):
        x0 = x
        x = self.layernorm1(x)
        x = self.multiheadattn(x)
        x = self.dropout(x)
        x1 = x + x0  # First residual connection
        x = self.layernorm2(x1)
        x = self.mlp(x)
        x = self.dropout(x)
        return x + x1  # Second residual connection

In [9]:
class MLPHead(nn.Module):  # MLP Head
    def __init__(self, dim, num_classes=1000):
        super(MLPHead, self).__init__()
        self.layernorm = nn.LayerNorm(dim)
        self.mlphead = nn.Linear(dim, num_classes)

    def forward(self, x):
        x = self.layernorm(x)
        cls = x[:, 0, :]  # class token
        return self.mlphead(cls)

In [10]:

class ViT(nn.Module):  # Vision Transformer
    def __init__(self, image_channels=3, image_size=224, num_classes=1000, patch_size=16, dim=768, num_heads=12,layers=12):
        super(ViT, self).__init__()
        self.embedding = Embedding(image_channels, image_size, patch_size, dim)
        self.encoder = nn.Sequential(
            *[EncoderBlock(dim, num_heads) for i in range(layers)]  # The encoder structure consists of layers Transformer Encoder blocks
        )
        self.head = MLPHead(dim, num_classes)

    def forward(self, x):
        x_emb = self.embedding(x)
        feature = self.encoder(x_emb)
        return self.head(feature)

# 1 Algorithm classification ability test

In [11]:
from torchvision.datasets import mnist
import torchvision
import torchvision.transforms as transforms
import torch.utils as utils
from torch.utils.data import DataLoader
from torchvision.models import resnet18

import matplotlib.pyplot as plt
import numpy as np

## Clothing classification: FashionMNIST

In [13]:
transform = transforms.Compose([transforms.ToTensor()])
training_data = torchvision.datasets.FashionMNIST(root='/content/drive/MyDrive/RPData', train=True, download=True, transform=transform)
testing_data = torchvision.datasets.FashionMNIST(root='/content/drive/MyDrive/RPData', train=False, download=True, transform=transform)

In [14]:
train_db, val_db = utils.data.random_split(training_data, [50000,10000])

train_loader = DataLoader(train_db, batch_size=32, shuffle=True)
val_loader = DataLoader(val_db, batch_size=32, shuffle=True)
test_loader = DataLoader(testing_data, batch_size=32, shuffle=True)

In [15]:
def vit_base(num_classes=10):
    return ViT(image_channels=1, image_size=28, num_classes=10, patch_size=7, dim=768, num_heads=12,layers=12)

In [16]:
device = torch.device("cuda")

net = vit_base().to(device=device, dtype=torch.float32)
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001, weight_decay=1e-8)
criterion = nn.CrossEntropyLoss()

In [17]:
MinTrainLoss=999
epochs=5

train_loss = []
val_loss = []

train_acc=[]
val_acc=[]

for epoch in range(1, epochs+1):
    total_train_loss = []
    total_val_loss = []

    # Train
    net.train()
    for input_img, label in train_loader:
        input_img = input_img.cuda()
        label = label.cuda()
        optimizer.zero_grad()

        pred_img = net(input_img)
        loss = criterion(pred_img, label)
        loss.backward()
        optimizer.step()

    # Verify
    net.eval()
    current = 0
    with torch.no_grad():
        for val_img, val_label in val_loader:
            val_img = val_img.cuda()
            val_label = val_label.cuda()
            pred = net(val_img)
            total_val_loss.append(criterion(pred, val_label).item())
            current += (pred.argmax(1)==val_label).type(torch.float).sum().item()

    val_loss.append(np.mean(total_val_loss))
    val_acc.append(current/10000*100)

    print("epochs[%3d/%3d] val_loss: %.5f, val_acc: %.3f"%(epoch, epochs, val_loss[-1],val_acc[-1]))

epochs[  1/  5] val_loss: 0.48462, val_acc: 82.090
epochs[  2/  5] val_loss: 0.44647, val_acc: 83.910
epochs[  3/  5] val_loss: 0.39432, val_acc: 85.850
epochs[  4/  5] val_loss: 0.37556, val_acc: 86.250
epochs[  5/  5] val_loss: 0.39572, val_acc: 85.880


In [18]:
net.eval()
current = 0
total_test_loss = []
with torch.no_grad():
    for test_img, test_label in test_loader:
        test_img = test_img.cuda()
        test_label = test_label.cuda()

        pred = net(test_img)
        total_test_loss.append(criterion(pred, test_label).item())
        current += (pred.argmax(1)==test_label).type(torch.float).sum().item()

print("testdataset test_loss: %.5f, test_acc: %.3f"%(np.mean(total_val_loss), current/10000*100))

testdataset test_loss: 0.39572, test_acc: 85.220


## Color picture classification: CIFAR10

In [21]:
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
training_data = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/RPData', train=True, download=True, transform=transform)
testing_data = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/RPData', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [20]:
train_db, val_db = utils.data.random_split(training_data, [40000,10000])

train_loader = DataLoader(train_db, batch_size=64, shuffle=True)
val_loader = DataLoader(val_db, batch_size=64, shuffle=True)
test_loader = DataLoader(testing_data, batch_size=64, shuffle=True)

In [22]:
def vit_base(num_classes=10):
    return ViT(image_channels=3, image_size=32, num_classes=10, patch_size=8, dim=768, num_heads=8,layers=12)

In [25]:
device = torch.device("cuda")

net = vit_base().to(device=device, dtype=torch.float32)
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001, weight_decay=1e-8)
criterion = nn.CrossEntropyLoss()

In [26]:
MinTrainLoss=999
epochs=8

cif_train_loss = []
cif_val_loss = []

cif_train_acc=[]
cif_val_acc=[]

for epoch in range(1, epochs+1):
    total_train_loss = []
    total_val_loss = []

    # Train
    net.train()
    for input_img, label in train_loader:
        input_img = input_img.cuda()
        label = label.cuda()
        optimizer.zero_grad()

        pred_img = net(input_img)
        loss = criterion(pred_img, label)
        loss.backward()
        optimizer.step()

    # Verify
    net.eval()
    current = 0
    with torch.no_grad():
        for val_img, val_label in val_loader:
            val_img = val_img.cuda()
            val_label = val_label.cuda()

            pred = net(val_img)
            total_val_loss.append(criterion(pred, val_label).item())
            current += (pred.argmax(1)==val_label).type(torch.float).sum().item()

    val_loss.append(np.mean(total_val_loss))
    val_acc.append(current/10000*100)

    print("epochs[%3d/%3d] val_loss: %.5f, val_acc: %.3f"%(epoch, epochs, val_loss[-1],val_acc[-1]))

epochs[  1/  8] val_loss: 1.69927, val_acc: 38.020
epochs[  2/  8] val_loss: 1.47330, val_acc: 46.710
epochs[  3/  8] val_loss: 1.40801, val_acc: 48.680
epochs[  4/  8] val_loss: 1.40210, val_acc: 48.790
epochs[  5/  8] val_loss: 1.34559, val_acc: 51.440
epochs[  6/  8] val_loss: 1.29986, val_acc: 54.360
epochs[  7/  8] val_loss: 1.28519, val_acc: 54.230
epochs[  8/  8] val_loss: 1.29764, val_acc: 54.960


In [27]:
net.eval()
current = 0
total_test_loss = []
with torch.no_grad():
    for test_img, test_label in test_loader:
        test_img = test_img.cuda()
        test_label = test_label.cuda()

        pred = net(test_img)
        total_test_loss.append(criterion(pred, test_label).item())
        current += (pred.argmax(1)==test_label).type(torch.float).sum().item()

print("testdataset test_loss: %.5f, test_acc: %.3f"%(np.mean(total_val_loss), current/10000*100))

testdataset test_loss: 1.29764, test_acc: 55.360


# base model: Resnet

In [None]:
device = torch.device("cuda")

base_net = resnet18().to(device=device, dtype=torch.float32)
optimizer = torch.optim.Adam(base_net.parameters(), lr=0.001, weight_decay=1e-8)
criterion = nn.CrossEntropyLoss()

In [None]:
MinTrainLoss=999
epochs=10

cif_train_loss = []
cif_val_loss = []

cif_train_acc=[]
cif_val_acc=[]

for epoch in range(1, epochs+1):
    total_train_loss = []
    total_val_loss = []

    # Train
    base_net.train()
    for input_img, label in train_loader:
        input_img = input_img.cuda()
        label = label.cuda()
        optimizer.zero_grad()

        pred_img = base_net(input_img)
        loss = criterion(pred_img, label)
        loss.backward()
        optimizer.step()

    # Verify
    base_net.eval()
    current = 0
    with torch.no_grad():
        for val_img, val_label in val_loader:
            val_img = val_img.cuda()
            val_label = val_label.cuda()

            pred = base_net(val_img)
            total_val_loss.append(criterion(pred, val_label).item())
            current += (pred.argmax(1)==val_label).type(torch.float).sum().item()

    val_loss.append(np.mean(total_val_loss))
    val_acc.append(current/10000*100)

    print("epochs[%3d/%3d] val_loss: %.5f, val_acc: %.3f"%(epoch, epochs, val_loss[-1],val_acc[-1]))

epochs[  1/ 10] val_loss: 1.17634, val_acc: 58.910
epochs[  2/ 10] val_loss: 1.00609, val_acc: 65.320
epochs[  3/ 10] val_loss: 0.90740, val_acc: 68.400
epochs[  4/ 10] val_loss: 0.87473, val_acc: 69.910
epochs[  5/ 10] val_loss: 0.79948, val_acc: 72.510
epochs[  6/ 10] val_loss: 0.78960, val_acc: 73.760
epochs[  7/ 10] val_loss: 0.82434, val_acc: 73.630
epochs[  8/ 10] val_loss: 0.87633, val_acc: 73.830
epochs[  9/ 10] val_loss: 0.87718, val_acc: 74.570
epochs[ 10/ 10] val_loss: 0.88476, val_acc: 74.710


In [None]:
base_net.eval()
current = 0
total_test_loss = []
with torch.no_grad():
    for test_img, test_label in test_loader:
        test_img = test_img.cuda()
        test_label = test_label.cuda()

        pred = base_net(test_img)
        total_test_loss.append(criterion(pred, test_label).item())
        current += (pred.argmax(1)==test_label).type(torch.float).sum().item()

print("testdataset test_loss: %.5f, test_acc: %.3f"%(np.mean(total_val_loss), current/10000*100))

testdataset test_loss: 0.88476, test_acc: 74.410


# Pre-train ViT

In [None]:
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), transforms.Resize([224,224])])
training_data = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/RPData', train=True, download=True, transform=transform)
testing_data = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/RPData', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
train_db, val_db = utils.data.random_split(training_data, [40000,10000])

train_loader = DataLoader(train_db, batch_size=64, shuffle=True)
val_loader = DataLoader(val_db, batch_size=64, shuffle=True)
test_loader = DataLoader(testing_data, batch_size=64, shuffle=True)

In [None]:
class MyViT(nn.Module):
    def __init__(self, model, target_size):
        super(MyViT, self).__init__()
        self.model = model
        for param in self.model.parameters():
          param.requires_grad = False
        n_features = self.model.heads.head.in_features
        #
        self.model.heads.head = nn.Linear(n_features, target_size)

    def forward(self, x):
        x = self.model(x)
        return x

In [None]:
device = torch.device("cuda")
model = torch.hub.load('pytorch/vision', 'vit_b_16', weights='ViT_B_16_Weights.DEFAULT')

pred_vit = MyViT(model,10).to(device=device, dtype=torch.float32)
optimizer = torch.optim.Adam([{'params': pred_vit.model.heads.head.parameters()}], lr=0.01, weight_decay=1e-8)
criterion = nn.CrossEntropyLoss()

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


In [None]:
MinTrainLoss=999
epochs=3

val_acc=[]
val_loss=[]

for epoch in range(1, epochs+1):
    total_train_loss = []
    total_val_loss = []

    # Train
    pred_vit.train()
    for input_img, label in train_loader:
        input_img = input_img.cuda()
        label = label.cuda()
        optimizer.zero_grad()

        pred_img = pred_vit(input_img)
        loss = criterion(pred_img, label)
        loss.backward()
        optimizer.step()

    # Verify
    pred_vit.eval()
    current = 0
    with torch.no_grad():
        for val_img, val_label in val_loader:
            val_img = val_img.cuda()
            val_label = val_label.cuda()

            pred = pred_vit(val_img)
            total_val_loss.append(criterion(pred, val_label).item())
            current += (pred.argmax(1)==val_label).type(torch.float).sum().item()

    val_loss.append(np.mean(total_val_loss))
    val_acc.append(current/10000*100)

    print("epochs[%3d/%3d] val_loss: %.5f, val_acc: %.3f"%(epoch, epochs, val_loss[-1],val_acc[-1]))

epochs[  1/  3] val_loss: 0.34762, val_acc: 92.710
epochs[  2/  3] val_loss: 0.32649, val_acc: 93.710
epochs[  3/  3] val_loss: 0.39757, val_acc: 92.970


In [None]:
pred_vit.eval()
current = 0
total_test_loss = []
with torch.no_grad():
    for test_img, test_label in test_loader:
        test_img = test_img.cuda()
        test_label = test_label.cuda()

        pred = pred_vit(test_img)
        total_test_loss.append(criterion(pred, test_label).item())
        current += (pred.argmax(1)==test_label).type(torch.float).sum().item()

print("testdataset test_loss: %.5f, test_acc: %.3f"%(np.mean(total_val_loss), current/10000*100))

testdataset test_loss: 0.39757, test_acc: 93.160


In [None]:
!/opt/bin/nvidia-smi

Mon Nov 27 13:02:09 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P0    30W /  70W |   3241MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
torch.cuda.get_device_name(0)

'Tesla T4'