# Train a ResNet-152 classifier on the Hanoi image dataset

In [1]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.models as models
import pandas as pd
import os
import PIL
import torch.nn as nn
import numpy as np
from natsort import natsorted
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split

In [2]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
# Load the multi-label annotation table: one row per image, with the image
# file name in the first column followed by one 0/1 column per class.
dp = pd.read_csv('images/multi-label.csv')
# dp.drop('Image', axis='columns', inplace=True)
# Trailing expression so the notebook renders the whole table.
dp[:]

Unnamed: 0,Image,HoGuom,HoTay,ThapRua,CauTheHuc,BuuDien,VuonHoa,ChuaTranQuoc,DenQuanThanh,KhachSan,CongVienNuoc
0,0.jpg,1,0,0,1,0,0,0,0,0,0
1,1.jpg,1,0,0,1,0,0,0,0,0,0
2,2.jpg,1,0,0,1,0,0,0,0,0,0
3,3.jpg,1,0,1,0,1,0,0,0,0,0
4,4.jpg,1,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
657,657.jpg,0,1,0,0,0,1,0,0,0,0
658,658.jpg,0,1,0,0,0,1,0,0,0,0
659,659.jpg,0,1,0,0,0,1,0,0,0,0
660,660.jpg,0,1,0,0,0,1,0,0,0,0


In [4]:
# Class names: every header of `dp` with surrounding whitespace trimmed,
# minus the first header (the image file-name column).
columns = np.array([header.strip() for header in dp.columns.tolist()][1:])
columns

array(['HoGuom', 'HoTay', 'ThapRua', 'CauTheHuc', 'BuuDien', 'VuonHoa',
       'ChuaTranQuoc', 'DenQuanThanh', 'KhachSan', 'CongVienNuoc'],
      dtype='<U12')

In [5]:
# Directory the image paths below point into. The listing now uses the same
# directory as the generated paths (previously 'Image Dataset' was listed but
# '../Image Dataset' was used to build the paths).
image_dir = '../Image Dataset'

# os.listdir returns entries in arbitrary order, but the files are paired with
# the rows of `dp` purely by position, so sort them naturally (0, 1, 2, ...,
# 10, 11, ...) before truncating to the number of annotated rows.
# NOTE(review): this assumes the i-th file in natural order belongs to the
# i-th CSV row — confirm against the 'Image' column of `dp`.
file_names = natsorted(
    [name for name in os.listdir(image_dir) if name.endswith((".jpg", ".jpeg"))]
)
file_names = file_names[:dp.shape[0]]
file_names = [os.path.join(image_dir, name) for name in file_names]
file_names[:]

['../Image Dataset\\000.jpg',
 '../Image Dataset\\001.jpg',
 '../Image Dataset\\002.jpg',
 '../Image Dataset\\003.jpg',
 '../Image Dataset\\004.jpg',
 '../Image Dataset\\005.jpg',
 '../Image Dataset\\006.jpg',
 '../Image Dataset\\007.jpg',
 '../Image Dataset\\008.jpg',
 '../Image Dataset\\009.jpg',
 '../Image Dataset\\010.jpg',
 '../Image Dataset\\011.jpg',
 '../Image Dataset\\012.jpg',
 '../Image Dataset\\013.jpg',
 '../Image Dataset\\014.jpg',
 '../Image Dataset\\015.jpg',
 '../Image Dataset\\016.jpg',
 '../Image Dataset\\017.jpg',
 '../Image Dataset\\018.jpg',
 '../Image Dataset\\019.jpg',
 '../Image Dataset\\020.jpg',
 '../Image Dataset\\021.jpg',
 '../Image Dataset\\022.jpg',
 '../Image Dataset\\023.jpg',
 '../Image Dataset\\024.jpg',
 '../Image Dataset\\025.jpg',
 '../Image Dataset\\026.jpg',
 '../Image Dataset\\027.jpg',
 '../Image Dataset\\028.jpg',
 '../Image Dataset\\029.jpg',
 '../Image Dataset\\030.jpg',
 '../Image Dataset\\031.jpg',
 '../Image Dataset\\032.jpg',
 '../Image

In [6]:
class ImageDataset(Dataset):
    """Dataset pairing image files with their multi-label target rows.

    Args:
        file_paths: Sequence of image paths, indexed by position.
        labels: DataFrame whose i-th row (by position) holds the targets for
            ``file_paths[i]``. The first column is skipped; the remaining
            columns are returned as the target vector.
        device: Device that the returned tensors are moved to.
        transform: Callable applied to the RGB ``PIL.Image``; it must return
            a tensor.
    """

    def __init__(self, file_paths, labels, device = 'cpu', transform=transforms.ToTensor()):
        self.file_paths = file_paths
        self.labels = labels
        # Keep the requested device; previously the argument was dropped and
        # a module-level global was used instead.
        self.device = device
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.file_paths)

    def __getitem__(self, index):
        """Return ``(image, target)`` for the sample at position ``index``."""
        file_path = self.file_paths[index]
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(file_path) as handle:
            image = handle.convert("RGB")
        # Positional lookup, to stay aligned with the positional file_paths
        # list even if the frame's index labels are not 0..n-1.
        row = self.labels.iloc[index]
        image = self.transform(image)
        # Skip the first column and force a float32 target vector.
        target = torch.from_numpy(row.iloc[1:].to_numpy(dtype=np.float32))
        return image.to(self.device), target.to(self.device)
    
# Preprocessing + augmentation pipeline applied to each PIL image, in order.
transform = transforms.Compose([
    transforms.RandomEqualize(), # Equalize the histogram of the given image randomly with a given probability (default p=0.5)
    transforms.Resize((224, 224)),  # Resize the image to (224, 224)
    transforms.ToTensor(),  # Convert the image to a tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Normalize each channel with mean 0.5 and std 0.5
    transforms.RandomHorizontalFlip(), # Randomly decide whether to flip the image horizontally (default p=0.5)
    transforms.RandomRotation(20) # Rotate the image by a random angle between -20 and 20 degrees
])

In [7]:
# 80/20 train/validation split of the (path, label-row) pairs with a fixed seed.
img_train, img_val, labels_train, labels_val = train_test_split(
    file_names,
    dp,
    test_size=0.2,
    random_state=42,
)
# Renumber the label rows 0..n-1 so they line up with the positional path lists.
labels_train = labels_train.reset_index(drop=True)
labels_val = labels_val.reset_index(drop=True)
labels_train

Unnamed: 0,Image,HoGuom,HoTay,ThapRua,CauTheHuc,BuuDien,VuonHoa,ChuaTranQuoc,DenQuanThanh,KhachSan,CongVienNuoc
0,18.jpg,1,0,1,0,0,0,0,0,0,0
1,249.jpg,0,1,0,0,0,1,0,0,0,0
2,395.jpg,0,1,0,0,0,0,0,1,0,0
3,192.jpg,1,0,0,0,1,0,0,0,0,0
4,61.jpg,1,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
524,71.jpg,1,0,1,0,1,0,0,0,0,0
525,106.jpg,1,0,0,1,0,0,0,0,0,0
526,270.jpg,0,0,0,0,0,0,0,0,0,1
527,435.jpg,0,1,0,0,0,0,0,1,0,0


In [8]:
# Deterministic preprocessing for validation: the same resize and
# normalisation as the training `transform`, but without the random
# equalize/flip/rotation steps, so validation metrics (and the choice of the
# best checkpoint) are reproducible.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

train_set = ImageDataset(file_paths=img_train, labels=labels_train, device=device, transform=transform)
val_set = ImageDataset(file_paths=img_val, labels=labels_val, device=device, transform=eval_transform)

# Worker processes are left at the default (0): the dataset moves tensors to
# `device` inside __getitem__.
train_loader = DataLoader(
    train_set,
    batch_size=4,
    shuffle=True,
)

# Validation order does not affect the summed metrics, so keep it fixed.
val_loader = DataLoader(
    val_set,
    batch_size=4,
    shuffle=False,
)

def pre_process(*args):
    """Return a list with each positional argument moved to the global ``device``.

    Every argument must expose a ``.to(device)`` method; the results are
    returned in the order the arguments were given.
    """
    return [item.to(device) for item in args]

In [9]:
def get_model(out_feature, device):
    """Build a pretrained ResNet-152 with a sigmoid multi-label head.

    Args:
        out_feature: Number of output units of the new final layer.
        device: Device the returned model is moved to.

    Returns:
        The model whose ``fc`` layer is replaced by ``Linear -> Sigmoid``.
    """
    backbone = models.resnet152(pretrained=True)
    # Swap the stock classifier for a head sized to this task; the sigmoid
    # yields one independent probability per output unit.
    head = nn.Sequential(
        nn.Linear(backbone.fc.in_features, out_feature),
        nn.Sigmoid(),
    )
    backbone.fc = head
    return backbone.to(device=device)


In [11]:
# Root folder for this experiment's run sub-folders (also read by the
# inference cells further down).
path = 'my_project/train-cls'
# One output unit per class name in `columns`.
model = get_model(len(columns), device=device)

In [12]:
def _run_epoch(net, loader, criterion, threshold, device, optimizer=None):
    """Run one pass over ``loader`` and return summed batch statistics.

    When ``optimizer`` is given, each batch is also back-propagated and the
    optimizer is stepped; otherwise the pass is forward-only.

    Returns:
        Tuple ``(loss_sum, exact_sum, label_sum)``: the loss weighted by batch
        size, the number of samples whose every label is predicted correctly,
        and the sum over samples of the fraction of correctly predicted labels.
    """
    loss_sum = 0.0
    exact_sum = 0.0
    label_sum = 0.0
    for inputs, labels in loader:
        if device is not None:
            # No-op when the loader already yields tensors on the model's device.
            inputs, labels = inputs.to(device), labels.to(device)

        if optimizer is not None:
            optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        # Threshold on a detached copy so the metrics never touch autograd.
        correct = (outputs.detach() >= threshold).float() == labels
        loss_sum += loss.item() * inputs.shape[0]
        exact_sum += correct.all(dim=1).sum().item()
        label_sum += correct.float().mean(dim=1).sum().item()
    return loss_sum, exact_sum, label_sum


def train(net, train_size, val_size, trainloader, valloader, epochs, criterion, optimizer, threshold=0.5,
          path = 'my_project/train-cls'):
    """Train ``net`` and log per-epoch metrics to TensorBoard.

    A new run folder ``{path}/try{k}`` is created, where ``k`` is the number
    of entries already in ``path``. The model with the highest exact-match
    validation accuracy is saved there as ``best.pt`` and the final model as
    ``last.pt`` (both as whole pickled modules).

    Args:
        net: Model producing per-label probabilities.
        train_size: Number of training samples, used to average the metrics.
        val_size: Number of validation samples, used to average the metrics.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        valloader: Iterable of ``(inputs, labels)`` validation batches.
        epochs: Number of epochs to run.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        threshold: Probability at or above which a label counts as predicted.
        path: Root folder that holds the run folders; created if missing.
    """
    # Create the root first so a first-ever run does not crash in os.listdir.
    os.makedirs(path, exist_ok=True)
    len_log_folder = len(os.listdir(path))
    run_dir = f'{path}/try{len_log_folder}'
    os.makedirs(run_dir, exist_ok=True)
    writer = SummaryWriter(run_dir)

    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else None

    # Start below any reachable accuracy so the first epoch always writes
    # best.pt, even when validation accuracy is 0.
    best_val_acc = -1.0

    try:
        for epoch in range(epochs):
            net.train()
            training_loss, train_acc, bi_train_acc = _run_epoch(
                net, trainloader, criterion, threshold, device, optimizer)

            net.eval()
            with torch.no_grad():
                val_loss, val_acc, bi_val_acc = _run_epoch(
                    net, valloader, criterion, threshold, device)

            training_loss /= train_size
            val_loss /= val_size

            bi_train_acc /= train_size
            bi_val_acc /= val_size

            train_acc /= train_size
            val_acc /= val_size
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(net, f'{run_dir}/best.pt')

            print(f"epoch: {epoch}, training loss: {training_loss}, val_loss: {val_loss}, " + \
                  f"train_acc: {train_acc}, val_acc: {val_acc} " + \
                  f"bi_train_acc: {bi_train_acc}, bi_val_acc: {bi_val_acc}" )
            writer.add_scalar("train/loss", training_loss, epoch)
            writer.add_scalar("train/acc", train_acc, epoch)
            writer.add_scalar("train/bi_acc", bi_train_acc, epoch)

            writer.add_scalar("val/loss", val_loss, epoch)
            writer.add_scalar("val/acc", val_acc, epoch)
            writer.add_scalar("val/bi_acc", bi_val_acc, epoch)

            torch.cuda.empty_cache()
        torch.save(net, f'{run_dir}/last.pt')
    finally:
        # Flush and close the event file even if training is interrupted.
        writer.flush()
        writer.close()
    print('Finished Training')


In [13]:
# Optimisation hyper-parameters.
lr = 1e-5
weight_decay = 5e-4
epochs = 20

# Binary cross-entropy on the model's sigmoid outputs, one term per label.
criterion = nn.BCELoss()

# Every parameter whose name does not contain 'fc' trains at the base rate;
# the replaced `fc` head trains at ten times that rate.
params_1x = [
    param
    for name, param in model.named_parameters()
    if 'fc' not in str(name)
]
param_groups = [
    {'params': params_1x},
    {'params': model.fc.parameters(), 'lr': lr * 10},
]
optimizer = torch.optim.Adam(param_groups, lr=lr, weight_decay=weight_decay)

## Visualization using TensorBoard
Run in a terminal: `tensorboard --logdir=my_project/train-cls`

In [14]:
# Run the training loop; the dataset lengths are passed so that per-epoch
# losses and accuracies are averaged per sample.
train(model, len(train_set), len(val_set), train_loader, val_loader, epochs, criterion, optimizer, threshold=0.5)


epoch: 0, training loss: 0.3561507362048432, val_loss: 0.3039457242851867, train_acc: 0.21172022819519043, val_acc: 0.33834588527679443 bi_train_acc: 0.8606806397438049, bi_val_acc: 0.879699170589447
epoch: 1, training loss: 0.26471499025483664, val_loss: 0.21273836590125597, train_acc: 0.38185256719589233, val_acc: 0.3834586441516876 bi_train_acc: 0.9032137393951416, bi_val_acc: 0.9210525751113892
epoch: 2, training loss: 0.2141474955530383, val_loss: 0.1888002952686826, train_acc: 0.4253308176994324, val_acc: 0.451127827167511 bi_train_acc: 0.9173915982246399, bi_val_acc: 0.9278197884559631
epoch: 3, training loss: 0.1812774262942078, val_loss: 0.1222592842015893, train_acc: 0.4517958462238312, val_acc: 0.6015037894248962 bi_train_acc: 0.9257086515426636, bi_val_acc: 0.9533836245536804
epoch: 4, training loss: 0.16205725518877429, val_loss: 0.11567664693312761, train_acc: 0.5368620157241821, val_acc: 0.7218044996261597 bi_train_acc: 0.9353495240211487, bi_val_acc: 0.9601503610610962


## Inference

In [16]:
# Number of entries in the experiment folder; `try{len_log_folder-1}` is taken
# to be the most recent run.
len_log_folder = len(os.listdir(path))
# map_location="cpu" remaps the stored tensors while loading, so a checkpoint
# written on a GPU can also be opened on a machine without CUDA.
# NOTE: torch.load unpickles a whole module here — only load checkpoints you
# trust. Recent PyTorch releases may also need `weights_only=False` for such
# files — TODO confirm for the installed version.
model = torch.load(f'{path}/try{len_log_folder-1}/best.pt', map_location="cpu")
# Make inference mode explicit instead of relying on the saved training flag.
model.eval()


In [17]:
# Submission template: image file name in column "A", followed by one column
# per class that the inference loop fills in.
kq = pd.read_csv(f"images/Test/Anh_Va_Nhan.csv")
kq.head()

Unnamed: 0,A,HG,HT,TR,CTH,BD,VH,CTQ,DQT,KS,CVN
0,72.jpg,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1,86.jpg,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,97.jpg,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,56.jpg,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,59.jpg,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [18]:
folder_test = "images/Test/"

# Deterministic preprocessing for inference: the same resize and normalisation
# as the training `transform`, without its random augmentation steps, so that
# repeated runs give the same predictions.
infer_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

model.eval()
with torch.no_grad():
    for pos, file_name in enumerate(kq["A"]):
        # A cell equal to the header text "A" marks the end of the rows to fill.
        if file_name == "A":
            break
        file_path = os.path.join(folder_test, file_name)
        with Image.open(file_path) as handle:
            img = handle.convert("RGB")
        batch = infer_transform(img).unsqueeze(0)
        probabilities = model(batch)[0]
        # Write 0/1 flags straight into `kq`. The rows yielded by
        # DataFrame.iterrows may be copies, so assigning to them is not
        # guaranteed to update the frame.
        kq.iloc[pos, 1:] = (probabilities >= 0.5).int().tolist()

kq

Unnamed: 0,A,HG,HT,TR,CTH,BD,VH,CTQ,DQT,KS,CVN
0,72.jpg,0,0,0,0,0,0,0,0,0,1
1,86.jpg,1,0,0,0,1,0,0,0,0,0
2,97.jpg,0,0,0,0,0,0,0,0,0,1
3,56.jpg,1,0,0,1,0,0,0,0,0,0
4,59.jpg,1,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
196,113.jpg,0,0,0,0,1,0,0,0,0,0
197,35.jpg,1,0,0,0,1,0,0,0,0,0
198,159.jpg,0,0,0,0,0,0,0,0,0,0
199,91.jpg,0,1,0,0,0,1,0,0,0,0


In [19]:
# Write the filled-in predictions next to the checkpoint they came from,
# without the DataFrame index column.
kq.to_csv(f'{path}/try{len_log_folder-1}/Team_5_Submission.csv', index=False)