In [None]:
import os
import time
import numpy as np
from PIL import Image
from torch.utils.data.dataset import Dataset
from tqdm import tqdm
from torchvision import transforms
from torchvision import models
import torch
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from torch.utils.data.dataloader import DataLoader
from matplotlib import pyplot as plt
from numpy import printoptions
import requests
import tarfile
import random
import json
import torch.optim as optim

In [None]:
# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and CUDA) with one shared value so that runs are repeatable.
SEED = 2024
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Request deterministic cuDNN behaviour.
torch.backends.cudnn.deterministic = True

In [None]:
# Device object: the first CUDA GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Simple dataset with label binarization: each sample's genre is converted
# into a binary array of length 6 (the number of classes), with 1 in the
# positions of the applicable labels.
class MpstDataset(Dataset):
    """Image dataset with one genre label per sample, read from a JSON file.

    The annotation file must contain a ``'samples'`` list (each entry with an
    ``'id'`` and a ``'genre'``) and a ``'labels'`` list of class names.
    Images are read from ``<data_path>/<split_type>/<id>.png``.

    Each item is a tuple ``(img, anno, anno_idx)``:
      * ``img``      - the RGB image, passed through ``transforms`` if given;
      * ``anno``     - float array with one entry per name in ``'labels'``;
      * ``anno_idx`` - integer class index taken from ``GENRE_TO_IDX``.

    Raises:
        ValueError: if a sample's genre is not a key of ``GENRE_TO_IDX``.
    """

    # Fixed genre-name -> class-index mapping used for ``annos_idx``.
    GENRE_TO_IDX = {
        'Action': 0,
        'Comedy': 1,
        'Crime': 2,
        'Drama': 3,
        'Horror': 4,
        'Romance': 5,
    }

    def __init__(self, data_path, anno_path, transforms, split_type):
        self.transforms = transforms
        self.split_type = split_type
        self.data_path = data_path
        with open(anno_path) as fp:
            json_data = json.load(fp)
        samples = json_data['samples']
        self.classes = json_data['labels']

        self.imgs = []       # image ids (file names without the extension)
        self.annos = []      # per-sample label vectors over self.classes
        self.annos_idx = []  # per-sample integer class index
        print('loading', anno_path)
        for sample in samples:
            genre = sample['genre']
            # Reject unknown genres explicitly. The former if/elif chain left
            # `label` unbound (first sample) or silently reused the previous
            # sample's label when the genre matched none of the branches.
            try:
                label = self.GENRE_TO_IDX[genre]
            except (KeyError, TypeError):
                raise ValueError(
                    'unknown genre {!r} for sample {!r} in {}'.format(
                        genre, sample.get('id'), anno_path)
                ) from None
            self.imgs.append(sample['id'])
            # One entry per class name; for a string genre `in` is a
            # substring test (unchanged from the original implementation).
            # NOTE(review): this vector follows the order of the JSON
            # 'labels' list, while `label` follows GENRE_TO_IDX - confirm
            # that both orders agree.
            vector = [cls in genre for cls in self.classes]
            self.annos.append(np.array(vector, dtype=float))
            self.annos_idx.append(label)

    def __getitem__(self, item):
        anno = self.annos[item]
        anno_idx = self.annos_idx[item]
        img_path = os.path.join(self.data_path, self.split_type, self.imgs[item])
        img_path += '.png'
        # The context manager closes the underlying file as soon as the
        # converted copy has been created.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return img, anno, anno_idx

    def __len__(self):
        return len(self.imgs)

In [None]:
import yaml

# Read the notebook configuration; the image root folder is stored under gr_img -> path.
with open('./config/cfg.yaml') as f:
    cfg = yaml.safe_load(f)

img_folder = cfg['gr_img']['path']
split_train, split_val = 'train', 'val'

# Datasets without any transform (transforms=None), built from the per-split annotation files.
dataset_train = MpstDataset(
    img_folder,
    os.path.join(img_folder, 'train', 'train.json'),
    None,
    split_train,
)
dataset_val = MpstDataset(
    img_folder,
    os.path.join(img_folder, 'val', 'val.json'),
    None,
    split_val,
)

print(f"train dataset len: {len(dataset_train)}")
print(f"val dataset len: {len(dataset_val)}")

In [None]:
# Label distribution over the validation and training annotations combined.
samples = np.array(dataset_val.annos + dataset_train.annos)
with printoptions(precision=3, suppress=True):
    class_counts = samples.sum(axis=0)
    # Class indices ordered from the rarest to the most frequent label
    # (stable sort, so ties keep their original class order).
    sorted_ids = np.argsort(class_counts, kind='stable')
    sorted_counts = class_counts[sorted_ids]
    sorted_class_names = np.array(dataset_val.classes)[sorted_ids]
    bar_positions = range(len(dataset_val.classes))

    print('Label Tag Distribution (count, class name):',
          list(zip(sorted_counts.astype(int), sorted_class_names)))

    # Horizontal bar chart, one bar per class.
    plt.barh(bar_positions, width=sorted_counts)
    plt.yticks(bar_positions, sorted_class_names)
    plt.gca().margins(y=0)
    plt.grid()
    plt.title('Label Tag Distribution')
    plt.show()

In [None]:
# Training hyper-parameters.
num_workers = 0         # CPU worker processes for data loading (8 was the earlier setting)
lr = 1e-4               # learning rate
batch_size = 6          # samples per mini-batch
save_freq = 1           # checkpoint saving frequency, in epochs
max_epoch_number = 100  # number of training epochs

# Per-channel mean / std values used for input normalization.
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

In [None]:
# Directory that receives the model checkpoints of this run
save_path = './ckpt_genre/test3'
# Directory that receives the TensorBoard logs of this run
logdir = './logs_genre/test3'

# Load the TensorBoard notebook extension and open it on the log directory
# ({logdir} is interpolated from the Python variable by IPython)
%load_ext tensorboard
%tensorboard --logdir {logdir}

In [None]:
def checkpoint_save(model, save_path, epoch):
    """Write the model's ``state_dict`` to ``<save_path>/checkpoint-<epoch>.pth``.

    Args:
        model: Module to save. When it exposes a ``module`` attribute (as an
            ``nn.DataParallel`` wrapper does), the state dict of that wrapped
            module is saved instead of the wrapper's.
        save_path: Target directory; created if it does not exist yet.
        epoch: Integer epoch number, zero-padded to six digits in the file name.
    """
    # Create the directory on demand so saving never fails on a fresh run.
    os.makedirs(save_path, exist_ok=True)
    f = os.path.join(save_path, 'checkpoint-{:06d}.pth'.format(epoch))  # e.g. checkpoint-000012.pth
    target = model.module if hasattr(model, 'module') else model
    torch.save(target.state_dict(), f)
    print('saved checkpoint:', f)

In [None]:
# Training and validation use the same preprocessing: resize to 224x224,
# convert to a tensor, then normalize each channel with the shared `mean` /
# `std` defined with the training parameters (instead of repeating the
# literal values in every pipeline).
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    # transforms.RandomHorizontalFlip(), # data augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean, std),  # normalization
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

In [None]:

# Build the transformed datasets and the data loaders for training and validation.
img_folder = cfg['gr_img']['path']
split_train, split_val = 'train', 'val'

val_annotations = os.path.join(img_folder, 'val', 'val.json')
train_annotations = os.path.join(img_folder, 'train', 'train.json')

val_dataset = MpstDataset(img_folder, val_annotations, transforms_val, split_val)
train_dataset = MpstDataset(img_folder, train_annotations, transforms_train, split_train)

# Only the training loader shuffles; drop_last is not set, so a smaller final batch is kept.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
)

# With drop_last left at its default, len(loader) equals ceil(len(dataset) / batch_size).
num_train_batches = len(train_loader)
num_val_batches = len(val_loader)
print(f"num_train_batches: {num_train_batches}")
print(f"num_val_batches: {num_val_batches}")

In [None]:
n1 = 0  # NOTE(review): not referenced anywhere else in the visible notebook
# Pull a single batch from the validation loader as a sanity check.
iterator = iter(val_loader)
inputs, classes, c_idx = next(iterator)

# Print the image tensor, the label vectors and the class indices of that batch.
for batch_part in (inputs, classes, c_idx):
    print(batch_part)

In [None]:
import copy

def train(model, optimizer, train_loader, val_loader,
          device, logger, max_epochs=None, ckpt_dir=None):
    """Train ``model`` and keep a copy of the weights with the lowest validation loss.

    After every epoch the model is evaluated with ``validation``; whenever the
    validation loss improves, a deep copy of the model is kept and written to
    disk with ``checkpoint_save``.

    Args:
        model: Network to train; moved to ``device`` in place.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Loader yielding ``(imgs, labels, lb_idx)`` batches.
        val_loader: Loader with the same batch layout, used for validation.
        device: Device on which the batches and the model are placed.
        logger: Object with ``add_scalar(tag, value, step)`` (the notebook
            passes a ``SummaryWriter``).
        max_epochs: Number of epochs; defaults to the notebook-level
            ``max_epoch_number`` when ``None``.
        ckpt_dir: Checkpoint directory; defaults to the notebook-level
            ``save_path`` when ``None``.

    Returns:
        Tuple ``(final_model, epoch, best_loss_model, best_loss_epoch)``:
        the model after the last epoch (same object as ``model``), the last
        epoch number (0 if no epoch ran), the copy with the lowest validation
        loss, and the epoch at which that copy was taken (0 if validation
        never improved).
    """
    num_epochs = max_epoch_number if max_epochs is None else max_epochs
    ckpt_dir = save_path if ckpt_dir is None else ckpt_dir

    model.to(device)
    criterion = nn.CrossEntropyLoss()

    # Start from +inf so the first finite validation loss always counts as an
    # improvement (a fixed sentinel of 100.0 would skip larger losses).
    best_val_loss = float('inf')
    best_loss_epoch = 0
    # Snapshot of the untrained weights, returned if validation never improves.
    best_loss_model = copy.deepcopy(model)
    final_model = model

    iteration = 0
    epoch = 0  # stays 0 when num_epochs < 1
    # Use the loader's own dataset instead of the global `train_dataset`.
    num_train_samples = len(train_loader.dataset)

    start_time = time.time()
    for epoch in range(1, num_epochs + 1):
        running_corrects = 0
        model.train()  # validation() leaves the model in eval mode
        train_loss = []

        for imgs, labels, lb_idx in tqdm(train_loader):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            lb_idx = lb_idx.to(device)

            optimizer.zero_grad()
            output = model(imgs)
            _, preds = torch.max(output, 1)
            # The loss uses the per-class label vector; accuracy uses the class index.
            loss = criterion(output, labels.type(torch.float))
            batch_loss_value = loss.item()
            loss.backward()
            optimizer.step()

            running_corrects += torch.sum(preds == lb_idx).item()
            # tensorboard-1: training loss of every batch
            logger.add_scalar('train_loss/iter', batch_loss_value, iteration)
            train_loss.append(batch_loss_value)
            iteration += 1

        # Validation once per epoch
        _val_loss = validation(model, criterion, val_loader, device, logger, iteration)
        epoch_acc = running_corrects / num_train_samples * 100.

        if _val_loss < best_val_loss:  # new best model: keep a copy and save it
            best_val_loss = _val_loss
            best_loss_model = copy.deepcopy(model)
            best_loss_epoch = epoch
            checkpoint_save(best_loss_model, ckpt_dir, best_loss_epoch)

        # tensorboard-2: validation loss per epoch
        logger.add_scalar('val_loss/epoch', _val_loss, epoch)
        _train_loss = np.mean(train_loss)
        # tensorboard-3: mean training loss of the epoch, indexed by iteration
        logger.add_scalar('_train_loss/iter', _train_loss, iteration)
        # tensorboard-4: mean training loss of the epoch, indexed by epoch
        logger.add_scalar('_train_loss/epoch', _train_loss, epoch)

        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] ')
        print(' Acc: {:.4f}% Time: {:.4f}s'.format(epoch_acc, time.time() - start_time))
        final_model = model

    return final_model, epoch, best_loss_model, best_loss_epoch

def validation(model, criterion, val_loader, device, logger, iteration):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled while the batches are processed.

    Args:
        model: Network to evaluate.
        criterion: Loss called as ``criterion(outputs, labels)``; ``train``
            passes an ``nn.CrossEntropyLoss``.
        val_loader: Loader yielding ``(imgs, labels, lb_idx)`` batches.
        device: Device on which the batches are placed.
        logger: Not used; kept so existing calls keep working.
        iteration: Not used; kept so existing calls keep working.

    Returns:
        Mean of the per-batch loss values, as returned by ``np.mean``.
    """
    model.eval()
    val_loss = []
    with torch.no_grad():
        # The class indices are not needed here, so they are not moved to the device.
        for imgs, labels, _ in tqdm(val_loader):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels.type(torch.float))
            val_loss.append(loss.item())
    return np.mean(val_loss)

In [None]:
print(logdir)
# TensorBoard logger writing into `logdir`
logger = SummaryWriter(logdir)
print(save_path)

In [None]:
# Load VGG19 with pretrained weights and print its layer structure.
# NOTE(review): newer torchvision releases replace `pretrained=` with `weights=` - confirm against the installed version.
model = models.vgg19(pretrained=True)
print(model)

In [None]:
# Initialize the model: replace the last classifier layer with a 6-way output.
# Alternative backbones that were tried (kept for reference):
# model = models.resnet34(pretrained=True)
# num_features = model.fc.in_features
# model.fc = nn.Linear(num_features, 6)
# model = models.mobilenet_v2(pretrained=True)
# num_features = model.classifier[1].in_features
# model.classifier[1] = nn.Linear(num_features, 6)
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 6)
model = model.to(device)
# NOTE(review): train() creates its own CrossEntropyLoss, so this `criterion` is not used by the training loop.
criterion = nn.CrossEntropyLoss()
# NOTE(review): the learning rate is hard-coded to 0.001 here; the `lr = 1e-4` defined with the
# training parameters is not used - confirm which value is intended.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Switch the model to training mode (it was already moved to `device` above).
model.train()

# If more than one GPU is available, wrap the model so batches are split across them.
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

In [None]:
# Make sure the checkpoint directory exists before training writes into it.
os.makedirs(save_path, exist_ok=True)

In [None]:
# Run the training loop; keeps the last-epoch model and the best-validation-loss copy.
final_model, epoch, best_loss_model, best_loss_epoch = train(
    model, optimizer, train_loader, val_loader, device, logger=logger
)

In [None]:
# Show where the checkpoints of this run are stored.
print(save_path)

In [None]:
# Save the weights the model holds after training, named after the last epoch number returned by train().
checkpoint_save(model, save_path, epoch)

In [None]:
# Run inference on the test data
# Test-time preprocessing
from torchvision import transforms
import os

# Use the same 224x224 input size as the training / validation pipelines;
# resizing test images to 256x256 would evaluate the model on inputs of a
# different size than it was trained and validated on.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

img_folder = cfg['gr_img']['path']
split_test = 'test'

test_annotations = os.path.join(img_folder, 'test', 'test.json')
test_dataset = MpstDataset(img_folder, test_annotations, test_transform, split_test)
# No shuffling: the inference cell matches predictions to test_dataset.imgs by position.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                             num_workers=num_workers)


In [None]:
# Label distribution of the test dataset.
samples = np.array(test_dataset.annos)
with printoptions(precision=3, suppress=True):
    class_counts = samples.sum(axis=0)
    # Class indices ordered from the rarest to the most frequent label
    # (stable sort, so ties keep their original class order).
    sorted_ids = np.argsort(class_counts, kind='stable')
    sorted_counts = class_counts[sorted_ids]
    sorted_class_names = np.array(test_dataset.classes)[sorted_ids]
    bar_positions = range(len(test_dataset.classes))

    print('Label distribution (count, class name):',
          list(zip(sorted_counts.astype(int), sorted_class_names)))

    # Horizontal bar chart, one bar per class.
    plt.barh(bar_positions, width=sorted_counts)
    plt.yticks(bar_positions, sorted_class_names)
    plt.gca().margins(y=0)
    plt.grid()
    plt.title('Label distribution')
    plt.show()

In [None]:
# Number of samples in the test dataset.
print(len(test_dataset))

In [None]:
import numpy
import pandas as pd

# Genre names in class-index order (matches the label indices assigned in MpstDataset).
class_names = ['Action', 'Comedy', 'Crime', 'Drama', 'Horror', 'Romance']

# NOTE(review): this evaluates `model`, i.e. the weights after the last epoch,
# not `best_loss_model` - confirm that this is intended.
model.eval()
start_time = time.time()
name = []         # image ids, in dataset order
predictions = []  # predicted genre names
true_data = []    # ground-truth genre names

# Build the result path portably (a hard-coded '\\' only works on Windows)
# and make sure the output directory exists before writing.
save_rt_path = cfg['genre_result']['path']
if save_rt_path:
    os.makedirs(save_rt_path, exist_ok=True)
fc_name = os.path.join(save_rt_path, 'test3.csv')

with torch.no_grad():
    running_corrects = 0
    idx = 0  # position in test_dataset.imgs; valid because the loader does not shuffle
    for inputs, labels, lb_idx in test_dataloader:
        inputs = inputs.to(device)
        lb_idx = lb_idx.to(device)
        outputs = model(inputs)
        # Index of the largest output along dim=1 is the predicted class.
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == lb_idx).item()

        for pred_i, true_i in zip(preds.tolist(), lb_idx.tolist()):
            file_id = test_dataset.imgs[idx]
            print(f'filename: {file_id}, [Prediction]: {class_names[pred_i]}, (Answer): {class_names[true_i]}')
            name.append(file_id)
            predictions.append(class_names[pred_i])
            true_data.append(class_names[true_i])
            idx += 1

    epoch_acc = running_corrects / len(test_dataset) * 100.
    print('[Test Phase] Acc: {:.4f}% Time: {:.4f}s'.format(epoch_acc, time.time() - start_time))
    print(idx)

# Collect the per-image results in one table and write it to CSV.
meta_tagger = pd.DataFrame({
    'title': name,
    'Prediction Genre': predictions,
    'Real Genre': true_data,
})
meta_tagger.to_csv(fc_name, encoding='utf-8-sig', index=True)