## Movie Poster

In [5]:
import numpy as np
classes = list(dataframe.columns.drop(['Id', 'Genre']))
labels = np.array(dataframe.drop(['Id', 'Genre'], axis=1))

In [22]:
from pathlib import Path

image_paths = list(Path('./dataset/movie_poster/Images/').glob('**/*.jpg'))

In [23]:
len(image_paths)

7867

In [33]:
image_paths[0]

PosixPath('dataset/movie_poster/Images/tt0084058.jpg')

In [30]:
image_dir = Path('./dataset/movie_poster/Images/')
image_names = list(dataframe['Id'])

In [34]:
import cv2
image_path = str(image_dir.joinpath(image_names[0] + '.jpg'))
image = cv2.imread(image_path)
cv2.imshow('a', image)
cv2.waitKey()
cv2.destroyAllWindows()

In [95]:
import pandas as pd
import numpy as np
import shutil
import cv2

from pathlib import Path


# Load the original annotation table and derive the per-image lists used by
# the following cells.
dataframe = pd.read_csv('./dataset/movie_poster/origin/train.csv')

image_dir = Path('./dataset/movie_poster/origin/Images/')
image_extent = '.jpg'

# Every column other than the two metadata columns is treated as a label column.
label_frame = dataframe.drop(columns=['Id', 'Genre'])

image_names = dataframe['Id'].tolist()
image_paths = [str(image_dir / (name + image_extent)) for name in image_names]
image_genres = dataframe['Genre'].tolist()
labels = label_frame.to_numpy()
column_names = dataframe.columns.tolist()
classes = label_frame.columns.tolist()

In [2]:
items = [[image_path, image_genre, label] for (image_path, image_genre, label) in zip(image_paths, image_genres, labels)]

In [4]:
# Split `items` into contiguous train / valid / test slices (64% / 16% / rest)
# and export every slice as an image folder plus a CSV with the original columns.
# NOTE(review): `items` is sliced in its current order and this cell does not
# shuffle it; confirm the source CSV order does not bias the splits.
train_ratio = 0.64
valid_ratio = 0.16

# Compute each boundary once so adjacent splits share exactly the same index.
num_items = len(items)
train_end = int(round(num_items * train_ratio))
valid_end = int(round(num_items * (train_ratio + valid_ratio)))

train_index = (0, train_end)
valid_index = (train_end, valid_end)
test_index = (valid_end, num_items)

dirname = Path('./dataset/movie_poster/')

train_dir = dirname.joinpath('train').joinpath('images')
valid_dir = dirname.joinpath('valid').joinpath('images')
test_dir = dirname.joinpath('test').joinpath('images')

train_csv = dirname.joinpath('train').joinpath('train.csv')
valid_csv = dirname.joinpath('valid').joinpath('valid.csv')
test_csv = dirname.joinpath('test').joinpath('test.csv')

for _dirname in [train_dir, valid_dir, test_dir]:
    # exist_ok avoids the check-then-create race and keeps the cell re-runnable.
    _dirname.mkdir(parents=True, exist_ok=True)

for data_index, _dirname, csv_path in zip([train_index, valid_index, test_index], [train_dir, valid_dir, test_dir], [train_csv, valid_csv, test_csv]):
    start, stop = data_index
    rows = []
    for image_path, image_genre, label in items[start:stop]:
        # Row layout follows `column_names`: image id, genre, then the label values.
        rows.append([Path(image_path).stem, image_genre, *label])
        shutil.copy(str(image_path), str(_dirname))
    # Passing `columns` makes pandas raise if a row's width does not match the
    # header, instead of silently dropping the surplus values.
    df = pd.DataFrame(rows, columns=column_names)
    df.to_csv(str(csv_path), index=False)

## Dataloader

In [55]:
import cv2
import torch
import random
import numpy as np
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset


class MovieGenreDataset(Dataset):
    """Multi-label image dataset read from an image folder plus a CSV file.

    The CSV must provide an ``Id`` column (image file name without extension)
    and a ``Genre`` column; every other column is used as one entry of the
    label vector.

    Args:
        datadir: directory that contains the image files.
        csv_path: path of the CSV annotation file.
        image_extent: file extension appended to each ``Id`` (e.g. ``'.jpg'``).
        image_size: ``dsize`` handed to ``cv2.resize`` for the final resize.
        inner_size: length the shorter image side is scaled to before the
            augmentations run.
        transforms: optional sequence of callables invoked as
            ``transform(image=array)``. For every sample a random subset of
            them is applied in random order.
    """

    def __init__(self, datadir, csv_path, image_extent, image_size, inner_size, transforms=None):
        super(MovieGenreDataset, self).__init__()
        self.inner_size = inner_size
        self.image_size = image_size
        self.transforms = transforms if transforms is not None else []

        df = pd.read_csv(str(csv_path))
        label_frame = df.drop(['Id', 'Genre'], axis=1)
        # str() keeps path construction working when the ids were parsed as numbers.
        image_paths = [str(Path(datadir).joinpath(str(image_name) + image_extent)) for image_name in list(df['Id'])]
        genres = list(df['Genre'])
        labels = np.array(label_frame).tolist()
        self.classes = list(label_frame.columns)
        self.data = [(image_path, genre, label) for image_path, genre, label in zip(image_paths, genres, labels)]
        print(f'- {Path(datadir).parents[0].stem}: {len(self.data)}')

    def __len__(self):
        """Return the number of (image, genre, label) records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, augment, resize and standardise one sample.

        Returns:
            A tuple ``(sample, target, image_path)`` where ``sample`` is a float
            tensor in channel-first layout, ``target`` is the float label
            vector and ``image_path`` is the source file path as a string.

        Raises:
            OSError: if the image file is missing or cannot be decoded.
        """
        image_path, genre, label = self.data[idx]
        # NOTE(review): cv2.imread yields BGR channel order and no conversion is
        # done here; confirm that this matches what the model expects.
        sample = cv2.imread(str(image_path))
        if sample is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f'cannot read image (missing or not decodable): {image_path}')
        sample = self._resize(sample, self.inner_size)

        # Apply a random-sized, randomly ordered subset of the augmentations.
        for transform in random.sample(self.transforms, k=random.randint(0, len(self.transforms))):
            sample = transform(image=sample)

        sample = cv2.resize(sample, dsize=self.image_size)
        sample = np.ascontiguousarray(sample)
        sample = torch.from_numpy(sample).permute(2, 0, 1).to(torch.float)
        sample = self._normalize(sample)

        target = torch.from_numpy(np.asarray(label)).to(torch.float)

        return sample, target, str(image_path)

    @staticmethod
    def _normalize(sample):
        """Standardise a tensor to zero mean / unit std over all its elements.

        A constant tensor has zero standard deviation, so it is mapped to all
        zeros instead of being divided by zero. Constancy is detected with
        ``min == max``, which is exact and does not depend on how the floating
        point mean happens to round.
        """
        if bool(sample.min() == sample.max()):
            return torch.zeros_like(sample)
        return (sample - sample.mean()) / sample.std()

    def _resize(self, image, size):
        """Scale ``image`` uniformly so that its shorter side has length ``size``."""
        ratio = size / min(image.shape[:2])
        image = cv2.resize(image, dsize=(0, 0), fx=ratio, fy=ratio)
        return image

In [56]:
import imgaug.augmenters as iaa

datadir = './dataset/movie_poster/train/images/'
csv_path = './dataset/movie_poster/train/train.csv'
image_extent = '.jpg'
image_size = (400, 400)
inner_size = 512
transforms = [
    iaa.Fliplr(p=0.5),
    iaa.Flipud(p=0.5),
]

train_dataset = MovieGenreDataset(datadir, csv_path, image_extent, image_size, inner_size, transforms)

- train: 4643


In [57]:
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset=train_dataset, batch_size=4, shuffle=True)

In [58]:
# Pull a single batch to inspect it. The built-in next() is used because the
# Python 3 iterator protocol is __next__; a .next() method is not guaranteed
# to exist on the DataLoader iterator.
train_iter = iter(train_loader)
samples, targets, image_paths = next(train_iter)

In [62]:
targets

tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.]])

## Model

In [75]:
import torch
from torch import nn
from torchvision import models


class MobileNetV2(nn.Module):
    """torchvision MobileNetV2 with its final linear layer replaced.

    Args:
        num_classes: number of output logits of the new classifier layer.
        pretrained: forwarded to ``models.mobilenet_v2(pretrained=...)``.
        features_fixed: when true, gradients are disabled for
            ``model.features``; the new classifier layer stays trainable.
    """

    def __init__(self, num_classes, pretrained, features_fixed):
        super(MobileNetV2, self).__init__()
        self.model = models.mobilenet_v2(pretrained=pretrained)
        self.model.features.requires_grad_(not features_fixed)
        # Read the input width from the layer being replaced rather than
        # hard-coding it.
        in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = torch.nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the logits of the wrapped network for batch ``x``."""
        return self.model(x)

    def state_dict(self, *args, **kwargs):
        """Return the wrapped network's state dict (keys without a ``model.`` prefix).

        All arguments are forwarded, so the standard ``nn.Module.state_dict``
        options (e.g. ``prefix``, ``keep_vars``) keep working, including when
        this module is nested inside another module.
        """
        return self.model.state_dict(*args, **kwargs)

    def load_state_dict(self, state_dict, *args, **kwargs):
        """Load ``state_dict`` into the wrapped network.

        Extra arguments such as ``strict`` are forwarded, and the result of
        the underlying call is returned instead of being discarded.
        """
        return self.model.load_state_dict(state_dict, *args, **kwargs)

In [81]:
model = MobileNetV2(num_classes=25, pretrained=True, features_fixed=False)

In [120]:
# torch.FloatTensor(size=...) only allocates memory without initialising it, so
# the forward pass could be fed arbitrary values. Use an explicitly initialised
# random batch of the same shape for this smoke test instead.
a = torch.rand(2, 3, 224, 224)
output = model(a)

In [121]:
output

tensor([[-0.3779, -0.0399, -0.5283, -0.2736,  0.0342,  0.0615, -0.4062,  0.2205,
          0.0389, -0.0725, -0.0208, -0.0050, -0.0278, -0.0926,  0.2643, -0.5408,
         -0.1046, -0.2488,  0.0401,  0.1819, -0.4694,  0.1143, -0.2829, -0.1789,
          0.0703],
        [-0.1589, -0.0784, -0.6739, -0.4565,  0.1364,  0.1637, -0.1166, -0.0226,
          0.1003, -0.4011,  0.2260,  0.1622,  0.9326, -0.0278, -0.3583, -0.5246,
          0.0470, -0.2838,  0.4462,  0.2196, -0.5058,  0.0734, -0.1418, -0.4234,
          0.0944]], grad_fn=<AddmmBackward>)

In [122]:
pred = (output.sigmoid() > 0.6).to(torch.float)
truth = torch.ones_like(pred)

In [123]:
output.sigmoid()

tensor([[0.4066, 0.4900, 0.3709, 0.4320, 0.5086, 0.5154, 0.3998, 0.5549, 0.5097,
         0.4819, 0.4948, 0.4988, 0.4930, 0.4769, 0.5657, 0.3680, 0.4739, 0.4381,
         0.5100, 0.5453, 0.3848, 0.5286, 0.4298, 0.4554, 0.5176],
        [0.4604, 0.4804, 0.3376, 0.3878, 0.5341, 0.5408, 0.4709, 0.4943, 0.5251,
         0.4011, 0.5563, 0.5405, 0.7176, 0.4931, 0.4114, 0.3718, 0.5118, 0.4295,
         0.6097, 0.5547, 0.3762, 0.5183, 0.4646, 0.3957, 0.5236]],
       grad_fn=<SigmoidBackward>)

In [134]:
# Mark the three highest-scoring classes of every row with 1.0.
# With an ascending sort, column -3 holds the third-largest probability, which
# is used as the per-row threshold. (The previous stray `des` positional after
# a keyword argument was a SyntaxError.)
probs = output.sigmoid()
third_largest = torch.sort(probs, dim=1)[0][:, -3:-2]
(probs >= third_largest).to(torch.float)

tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
         1., 0., 0., 0., 0., 0., 0.]])

In [124]:
torch.sort(output.sigmoid(), dim=1)

torch.return_types.sort(
values=tensor([[0.3680, 0.3709, 0.3848, 0.3998, 0.4066, 0.4298, 0.4320, 0.4381, 0.4554,
         0.4739, 0.4769, 0.4819, 0.4900, 0.4930, 0.4948, 0.4988, 0.5086, 0.5097,
         0.5100, 0.5154, 0.5176, 0.5286, 0.5453, 0.5549, 0.5657],
        [0.3376, 0.3718, 0.3762, 0.3878, 0.3957, 0.4011, 0.4114, 0.4295, 0.4604,
         0.4646, 0.4709, 0.4804, 0.4931, 0.4943, 0.5118, 0.5183, 0.5236, 0.5251,
         0.5341, 0.5405, 0.5408, 0.5547, 0.5563, 0.6097, 0.7176]],
       grad_fn=<SortBackward>),
indices=tensor([[15,  2, 20,  6,  0, 22,  3, 17, 23, 16, 13,  9,  1, 12, 10, 11,  4,  8,
         18,  5, 24, 21, 19,  7, 14],
        [ 2, 15, 20,  3, 23,  9, 14, 17,  0, 22,  6,  1, 13,  7, 16, 21, 24,  8,
          4, 11,  5, 19, 10, 18, 12]]))

In [139]:
torch.sort(output.sigmoid(), dim=1, descending=True)[0][..., 2:3]

tensor([[0.5453],
        [0.5563]], grad_fn=<SliceBackward>)

## Scenes

In [1]:
classes = {"desert": 0, "mountains": 1, "sea": 2, "sunset": 3, "trees": 4}

In [2]:
import ast
import json
import numpy as np
import pandas as pd
from pathlib import Path
from natsort import natsorted

# Image names are taken from the file stems and sorted in natural order; the
# label rows read below are paired with them by position.
image_paths = Path('../dataset/multi_labels/scenes/images/').glob('*.*')
image_names = natsorted([image_path.stem for image_path in image_paths])
print(f'number of images: {len(image_names)}')

with open(file='../dataset/multi_labels/scenes/labels.json', mode='r') as fp:
    # literal_eval accepts Python literals only, so file content can never be
    # executed as code the way eval() would allow. Blank lines are skipped.
    lines = np.asarray([ast.literal_eval(line) for line in fp if line.strip()])
# Negative labels are stored as -1 in the file; map them to 0.
lines[lines == -1] = 0

if len(image_names) != len(lines):
    raise ValueError(
        f'found {len(image_names)} images but {len(lines)} label rows; '
        'they must match one-to-one'
    )

# Build the per-class columns in a new dict so `classes` keeps its
# name -> column-index mapping and this cell can be re-run safely.
class_columns = {class_name: lines[:, index].tolist() for class_name, index in classes.items()}

csv_data = {'names': image_names, **class_columns}
df = pd.DataFrame(csv_data)
df.to_csv('../dataset/multi_labels/scenes/train.csv', index=False)
del df

number of images: 2000


In [3]:
data_frame = pd.read_csv('../dataset/multi_labels/scenes/train.csv')

In [4]:
data_frame.head(10)

Unnamed: 0,names,desert,mountains,sea,sunset,trees
0,1,1,0,0,0,0
1,2,1,0,0,0,0
2,3,1,0,0,0,0
3,4,1,1,0,0,0
4,5,1,0,0,0,0
5,6,1,0,0,0,0
6,7,1,1,0,0,0
7,8,1,0,0,0,0
8,9,1,0,0,0,0
9,10,1,1,0,0,0


In [5]:
import pandas as pd
import numpy as np
import shutil
import cv2

from pathlib import Path


# Load the scenes annotation table and derive the per-image lists used by the
# split/export cell.
dataframe = pd.read_csv('../dataset/multi_labels/scenes/train.csv')

image_dir = Path('../dataset/multi_labels/scenes/images/')
image_extent = '.jpg'

# Every column other than 'names' is treated as a label column.
label_frame = dataframe.drop(columns=['names'])

image_names = list(dataframe['names'])
image_paths = [str(image_dir / (str(name) + image_extent)) for name in image_names]
labels = label_frame.to_numpy()
column_names = dataframe.columns.tolist()
classes = label_frame.columns.tolist()

# One [path, label-row] pair per image.
items = [list(pair) for pair in zip(image_paths, labels)]

In [7]:
import random

# Shuffle a copy with a dedicated, seeded generator: the split membership is
# then identical on every run, and re-running the cell does not reshuffle an
# already shuffled list. `items` itself is left in its original order.
SPLIT_SEED = 42
shuffled_items = list(items)
random.Random(SPLIT_SEED).shuffle(shuffled_items)

train_ratio = 0.64
valid_ratio = 0.16

# Compute each boundary once so adjacent splits share exactly the same index.
num_items = len(shuffled_items)
train_end = int(round(num_items * train_ratio))
valid_end = int(round(num_items * (train_ratio + valid_ratio)))

train_index = (0, train_end)
valid_index = (train_end, valid_end)
test_index = (valid_end, num_items)

dirname = Path('../dataset/multi_labels/scenes/')

train_dir = dirname.joinpath('train').joinpath('images')
valid_dir = dirname.joinpath('valid').joinpath('images')
test_dir = dirname.joinpath('test').joinpath('images')

train_csv = dirname.joinpath('train').joinpath('train.csv')
valid_csv = dirname.joinpath('valid').joinpath('valid.csv')
test_csv = dirname.joinpath('test').joinpath('test.csv')

for _dirname in [train_dir, valid_dir, test_dir]:
    # exist_ok avoids the check-then-create race and keeps the cell re-runnable.
    _dirname.mkdir(parents=True, exist_ok=True)

for data_index, _dirname, csv_path in zip([train_index, valid_index, test_index], [train_dir, valid_dir, test_dir], [train_csv, valid_csv, test_csv]):
    start, stop = data_index
    rows = []
    for image_path, label in shuffled_items[start:stop]:
        # Row layout follows `column_names`: image name, then the label values.
        rows.append([Path(image_path).stem, *label])
        shutil.copy(str(image_path), str(_dirname))
    # Passing `columns` makes pandas raise if a row's width does not match the
    # header, instead of silently dropping the surplus values.
    df = pd.DataFrame(rows, columns=column_names)
    df.to_csv(str(csv_path), index=False)

## Inference

In [35]:
from torch import nn
from torchvision import models


class MobileNetV2(nn.Module):
    """torchvision MobileNetV2 whose last classifier layer is swapped out.

    Args:
        num_classes: number of output logits of the replacement layer.
        pretrained: forwarded to ``models.mobilenet_v2(pretrained=...)``.
        features_fixed: when true, gradients are disabled for
            ``model.features``; the replacement layer stays trainable.
    """

    def __init__(self, num_classes, pretrained, features_fixed):
        super(MobileNetV2, self).__init__()
        self.model = models.mobilenet_v2(pretrained=pretrained)
        self.model.features.requires_grad_(not features_fixed)
        # Take the input width from the existing layer instead of hard-coding it.
        in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Run the wrapped network on batch ``x`` and return its logits."""
        return self.model(x)

    def state_dict(self, *args, **kwargs):
        """Expose the wrapped network's state dict (no ``model.`` key prefix).

        Arguments are passed through unchanged so that the regular
        ``nn.Module.state_dict`` options (``prefix``, ``keep_vars``, ...) work,
        which is also required when this module is a child of another module.
        """
        return self.model.state_dict(*args, **kwargs)

    def load_state_dict(self, state_dict, *args, **kwargs):
        """Load weights into the wrapped network.

        Options such as ``strict`` are passed through, and the value returned
        by the underlying call is handed back to the caller.
        """
        return self.model.load_state_dict(state_dict, *args, **kwargs)

In [40]:
import torch
import torchvision

In [52]:
a = torch.FloatTensor(size=(3, 224, 224))
a

tensor([[[0.0000e+00, 0.0000e+00, 1.8896e-40,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00]],

        [[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.000

In [53]:
a = torchvision.transforms.ToPILImage()(a)
a = torchvision.transforms.ToTensor()(a)
a.shape
# a = torchvision.transforms

torch.Size([3, 224, 224])

In [58]:
# Per-channel values laid out as (3, 1, 1) so they broadcast over a
# channel-first (3, H, W) image tensor.
std = torch.tensor([1, 2, 3], dtype=torch.float).reshape(3, 1, 1)

In [59]:
std.shape

torch.Size([3, 1, 1])

In [60]:
import imgaug.augmenters as iaa
pad_to_square = iaa.PadToSquare(position='right-bottom')

In [61]:
a = np.zeros(shape=(2, 3), dtype=np.uint8)

In [62]:
pad_to_square(image=a)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=uint8)

In [18]:
from pathlib import Path

file_path = '/home/phungpx/Downloads/dangkykinhdoanh_1095_060721.txt'
dirname = Path(file_path).parent.joinpath(Path(file_path).stem)
dirname

PosixPath('/home/phungpx/Downloads/dangkykinhdoanh_1095_060721')

In [25]:
subfolder = Path('a/b')
file_url = 'c/d/e.f'
subfolder.joinpath(Path(file_url).stem).with_suffix('.json')

PosixPath('a/b/e.json')

In [10]:
with open(file='/home/phungpx/Downloads/dangkykinhdoanh_1095_060721.txt', mode='r', encoding='utf-8') as f:
    lines = f.readlines()

In [12]:
len(lines)

1095

In [26]:
line = lines[1]

In [28]:
data = line.split('#')[3]

In [31]:
import json
data = json.loads(data)

In [33]:
data['imagePath']

'/image/tagging/GPKD/2021/03/công ty cổ phần/DAE1BA82-00DA-4017-BBB3-6898C5840F66.jpg'

In [39]:
import re

def no_accent_vietnamese(s):
    """Replace Vietnamese accented letters with their plain ASCII base letters.

    The input is first normalised to NFC so that letters written as a base
    character followed by combining marks (decomposed form) are matched by the
    precomposed character classes below. Hyphens are replaced by spaces.

    Args:
        s: text to convert.

    Returns:
        The converted string; characters not covered by the rules are kept.
    """
    import unicodedata

    # Compose base letter + combining marks into single code points; without
    # this step decomposed input would pass through with its accents intact.
    s = unicodedata.normalize('NFC', s)
    s = re.sub(r'[àáạảãâầấậẩẫăằắặẳẵ]', 'a', s)
    s = re.sub(r'[ÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪ]', 'A', s)
    s = re.sub(r'[èéẹẻẽêềếệểễ]', 'e', s)
    s = re.sub(r'[ÈÉẸẺẼÊỀẾỆỂỄ]', 'E', s)
    s = re.sub(r'[òóọỏõôồốộổỗơờớợởỡ]', 'o', s)
    s = re.sub(r'[ÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠ]', 'O', s)
    s = re.sub(r'[ìíịỉĩ]', 'i', s)
    s = re.sub(r'[ÌÍỊỈĨ]', 'I', s)
    s = re.sub(r'[ùúụủũưừứựửữ]', 'u', s)
    s = re.sub(r'[ƯỪỨỰỬỮÙÚỤỦŨ]', 'U', s)
    s = re.sub(r'[ỳýỵỷỹ]', 'y', s)
    s = re.sub(r'[ỲÝỴỶỸ]', 'Y', s)
    s = re.sub(r'[Đ]', 'D', s)
    s = re.sub(r'[đ]', 'd', s)
    # Hyphens become spaces.
    s = re.sub(r'[-]', ' ', s)
    return s

print(no_accent_vietnamese("Việt Nam Đất Nước Con Người"))
print(no_accent_vietnamese("Welcome to Vietnam !"))
print(no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI"))

Viet Nam Dat Nuoc Con Nguoi
Welcome to Vietnam !
VIET NAM DAT NUOC CON NGUOI
