In [1]:
import numpy as np
import pandas as pd

import torch
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data.dataset import Dataset
from tqdm import tqdm
from modules.datasets.ImageContrastiveLossShopeeDataset import ImageContrastiveLossShopeeDataset
from modules.losses.ContrastiveLoss import ContrastiveLoss
from modules.models.SiameseNet import SiameseNet
from modules.models.ResNet18EmbeddingsShopeeNet import ResNet18EmbeddingsShopeeNet
import modules.utils.dataset_utils as dataset_utils

import os

In [2]:
DATA_FOLDER = './shopee-product-matching/'

train_df = dataset_utils.get_dataset(DATA_FOLDER, is_test=False)
train_df = dataset_utils.add_target(train_df)
train_df.head()

Unnamed: 0,posting_id,image,image_phash,title,label_group,target
0,train_129225211,./shopee-product-matching/train_images/0000a68...,94974f937d4c2433,Paper Bag Victoria Secret,249114794,"[train_129225211, train_2278313361]"
1,train_3386243561,./shopee-product-matching/train_images/0003978...,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,"[train_3386243561, train_3423213080]"
2,train_2288590299,./shopee-product-matching/train_images/000a190...,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,"[train_2288590299, train_3803689425]"
3,train_2406599165,./shopee-product-matching/train_images/00117e4...,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,4093212188,"[train_2406599165, train_3342059966]"
4,train_3369186413,./shopee-product-matching/train_images/00136d1...,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,"[train_3369186413, train_921438619]"


In [3]:
train_cl_df = dataset_utils.get_contrastive_loss_dataset(train_df)
train_cl_df

11014it [00:02, 3843.28it/s]


Unnamed: 0,posting_id_1,posting_id_2,image_1,image_2,title_1,title_2,label
0,train_129225211,train_2278313361,./shopee-product-matching/train_images/0000a68...,./shopee-product-matching/train_images/f83b49a...,Paper Bag Victoria Secret,PAPER BAG VICTORIA SECRET,1
1,train_2278313361,train_7437096,./shopee-product-matching/train_images/f83b49a...,./shopee-product-matching/train_images/c897e0d...,PAPER BAG VICTORIA SECRET,Abacaga ; Cara Praktis Belajar Membaca Untuk A...,0
2,train_3386243561,train_3423213080,./shopee-product-matching/train_images/0003978...,./shopee-product-matching/train_images/8cbe4bf...,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",Double Tape VHB 3M ORIGINAL 12mm x 4.5mm Busa ...,1
3,train_3386243561,train_1836586325,./shopee-product-matching/train_images/0003978...,./shopee-product-matching/train_images/9af7695...,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",Alat Cukur Alis Elektrik Mini Tanpa Rasa Sakit,0
4,train_2288590299,train_3803689425,./shopee-product-matching/train_images/000a190...,./shopee-product-matching/train_images/75dbd1e...,Maling TTS Canned Pork Luncheon Meat 397 gr,Maling Ham Pork Luncheon Meat TTS 397gr,1
...,...,...,...,...,...,...,...
167497,train_866113781,train_3058662787,./shopee-product-matching/train_images/fd58c6f...,./shopee-product-matching/train_images/d1e8a0e...,Kedaung Cangkir / Mug Enamel Loreng 10 cm,(COD) Dompet Panjang Import 6088 Dompet Tali P...,0
167498,train_2244662893,train_3281898016,./shopee-product-matching/train_images/fd68da1...,./shopee-product-matching/train_images/fe2f96d...,LAMPU HURUF A-Z DAN ANGKA 0-9 \xe2\x9d\xa4\xef...,LAMPU HURUF A-Z DAN ANGKA 0-9 TINGGI 16 CM,1
167499,train_2244662893,train_1962509900,./shopee-product-matching/train_images/fd68da1...,./shopee-product-matching/train_images/1760302...,LAMPU HURUF A-Z DAN ANGKA 0-9 \xe2\x9d\xa4\xef...,Serba Grosir Murah Sarung Tangan Anti Panas un...,0
167500,train_4221982820,train_4063409014,./shopee-product-matching/train_images/ff512b2...,./shopee-product-matching/train_images/ff7180b...,Sprei Lady Rose 180x200 King terlaris Keroppi,Sprei king ladyrose size 180x200 kerokeroppi,1


In [4]:
train_cl_df = train_cl_df.sample(frac=1)
train_cl_df

Unnamed: 0,posting_id_1,posting_id_2,image_1,image_2,title_1,title_2,label
77757,train_2657855879,train_274975325,./shopee-product-matching/train_images/6ba55ac...,./shopee-product-matching/train_images/9ed9f32...,CL BL 01 70cm,FREE BOX & BATERAI JAM TANGAN PRIA GSHOCK GST-...,1
21690,train_1251926547,train_1359948092,./shopee-product-matching/train_images/0233d15...,./shopee-product-matching/train_images/3c969c9...,"VIVA Cleansing Milk, VIVA Air Mawar, VIVA Face...",VIVA AIR MAWAR 100ml | AIR MAWAR VIVA PENYEGAR...,1
124057,train_327920855,train_177136644,./shopee-product-matching/train_images/67754ca...,./shopee-product-matching/train_images/e24c61b...,{BOX}MASKER GOLD ANTI AGING BPOM,Hanasui Anti Aging Peel Of Mask Gold,1
51876,train_3978236021,train_389580299,./shopee-product-matching/train_images/07df304...,./shopee-product-matching/train_images/be70421...,7in1 Stick Cup Standing Balon + Tatakan / Tian...,Kewpie Salad Dressing 200ml,0
43363,train_1472702975,train_3655323022,./shopee-product-matching/train_images/81d8fb2...,./shopee-product-matching/train_images/a4f59ba...,Souvenir pisau sarung apel kemas plastik,Pisau Sarung Motif Apel,1
...,...,...,...,...,...,...,...
105804,train_3967330559,train_916038872,./shopee-product-matching/train_images/8b94005...,./shopee-product-matching/train_images/046d98c...,(SOLD OUT) MASKYOURNEEDS 25gram,PART 1 BELLA SQUARE POLLYCOTTON,0
65817,train_911623964,train_3939843351,./shopee-product-matching/train_images/7ddaf0f...,./shopee-product-matching/train_images/cc8b63c...,\xe2\x9d\x84 TMALL \xe2\x9d\x84 R108 Kaos Kaki...,\xe2\x9c\xa7YUKSHOPING\xe2\x9c\xa7 R108 Kaos K...,1
145641,train_2874063158,train_2649914633,./shopee-product-matching/train_images/983120d...,./shopee-product-matching/train_images/7ffd9f6...,Isi daya USB LED\xe2\x9c\x85Timbangan Badan De...,sepatu anak laki laki dan perempuan motif tali...,0
159993,train_2278189641,train_526948393,./shopee-product-matching/train_images/9b1f173...,./shopee-product-matching/train_images/377fc2c...,Tas wanita chale dan keth 2 in 1 / Tas Jinjing...,Boci mantap jiwa ori/geprek/korean spicy,0


In [5]:
train_cl_df.groupby('label').count()

Unnamed: 0_level_0,posting_id_1,posting_id_2,image_1,image_2,title_1,title_2
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,83751,83751,83751,83751,83751,83751
1,83751,83751,83751,83751,83751,83751


In [6]:
# train_cl_df.to_csv('./shopee-product-matching/contrastive_loss_data.csv', index=False)

In [7]:

images_dataset = ImageContrastiveLossShopeeDataset(
    train_cl_df['image_1'].values,
    train_cl_df['image_2'].values,
    train_cl_df['label'].values,
    transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]))

train_percent = 0.8

total_len = len(images_dataset)
train_len = int(train_percent * total_len)
val_len = total_len - train_len
train_dataset, val_dataset = torch.utils.data.random_split(images_dataset, [train_len, val_len])

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4
)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4
)

In [8]:
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    running_loss = 0
    i = 1
    tk0 = tqdm(train_loader, total=len(train_loader))
    for input_1, input_2, target in tk0:
        input_1, input_2, target = input_1.to(device), input_2.to(device), target.to(device)
        optimizer.zero_grad()
        output_1, output_2 = model(input_1, input_2)
        loss = criterion(output_1, output_2, target)
        loss.backward()
        optimizer.step()
        running_loss += loss
        running_loss /= i
        i += 1
        tk0.set_postfix(Train_Loss=running_loss,Epoch=epoch,LR=optimizer.param_groups[0]['lr'])

        # if batch_idx % 100 == 0:
        #     print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        #         epoch, batch_idx * len(input_1), len(train_loader.dataset),
        #                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    tk0 = tqdm(test_loader, total=len(test_loader))
    with torch.no_grad():
        for input_1, input_2, target in tk0:
            input_1, input_2, target = input_1.to(device), input_2.to(device), target.to(device)
            output_1, output_2 = model(input_1, input_2)
            test_loss += criterion(output_1, output_2, target, reduction='sum').item()  # sum up batch loss
            # pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            # correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))



In [9]:
device = torch.device("cpu")

resnet18_emb_model = ResNet18EmbeddingsShopeeNet().to(device)
siamese_net = SiameseNet(resnet18_emb_model).to(device)

criterion = ContrastiveLoss()
optimizer = optim.Adam(siamese_net.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
num_epochs = 5

In [10]:
for epoch in range(1, num_epochs):
    train(siamese_net, device, train_loader, optimizer, criterion,epoch)
    test(siamese_net, device, val_loader, criterion)
    scheduler.step()

  0%|          | 0/2094 [00:00<?, ?it/s][W NNPACK.cpp:51] Could not initialize NNPACK! Reason: Unsupported hardware.
  1%|          | 11/2094 [1:00:38<191:22:38, 330.75s/it, Epoch=1, LR=0.01, Train_Loss=tensor(0.0195, grad_fn=<DivBackward0>)]


KeyboardInterrupt: 