In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2 as cv2
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pytorch_lightning as pl
from models import ContrastiveLossModel

In [2]:
# Training configuration: everything a reader might want to tune lives here.
SEED = 42  # one seed shared by the Python, NumPy and PyTorch generators
BATCH_SIZE_TRIPLET = 64  # number of image pairs per batch
num_epochs_triplet = 15
learning_rate_triplet = 0.001
with_scheduler_triplet = True  # not referenced by the cells shown in this notebook
use_combos = True  # forwarded to ContrastiveLossDataset(use_all_combos=...)
combos_mult = 1024  # forwarded to ContrastiveLossDataset(combos_mult=...)

# Pair sampling (np.random) and DataLoader shuffling are random, so seed every
# generator up front to make a Restart & Run All reproduce the same run.
pl.seed_everything(SEED, workers=True);

In [3]:
# Peek at the raw listing table; three rows are enough to see the available columns.
comparable_data = pd.read_csv(filepath_or_buffer="comparable_data.csv")
comparable_data.head(n=3)

Unnamed: 0,title,price,cat_1,cat_2,cat_3,caracteristics,img_ref,target,dealer
0,Беговая дорожка UNIXFIT MX-990X,120890,Беговые дорожки,UNIXFIT,UNIXFIT MX-990X,Тип электрическая Уровень базовый Габариты (...,images/begovye_dorozhki/1_begdorozhki_1349.jpeg,begovye_dorozhki,begdorozhki
1,"Беговая дорожка Proxima Ivetta HRC, Арт. PROT-219",139990,Беговые дорожки,Proxima,"Proxima Ivetta HRC, Арт. PROT-219",Тип электрическая Уровень базовый Габариты (...,images/begovye_dorozhki/2_begdorozhki_1463.jpeg,begovye_dorozhki,begdorozhki
2,"Беговая дорожка UNIXFIT MX-990 AC (10,1"" TFT)",159890,Беговые дорожки,UNIXFIT,"UNIXFIT MX-990 AC (10,1"" TFT)",Тип электрическая Уровень полупрофессиональны...,images/begovye_dorozhki/3_begdorozhki_1638.jpeg,begovye_dorozhki,begdorozhki


In [4]:
def prepare_data(path_to_df):
    """Load every image listed in a CSV and attach an integer class label.

    The CSV must provide an ``img_ref`` column (path of an image file) and a
    ``target`` column (class name).

    Each image is first read with ``cv2.imread``; when OpenCV cannot decode
    the file (it returns ``None``) the image is read with ``plt.imread``
    instead. Whatever the reader, the stored image has three channels in RGB
    order, so downstream transforms see one consistent layout.

    Parameters
    ----------
    path_to_df : str or path-like
        Location of the CSV file, handed to ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame
        Columns ``label`` and ``image``. ``label`` is the position of the
        row's ``target`` in the sorted list of distinct targets (a missing
        target is encoded as ``-1`` by ``pd.factorize``), so label ids are
        stable between runs. ``image`` holds the decoded H x W x 3 RGB array.
    """
    df = pd.read_csv(path_to_df)

    # Collect plain records and build the frame once; concatenating inside the
    # loop re-copies all previous rows on every iteration.
    records = []
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        path = row["img_ref"]
        try:
            # cv2.imread reports an unreadable file by returning None.
            img = cv2.imread(path)
        except cv2.error:
            img = None

        if img is not None:
            # With the default flag OpenCV always yields a 3-channel BGR image.
            img_fin = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            # matplotlib returns grayscale, RGB or RGBA (never BGR) arrays.
            img = plt.imread(path)
            if img.ndim < 3:
                img_fin = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            elif img.shape[2] == 4:
                img_fin = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
            else:
                img_fin = img

        records.append({"label_string": row["target"], "image": img_fin})

    updated_df = pd.DataFrame(records, columns=["label_string", "image"])

    # sort=True gives every class the same id on every run, unlike the
    # arbitrary ordering of list(set(...)).
    codes, _ = pd.factorize(updated_df["label_string"], sort=True)
    updated_df["label"] = codes

    return updated_df[["label", "image"]]

In [5]:
# Decode every referenced image once; the returned frame has `label` and `image` columns.
df = prepare_data(path_to_df="comparable_data.csv")

  0%|          | 0/13718 [00:00<?, ?it/s]

In [6]:
# Preprocessing applied to every image: array -> PIL -> 224x224 -> tensor -> per-channel normalisation.
# NOTE(review): the mean/std values look like the usual ImageNet statistics;
# confirm they match what the backbone inside ContrastiveLossModel expects.
default_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [7]:
class ContrastiveLossDataset(Dataset):
    """In-memory dataset of image pairs for contrastive-loss training.

    Every item is a tuple ``(first, second, dis, label)``:

    * ``first``  - the image of one row of ``data_df`` (the anchor),
    * ``second`` - a partner image,
    * ``dis``    - ``0.0`` if the partner comes from the anchor's label,
      ``1.0`` if it comes from any other label,
    * ``label``  - the anchor's label.

    For every row a fair coin decides whether the row produces same-label or
    different-label pairs.

    * ``use_all_combos=False``: one pair per row, with a randomly drawn
      partner from the chosen pool.
    * ``use_all_combos=True``: ``int(n_same / combos_mult)`` pairs per row,
      where ``n_same`` is the number of images sharing the row's label.
      Same-label pairs use the first images of that label in frame order;
      different-label pairs draw a random partner each time. Labels with
      fewer than ``combos_mult`` images therefore produce no pairs.

    All pairs are built, and transformed when ``transform`` is given, inside
    the constructor and kept in ``self.dataset``, so memory use grows with
    the number of pairs.

    Parameters
    ----------
    data_df : pd.DataFrame
        Must provide ``label`` and ``image`` columns.
    transform : callable, optional
        Applied to both images of a pair (anchor first, then partner).
    use_all_combos : bool
        Selects the pair-generation mode described above.
    combos_mult : number
        Divisor controlling how many pairs each row yields in combo mode.
    seed : int, optional
        When given, pair sampling uses a private ``np.random.RandomState``
        seeded with this value. When ``None`` the global ``np.random`` state
        is used, so ``np.random.seed`` keeps controlling the sampling.

    Raises
    ------
    ValueError
        If ``combos_mult`` is not positive in combo mode, or if a
        different-label partner is needed but ``data_df`` holds one label only.
    """

    def __init__(
        self,
        data_df: pd.DataFrame,
        transform=None,
        use_all_combos=False,
        combos_mult=2,
        seed=None,
    ):
        super().__init__()
        if use_all_combos and combos_mult <= 0:
            raise ValueError("combos_mult must be positive when use_all_combos=True")

        # Both objects expose randint(low, high) with identical semantics.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Per label: the images sharing it and the images of every other label.
        labels = data_df["label"].to_numpy()
        images = data_df["image"].to_numpy()
        same_label = {}
        other_label = {}
        for value in data_df["label"].unique():
            mask = labels == value
            same_label[value] = images[mask]
            other_label[value] = images[~mask]

        dataset = []
        for _, row in tqdm(data_df.iterrows(), total=len(data_df)):
            label = row["label"]
            anchor = row["image"]
            positives = same_label[label]
            negatives = other_label[label]

            # Fair coin per anchor row: same-label partner(s) with probability 0.5.
            want_same = rng.randint(0, 2) == 0
            dis = 0.0 if want_same else 1.0

            n_pairs = int(len(positives) / combos_mult) if use_all_combos else 1
            for k in range(n_pairs):
                if not want_same:
                    partner = self._pick_negative(negatives, rng, label)
                elif use_all_combos:
                    # Combo mode walks the same-label images in frame order.
                    partner = positives[k]
                else:
                    partner = positives[rng.randint(0, len(positives))]
                dataset.append(self._make_pair(anchor, partner, dis, label, transform))

        self.dataset = dataset
        self.transform = transform

    @staticmethod
    def _pick_negative(pool, rng, label):
        """Draw one random image from ``pool``; fail clearly when it is empty."""
        if len(pool) == 0:
            raise ValueError(
                f"cannot draw a different-label partner for label {label!r}: "
                "data_df contains no image with another label"
            )
        return pool[rng.randint(0, len(pool))]

    @staticmethod
    def _make_pair(first, second, dis, label, transform):
        """Apply ``transform`` (when given) to both images and bundle the sample."""
        if transform is not None:
            # NOTE(review): images are cast to float32 before the transform;
            # confirm the transform pipeline expects float input in the
            # images' original value range.
            first = transform(first.astype(np.float32))
            second = transform(second.astype(np.float32))
        return (first, second, dis, label)

    def __len__(self):
        """Number of generated pairs."""
        return len(self.dataset)

    def __getitem__(self, i):
        """Return the ``i``-th ``(first, second, dis, label)`` tuple."""
        return self.dataset[i]

In [8]:
# Build the training pairs with the sampling options chosen in the config cell.
train_dataset_with_combos = ContrastiveLossDataset(
    data_df=df,
    transform=default_transform,
    use_all_combos=use_combos,
    combos_mult=combos_mult,
)

print(f"Train dataset shape: {len(train_dataset_with_combos)}")

  0%|          | 0/13718 [00:00<?, ?it/s]

Train dataset shape: 32399


In [9]:
# Reshuffle the pairs on every epoch; the trailing incomplete batch is dropped.
trainLoader_with_combos = DataLoader(
    dataset=train_dataset_with_combos,
    batch_size=BATCH_SIZE_TRIPLET,
    drop_last=True,
    shuffle=True,
)

In [10]:
# Fit on the pair loader only; no validation loader is passed to the trainer.
trainer = pl.Trainer(
    max_epochs=num_epochs_triplet,
)
model = ContrastiveLossModel(num_epochs_triplet, learning_rate_triplet)
trainer.fit(model=model, train_dataloaders=trainLoader_with_combos)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\Kate\Desktop\cv_project\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
c:\Users\Kate\Desktop\cv_project\.venv\Lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.


In [11]:
# Write the trainer state after the final epoch to a checkpoint file.
trainer.save_checkpoint(filepath="image_emb_model_1024_combos_v1.ckpt")