In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import os
from tqdm import tqdm
import time
import copy
from PIL import Image
from imblearn.over_sampling import RandomOverSampler

In [2]:
device = torch.device('cuda')

In [3]:
transform = transforms.Compose([        # Defining a variable transforms
 transforms.Resize(256),                # Resize the image to 256×256 pixels
 transforms.CenterCrop(224),            # Crop the image to 224×224 pixels about the center
 transforms.ToTensor(),                 # Convert the image to PyTorch Tensor data type
 transforms.Normalize(                  # Normalize the image
 mean=[0.485, 0.456, 0.406],            # Mean and std of image as also used when training the network
 std=[0.229, 0.224, 0.225]      
)])

In [4]:
df = pd.read_csv('/kaggle/input/classify-by-brand-dataset-fixed/classify_by_brand/new_image_classification.csv')
sampler = RandomOverSampler(random_state=42)
X = df['image_path'].values
X = X.reshape(-1,1)
Y = df['class_label']

X_res, Y_res = sampler.fit_resample(X,Y)

X_list = map(lambda x: x[0], X_res)
df = pd.concat([pd.Series(X_list, dtype='string', name='image_path'), Y_res], axis=1)
df

Unnamed: 0,image_path,class_label
0,/kaggle/input/classify-by-brand-dataset/classi...,not himalaya
1,/kaggle/input/classify-by-brand-dataset/classi...,himalaya
2,/kaggle/input/classify-by-brand-dataset/classi...,not himalaya
3,/kaggle/input/classify-by-brand-dataset/classi...,not himalaya
4,/kaggle/input/classify-by-brand-dataset/classi...,not himalaya
...,...,...
11889,/kaggle/input/classify-by-brand-dataset/classi...,himalaya
11890,/kaggle/input/classify-by-brand-dataset/classi...,himalaya
11891,/kaggle/input/classify-by-brand-dataset/classi...,himalaya
11892,/kaggle/input/classify-by-brand-dataset/classi...,himalaya


In [5]:
class Himalaya_dataset(Dataset):
    def __init__(self, img_dir, label_dir, transform, target_transform=None):
#         self.img_labels = pd.read_csv(label_dir)
        self.img_labels = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path)
        label = self.img_labels.iloc[idx, 1]
        label = torch.Tensor([clas[label]])
        image = self.transform(image)
        return image, label

In [6]:
img_dir = "/kaggle/input/classify-by-brand-dataset/classify_by_brand"
train_dataset = Himalaya_dataset(img_dir, "/kaggle/input/classify-by-brand-dataset-fixed/classify_by_brand/new_image_classification.csv", transform)
cv_dataset = Himalaya_dataset(img_dir, "/kaggle/input/brand-cv/image_classification3.csv", transform)
dataset_sizes = {"train": len(train_dataset), "cv": len(cv_dataset)}

In [7]:
bs = 200
train = torch.utils.data.DataLoader(train_dataset, batch_size = bs, shuffle = True)
cv = torch.utils.data.DataLoader(cv_dataset, shuffle = True)
dataloaders = {"train": train, "cv": cv}
clas = {"not himalaya": 0., "himalaya": 1.}

In [8]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'cv']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [9]:
data_dir = '/kaggle/input/classify-by-brand-dataset-fixed/classify_by_brand/classify_by_brand_dataset'

model = models.resnet18(pretrained = True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)
model = nn.Sequential(model, nn.Sigmoid())
model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 148MB/s]


Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [10]:
criterion = nn.BCELoss()
opt = torch.optim.SGD(model[0].fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=7, gamma=0.1)
model = train_model(model, criterion, opt,
                         exp_lr_scheduler, num_epochs=25)

Epoch 0/24
----------


100%|██████████| 60/60 [01:59<00:00,  1.99s/it]


train Loss: 0.5695 Acc: 99.5544


100%|██████████| 11894/11894 [01:23<00:00, 143.13it/s]


cv Loss: 0.4304 Acc: 0.5000

Epoch 1/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.15it/s]


train Loss: 0.3865 Acc: 99.6257


100%|██████████| 11894/11894 [01:24<00:00, 140.88it/s]


cv Loss: 0.3473 Acc: 0.5000

Epoch 2/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.16it/s]


train Loss: 0.3332 Acc: 99.6257


100%|██████████| 11894/11894 [01:23<00:00, 142.80it/s]


cv Loss: 0.3120 Acc: 0.5000

Epoch 3/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.15it/s]


train Loss: 0.3056 Acc: 99.5455


100%|██████████| 11894/11894 [01:23<00:00, 142.44it/s]


cv Loss: 0.2908 Acc: 0.5000

Epoch 4/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.15it/s]


train Loss: 0.2887 Acc: 99.6257


100%|██████████| 11894/11894 [01:23<00:00, 143.06it/s]


cv Loss: 0.2773 Acc: 0.5000

Epoch 5/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.16it/s]


train Loss: 0.2759 Acc: 99.6079


100%|██████████| 11894/11894 [01:23<00:00, 143.02it/s]


cv Loss: 0.2673 Acc: 0.5000

Epoch 6/24
----------


100%|██████████| 60/60 [00:50<00:00,  1.18it/s]


train Loss: 0.2674 Acc: 99.5811


100%|██████████| 11894/11894 [01:23<00:00, 142.48it/s]


cv Loss: 0.2569 Acc: 0.5000

Epoch 7/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.17it/s]


train Loss: 0.2613 Acc: 99.5187


100%|██████████| 11894/11894 [01:24<00:00, 140.45it/s]


cv Loss: 0.2564 Acc: 0.5000

Epoch 8/24
----------


100%|██████████| 60/60 [00:53<00:00,  1.13it/s]


train Loss: 0.2591 Acc: 99.5544


100%|██████████| 11894/11894 [01:26<00:00, 137.47it/s]


cv Loss: 0.2553 Acc: 0.5000

Epoch 9/24
----------


100%|██████████| 60/60 [00:53<00:00,  1.12it/s]


train Loss: 0.2586 Acc: 99.5455


100%|██████████| 11894/11894 [01:23<00:00, 141.89it/s]


cv Loss: 0.2549 Acc: 0.5000

Epoch 10/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.13it/s]


train Loss: 0.2566 Acc: 99.6257


100%|██████████| 11894/11894 [01:23<00:00, 142.70it/s]


cv Loss: 0.2544 Acc: 0.5000

Epoch 11/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.16it/s]


train Loss: 0.2567 Acc: 99.5455


100%|██████████| 11894/11894 [01:23<00:00, 142.34it/s]


cv Loss: 0.2531 Acc: 0.5000

Epoch 12/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.17it/s]


train Loss: 0.2568 Acc: 99.5811


100%|██████████| 11894/11894 [01:23<00:00, 143.30it/s]


cv Loss: 0.2517 Acc: 0.5000

Epoch 13/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.13it/s]


train Loss: 0.2568 Acc: 99.5900


100%|██████████| 11894/11894 [01:23<00:00, 142.00it/s]


cv Loss: 0.2523 Acc: 0.5000

Epoch 14/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.17it/s]


train Loss: 0.2551 Acc: 99.5633


100%|██████████| 11894/11894 [01:24<00:00, 141.03it/s]


cv Loss: 0.2521 Acc: 0.5000

Epoch 15/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.17it/s]


train Loss: 0.2551 Acc: 99.5990


100%|██████████| 11894/11894 [01:24<00:00, 141.09it/s]


cv Loss: 0.2517 Acc: 0.5000

Epoch 16/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.15it/s]


train Loss: 0.2545 Acc: 99.5277


100%|██████████| 11894/11894 [01:24<00:00, 140.28it/s]


cv Loss: 0.2514 Acc: 0.5000

Epoch 17/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.15it/s]


train Loss: 0.2550 Acc: 99.6168


100%|██████████| 11894/11894 [01:23<00:00, 142.35it/s]


cv Loss: 0.2517 Acc: 0.5000

Epoch 18/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.16it/s]


train Loss: 0.2559 Acc: 99.5990


100%|██████████| 11894/11894 [01:24<00:00, 140.35it/s]


cv Loss: 0.2520 Acc: 0.5000

Epoch 19/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.14it/s]


train Loss: 0.2549 Acc: 99.5722


100%|██████████| 11894/11894 [01:23<00:00, 141.71it/s]


cv Loss: 0.2510 Acc: 0.5000

Epoch 20/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.14it/s]


train Loss: 0.2563 Acc: 99.4831


100%|██████████| 11894/11894 [01:24<00:00, 140.86it/s]


cv Loss: 0.2517 Acc: 0.5000

Epoch 21/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.15it/s]


train Loss: 0.2551 Acc: 99.5455


100%|██████████| 11894/11894 [01:25<00:00, 139.50it/s]


cv Loss: 0.2522 Acc: 0.5000

Epoch 22/24
----------


100%|██████████| 60/60 [00:51<00:00,  1.16it/s]


train Loss: 0.2547 Acc: 99.5900


100%|██████████| 11894/11894 [01:24<00:00, 141.49it/s]


cv Loss: 0.2514 Acc: 0.5000

Epoch 23/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.13it/s]


train Loss: 0.2557 Acc: 99.6346


100%|██████████| 11894/11894 [01:25<00:00, 139.17it/s]


cv Loss: 0.2515 Acc: 0.5000

Epoch 24/24
----------


100%|██████████| 60/60 [00:52<00:00,  1.14it/s]


train Loss: 0.2545 Acc: 99.5722


100%|██████████| 11894/11894 [01:25<00:00, 139.89it/s]

cv Loss: 0.2511 Acc: 0.5000

Training complete in 57m 54s
Best val Acc: 0.000000



