In [None]:
import numpy as np 
import pandas as pd 
import os
import zipfile
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch.transforms import ToTensor
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import cv2
import torch
from torch import nn
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import train_test_split
import torchvision.models as models
import torch.optim as optim

from torchvision.utils import make_grid
from torchvision import transforms as T

In [None]:
# Unpack both competition archives into the current working directory.
# The context manager guarantees each archive handle is closed even if
# extraction fails part-way through.
for archive_path in ('../input/dogs-vs-cats-redux-kernels-edition/train.zip',
                     '../input/dogs-vs-cats-redux-kernels-edition/test.zip'):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall('./')

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes the row order
# (and anything derived from it, such as a seeded split) reproducible.
train_images_paths = [
    os.path.join("/kaggle/working/train/", file_name)
    for file_name in sorted(os.listdir("/kaggle/working/train/"))
]
test_images_paths = [
    os.path.join("/kaggle/working/test/", file_name)
    for file_name in sorted(os.listdir("/kaggle/working/test/"))
]

In [None]:
# One row per image file, with a single 'path' column.
# Naming the column at construction time also works for an empty listing,
# where assigning `.columns` afterwards would raise a length mismatch.
train = pd.DataFrame(train_images_paths, columns=['path'])

test = pd.DataFrame(test_images_paths, columns=['path'])

In [None]:
test.head()

In [None]:
# Target encoding follows the competition convention: 1 = dog, 0 = cat, so that
# column 1 of the model's softmax output is P(dog), which is what the submission
# file expects. Only the file name is inspected (e.g. "dog.123.jpg"), never the
# directory part of the path.
train['label'] = train['path'].apply(
    lambda image_path: int(os.path.basename(image_path).lower().startswith('dog'))
)

In [None]:
train.head()

In [None]:
# Hold out 10% of the labelled images for validation.
# - a single frame is split (no need to pass it twice and discard the copies);
# - random_state makes the split reproducible across kernel restarts;
# - stratify keeps the class ratio identical in both parts.
train, val = train_test_split(train, test_size=0.1, random_state=42, stratify=train['label'])

In [None]:
# Give both splits a fresh 0..n-1 index (the shuffled original index is dropped).
train, val = (split.reset_index(drop=True) for split in (train, val))

In [None]:
# Show the first ten training images, each titled with its file path.
for sample_idx in range(10):
    sample_path = train.loc[sample_idx, 'path']
    plt.figure(figsize=(6, 6))
    img = plt.imread(sample_path)
    plt.imshow(img)
    plt.title(sample_path)
    plt.show()

In [None]:
# Print the array shape of the first ten training images.
# `img` is deliberately left bound to the last image read.
for sample_idx in range(10):
    sample_path = train.loc[sample_idx, 'path']
    img = plt.imread(sample_path)
    print(img.shape)

In [None]:
# Per-channel intensity histogram of `img` (the last image read above).
# The channel letter doubles as the matplotlib colour code and the legend label.
plt.figure(figsize=(6, 6))
for channel_idx, channel_name in enumerate(('r', 'g', 'b')):
    plt.hist(img[:, :, channel_idx].flatten(), bins=100,
             label=channel_name, color=channel_name, alpha=0.3)
plt.legend()  # the labels were set before but never rendered
plt.show()

In [None]:


def get_train_transform(size=224):
    """Build the albumentations pipeline used for training images.

    Args:
        size: side length of the square the image is resized to.

    Returns:
        An ``A.Compose`` that resizes to ``size x size``, flips horizontally
        with probability 0.5 and applies colour jitter with probability 0.3.
    """
    augmentations = [
        A.Resize(size, size),
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(brightness=0.07, contrast=0.07, saturation=0.1, hue=0.1,
                      always_apply=False, p=0.3),
    ]
    return A.Compose(augmentations)

# Augmenting pipeline with the default 224x224 output size.
train_transform = get_train_transform()

# Resize-only pipeline (no random augmentation).
test_transform = A.Compose([A.Resize(224, 224)])


# Converts an image to a tensor, then normalises each channel with the
# per-channel mean / std given below.
to_tensor_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def get_inverse_transform(mean_=(0.485, 0.456, 0.406),
                          std_=(0.229, 0.224, 0.225)):
    """Build the transform that undoes ``T.Normalize(mean_, std_)``.

    Normalisation computes ``(x - mean) / std``; the inverse is applied in two
    steps: first multiply by ``std`` (a Normalize with mean 0 and std ``1/std``),
    then add ``mean`` (a Normalize with mean ``-mean`` and std 1).

    Args:
        mean_: per-channel means used by the forward normalisation.
        std_: per-channel standard deviations used by the forward normalisation.

    Returns:
        A ``T.Compose`` mapping a normalised tensor back to its original range.

    Raises:
        ValueError: if ``mean_`` and ``std_`` have different lengths.
    """
    mean_ = list(mean_)
    std_ = list(std_)
    if len(mean_) != len(std_):
        raise ValueError("mean_ and std_ must have the same number of channels")

    n_channels = len(std_)
    undo_scale = T.Normalize(mean=[0.] * n_channels,
                             std=[1. / channel_std for channel_std in std_])
    undo_shift = T.Normalize(mean=[-channel_mean for channel_mean in mean_],
                             std=[1.] * n_channels)
    return T.Compose([undo_scale, undo_shift])

inverse_transform = get_inverse_transform()

In [None]:
class TrainDataset(Dataset):
    """Image dataset backed by a DataFrame with a 'path' column.

    Args:
        train: DataFrame with a 'path' column and, unless ``is_test`` is set,
            a 'label' column.
        transform: optional albumentations-style callable, invoked as
            ``transform(image=image)['image']``.
        is_test: when True no labels are read and ``__getitem__`` returns only
            the image tensor.
    """

    def __init__(self, train, transform=None, is_test= False ):
        self.X = train['path']
        self.is_test = is_test
        self.transform = transform
        if not self.is_test:
            self.y = train['label']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``index``-th sample: a tensor, or ``(tensor, label)``.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        # Positional access: works even when the frame's index is not 0..n-1
        # (e.g. a split that was not reset).
        path = self.X.iloc[index]

        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)['image']

        # The image array is H, W, C here; the tensor transform yields C, H, W.
        tensor = to_tensor_transform(image)

        if self.is_test:
            return tensor
        return tensor, self.y.iloc[index]
    

In [None]:
train_dataset = TrainDataset(train, train_transform)
# Validation must be deterministic: use the resize-only pipeline rather than the
# random flip / colour-jitter augmentations, otherwise the reported validation
# metrics change from run to run and do not match test-time preprocessing.
val_dataset = TrainDataset(val, test_transform)

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False)


In [None]:
def _imshow(img):
    """Print the tensor's shape, undo the normalisation and plot it.

    Args:
        img: tensor with channels first; it is passed through
            ``inverse_transform`` and transposed to channels last for plotting.
    """
    print(img.shape)
    restored = inverse_transform(img)  # unnormalize
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.figure(figsize=(20, 20))
    plt.imshow(channels_last)
    plt.show()

In [None]:
# Pull one batch from the training loader.
# The builtin next() is the supported way to advance an iterator; the
# iterator's own `.next()` method is not available on current DataLoader iterators.
dataiter = iter(train_dataloader)
images, labels = next(dataiter)

In [None]:
# Look at a single image: undo the normalisation, then move channels last.
first_image = inverse_transform(images[0])
plt.imshow(first_image.permute(1, 2, 0).numpy())

In [None]:
# Make an image grid for one batch.
# next(dataiter) replaces the unsupported `dataiter.next()` call.
dataiter = iter(train_dataloader)
images, labels = next(dataiter)
_imshow(make_grid(images))

In [None]:
# Unlabelled test images: resize-only preprocessing, original order preserved.
test_dataset = TrainDataset(test, transform=test_transform, is_test=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

In [None]:
# # look at tensors shape
# for image_batch, label_batch in train_dataloader:
#     print(image_batch.shape, label_batch.shape )

In [None]:
# Instantiate torchvision's ResNet-101 with `pretrained = True`.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of
# a `weights=` argument — confirm against the installed version before changing.
resnet = models.resnet101(pretrained = True)

In [None]:
resnet

In [None]:
# Transfer learning: switch off gradients for every backbone parameter.
for backbone_param in resnet.parameters():
    backbone_param.requires_grad_(False)

![Transfer learning illustration](https://www.topbots.com/wp-content/uploads/2020/05/cover_transfer_image_1600px_web.jpg)

In [None]:
class NN(nn.Module):
    """Wraps a backbone and maps its 1000 outputs to 2 class scores.

    Args:
        resnet_pretrained: module whose forward pass yields 1000 features per
            sample; stored as ``self.resnet_pretrained``.
    """

    def __init__(self, resnet_pretrained):
        super().__init__()
        self.resnet_pretrained = resnet_pretrained
        self.fc1 = nn.Linear(in_features=1000, out_features=2)

    def forward(self, x):
        """Run the backbone, apply ReLU, and return the 2-way linear output."""
        backbone_out = self.resnet_pretrained(x)
        activated = torch.relu(backbone_out)
        return self.fc1(activated)

In [None]:
# Wrap the frozen backbone with the 2-class head.
our_resnet_model = NN(resnet_pretrained=resnet)

In [None]:
# `is_available` must be *called*: the bare function object is always truthy,
# which would select 'cuda:0' even on a machine without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
device

In [None]:
# Move the model's parameters and buffers to the selected device.
our_resnet_model = our_resnet_model.to(device=device)

In [None]:
class Trainer:
    """Training / validation / inference loop for a classification model.

    Args:
        model: the ``nn.Module`` to train; only parameters with
            ``requires_grad=True`` are handed to the optimizer.
        device: device that every batch is moved to before the forward pass.
        lr: Adam learning rate.

    Attributes:
        history: dict filled by :meth:`fit` with
            ``train_loss`` (per batch), ``train_running_accuracy`` (per batch,
            running within the epoch), ``train_accuracy``, ``val_loss`` and
            ``val_accuracy`` (one entry per epoch).
    """

    def __init__(self, model, device, lr=0.001):
        self.model = model
        self.device = device

        self.loss = nn.CrossEntropyLoss()
        # Frozen parameters are excluded so the optimizer only tracks the head.
        self.optimizer = optim.Adam(
            [param for param in self.model.parameters() if param.requires_grad],
            lr=lr)
        self.history = {}

    def fit(self, train_dataloader, val_dataloader, num_epochs, log_every=100):
        """Train for ``num_epochs`` epochs, validating after each one.

        Args:
            train_dataloader: iterable of ``(x, y)`` training batches.
            val_dataloader: iterable of ``(x, y)`` validation batches.
            num_epochs: number of passes over ``train_dataloader``.
            log_every: print progress every this many batches (0 disables it).

        Progress is printed; all metrics are stored in ``self.history``.
        """
        history = {
            'train_loss': [],
            'train_running_accuracy': [],
            'train_accuracy': [],
            'val_loss': [],
            'val_accuracy': [],
        }
        self.history = history

        for epoch in range(num_epochs):
            self.model.train()
            # Counters are reset every epoch so the training accuracy is never
            # mixed with the previous epoch's (or the validation) counts.
            correct = 0
            total = 0

            for batch_number, (x, y) in enumerate(train_dataloader):
                self.optimizer.zero_grad()
                x = x.to(self.device)
                y = y.to(self.device)
                outputs = self.model(x)
                l = self.loss(outputs, y)
                l.backward()
                self.optimizer.step()

                _, predicted = torch.max(outputs, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
                loss_value = l.item()
                current_accuracy = correct / total if total else float('nan')
                history['train_loss'].append(loss_value)
                history['train_running_accuracy'].append(current_accuracy)

                if log_every and batch_number % log_every == 0:
                    print(f"batch number {batch_number}, loss_value: {loss_value}")
                    print(f"current_accuracy: {current_accuracy}")

            epoch_accuracy = correct / total if total else float('nan')
            history['train_accuracy'].append(epoch_accuracy)
            print(f"epoch_accuracy: {epoch_accuracy}")
            print(f"end of epoch {epoch}")

            val_loss, val_correct, val_total = self._validate(val_dataloader)
            val_accuracy = val_correct / val_total if val_total else float('nan')
            history['val_loss'].append(val_loss)
            history['val_accuracy'].append(val_accuracy)
            print(f"val_loss: {val_loss}")
            print(f"Total {val_total} Correct {val_correct} Accuracy {val_accuracy}")

    def _validate(self, val_dataloader):
        """Evaluate on ``val_dataloader``; return (mean batch loss, correct, total)."""
        batch_losses = []
        correct = 0
        total = 0
        self.model.eval()
        with torch.no_grad():
            for x, y in val_dataloader:
                x = x.to(self.device)
                y = y.to(self.device)
                outputs = self.model(x)
                batch_losses.append(self.loss(outputs, y).item())
                _, predicted = torch.max(outputs, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
        mean_loss = sum(batch_losses) / len(batch_losses) if batch_losses else float('nan')
        return mean_loss, correct, total

    def predict(self, test_dataloader):
        """Return class probabilities for every sample in ``test_dataloader``.

        Args:
            test_dataloader: iterable of input batches (no labels).

        Returns:
            NumPy array with one row per sample and one column per class; each
            row is a softmax over the model outputs. An empty loader yields an
            empty array.
        """
        self.model.eval()
        batch_probabilities = []
        with torch.no_grad():
            for x in test_dataloader:
                x = x.to(self.device)
                # dim=1: normalise across classes for each sample explicitly
                # instead of relying on the deprecated implicit dimension.
                probabilities = torch.nn.functional.softmax(self.model(x), dim=1)
                batch_probabilities.append(probabilities.cpu())
        if not batch_probabilities:
            return torch.tensor([]).numpy()
        # Concatenate once at the end instead of re-allocating on every batch.
        return torch.cat(batch_probabilities).numpy()
                
               
                


In [None]:
# Bind the model and its device to a trainer instance.
trainer = Trainer(our_resnet_model, device)

In [None]:
# Two passes over the training data, validating after each one.
trainer.fit(train_dataloader=train_dataloader, val_dataloader=val_dataloader, num_epochs=2)

In [None]:
# Softmax outputs for every test image, in loader order.
test_predictions = trainer.predict(test_dataloader=test_dataloader)

In [None]:
test_predictions[0]

In [None]:
test_predictions

In [None]:
# Column 1 of the prediction matrix is the softmax score of class index 1.
test = test.assign(label=test_predictions[:, 1])

In [None]:
# Distribution of the predicted scores over the test set.
predicted_scores = test['label']
plt.hist(predicted_scores, bins=100)
plt.show()

In [None]:
# Show the first forty test images, each titled with its rounded predicted score.
for row_idx in range(40):
    plt.figure(figsize=(6, 6))
    img = plt.imread(test.loc[row_idx, 'path'])
    plt.imshow(img)
    rounded_score = round(test.loc[row_idx, 'label'], 3)
    plt.title(f"predicted label: {rounded_score}")
    plt.show()

In [None]:
# Build the submission: keep the sample file's id order, replace its label
# column with the predictions (joined on the numeric id parsed from the file name).
ss = pd.read_csv('/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv')
ss = ss.drop(columns='label')
test['id'] = test['path'].apply(lambda image_path: int(image_path.split('/')[-1].split('.')[0]))
predictions_by_id = test[['id', 'label']]
ss = ss.merge(predictions_by_id, how = 'left', on ='id')
ss.to_csv('submission4.csv', index = None)