In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#%matplotlib inline 

import os
from sys import path
from zipfile import ZipFile

from tqdm import tqdm
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset , DataLoader
from torchvision import transforms
from torch.optim import Adam
from torch.autograd import Variable

In [None]:
# --- Notebook configuration -------------------------------------------------

# Optimisation hyper-parameters.
EPOCHS = 10
BATCH_SIZE = 20
LR = 1e-3

# Compute device: use the GPU when one is visible to PyTorch, else the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Filesystem locations (image folders keep their trailing slash).
TRAIN_PATH = "./train/"
TEST_PATH = "./test1/"
MODEL_PATH = "./model.pth"

In [None]:
class CustDat(Dataset):
    """Image dataset that yields transformed tensors for training, evaluation or prediction.

    Args:
        imgs: sequence of image file names located inside ``path``.
        class_to_int: mapping from the text before the first ``.`` of a file
            name to an integer label (used in "train" / "test" mode).
        path: directory that contains the image files.
        mode: "train" or "test" to get ``(image, label)`` items, or "pred" to
            get ``(image, id)`` items where ``id`` is the text before the
            first ``.`` of the file name.
        transforms: callable applied to the PIL image before it is returned.

    Raises:
        ValueError: if ``mode`` is not one of "train", "test" or "pred".
    """

    _LABELLED_MODES = ("train", "test")
    _VALID_MODES = _LABELLED_MODES + ("pred",)

    def __init__(self, imgs, class_to_int, path, mode, transforms):
        # Fail fast: an unknown mode would otherwise make __getitem__ return
        # None and surface later as an obscure collate error in the DataLoader.
        if mode not in self._VALID_MODES:
            raise ValueError(
                f"mode must be one of {self._VALID_MODES}, got {mode!r}"
            )
        self.imgs = imgs
        self.class_to_int = class_to_int
        self.mode = mode
        self.transforms = transforms
        self.path = path

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it with its label (or prediction id)."""
        file_name = self.imgs[idx]
        stem = file_name.split(".")[0]

        # Resolve the label before touching the disk so that a bad file name
        # fails without opening the image.
        if self.mode in self._LABELLED_MODES:
            target = self.class_to_int[stem]
        else:
            target = stem

        # os.path.join works whether or not `path` ends with a separator.
        # convert("RGB") guarantees three channels (grayscale / RGBA / CMYK
        # files would otherwise break Normalize and the network) and returns
        # a fully loaded copy, so the file handle can be closed right away.
        with Image.open(os.path.join(self.path, file_name)) as raw:
            rgb = raw.convert("RGB")

        return self.transforms(rgb), target

In [None]:
class CatandDogSet(Dataset):
    """Dataset over every file in a folder, returning untransformed PIL images.

    Each item is ``(image, label, file_path)`` where ``label`` is the text
    before the first ``.`` of the file name.
    """

    def __init__(self, path):
        """Remember the folder and snapshot its directory listing."""
        self.path = path
        self.img_name = os.listdir(path)

    def __len__(self):
        """Return how many directory entries were found."""
        return len(self.img_name)

    def __getitem__(self, idx):
        """Open the ``idx``-th file and return it with its label and full path."""
        file_name = self.img_name[idx]
        full_path = os.path.join(self.path, file_name)
        prefix, _, _ = file_name.partition(".")
        return Image.open(full_path), prefix, full_path

In [None]:
with ZipFile('../input/dogs-vs-cats/train.zip', 'r') as zip_ref:
    zip_ref.extractall('.')
    
with ZipFile('../input/dogs-vs-cats/test1.zip', 'r') as zip_ref:
    zip_ref.extractall('.') 
    
!ls

In [None]:
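# Disabled scratch check: show one training image together with the label
# parsed from the first three characters of its file name.
#path = os.listdir("./train/")[100]
#label = path[:3]
#img = Image.open("./train/"+ path)
#print(path, label, plt.imshow(img))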

In [None]:
# Split the labelled images into a training set and a disjoint hold-out set.
# Sorting first makes the split independent of the arbitrary os.listdir order.
imgs = sorted(os.listdir(TRAIN_PATH))

# Seeded generator so the same split is produced on every Restart & Run All.
split_rng = np.random.default_rng(42)

# `replace` must be the boolean False: any non-empty string (even "False") is
# truthy and would sample WITH replacement, producing duplicate training
# images and a hold-out set of the wrong size.
train_imgs = split_rng.choice(imgs, 20000, replace=False)
print(len(train_imgs))
print(len(set(train_imgs)))

# Everything that was not drawn for training is held out for evaluation.
test_imgs = np.setdiff1d(imgs, train_imgs)
print(len(test_imgs))
print(len(set(test_imgs)))

# Sanity check: the two sets partition the labelled images.
assert len(train_imgs) + len(test_imgs) == len(imgs), "train/test split is not a partition"

# Prediction files are addressed by number: "1.jpg" ... "<N>.jpg", where N is
# the number of entries in the test folder.
pred_imgs = [f"{i}.jpg" for i in range(1, len(os.listdir(TEST_PATH)) + 1)]

In [None]:
# Maps the class prefix of a training file name to its integer label.
class_to_int = {"cat": 0, "dog": 1}

# Per-channel ImageNet statistics. The torchvision pretrained weights were
# trained on inputs normalised this way; Normalize((0, 0, 0), (1, 1, 1)) is an
# identity and feeds the backbone inputs in a range it was not trained on.
# NOTE: a checkpoint trained with the old identity normalisation must be
# retrained before it is used with these transforms.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Training pipeline: fixed-size resize, random horizontal flip as augmentation,
# tensor conversion, then normalisation.
train_transforms = transforms.Compose([
    transforms.Resize((360, 360)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Evaluation / prediction pipeline: same as training but without augmentation.
test_pred_transforms = transforms.Compose([
    transforms.Resize((360, 360)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

In [None]:
# Raw (untransformed) view of the training folder, used for visual inspection.
dataset = CatandDogSet(TRAIN_PATH)

# Shuffled mini-batches of augmented training images.
train = DataLoader(
    CustDat(
        imgs=train_imgs,
        class_to_int=class_to_int,
        path=TRAIN_PATH,
        mode="train",
        transforms=train_transforms,
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Held-out labelled images, read from the same folder as the training images.
test = DataLoader(
    CustDat(
        imgs=test_imgs,
        class_to_int=class_to_int,
        path=TRAIN_PATH,
        mode="test",
        transforms=test_pred_transforms,
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Unlabelled images for the submission; DataLoader defaults (one image per
# batch, original order) are kept.
pred = DataLoader(
    CustDat(
        imgs=pred_imgs,
        class_to_int=class_to_int,
        path=TEST_PATH,
        mode="pred",
        transforms=test_pred_transforms,
    )
)

In [None]:
# Preview one raw training image picked uniformly at random.
img, label, path = dataset[np.random.randint(0, len(dataset))]
plt.imshow(img)

In [None]:
# Fetch a ResNet-34 with pretrained weights from the pinned torchvision hub repo.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'resnet34',
    pretrained=True,
)

In [None]:
# Input width of the network's existing final `fc` layer; the replacement head
# must accept the same number of features.
num_last_features = model.fc.in_features

In [None]:
# Binary-classification head: a single linear unit followed by a sigmoid, so
# the network emits one value in (0, 1) per image.
_head_linear = nn.Linear(in_features=num_last_features, out_features=1)
new_last_yr = nn.Sequential(_head_linear, nn.Sigmoid())

In [None]:
# Swap the network's final `fc` layer for the new head.
model.fc = new_last_yr

In [None]:
# Display which device was selected in the config cell.
DEVICE

In [None]:
# Move the model onto DEVICE; the cells below refer to it as `model_fin`.
model_fin = model.to(DEVICE)

In [None]:
# Only the `fc` head is handed to the optimiser, so freeze everything else.
# Without this, every backward pass still computes (and stores) gradients for
# the whole backbone even though they are never applied. The trained weights
# are unaffected; it only saves time and memory.
model_fin.requires_grad_(False)
model_fin.fc.requires_grad_(True)

# Binary cross-entropy on probabilities (the head ends with a sigmoid).
criterion = nn.BCELoss()
optimizer = Adam(model_fin.fc.parameters(), lr=LR)

In [None]:
# Train for EPOCHS epochs. Each epoch runs one pass over `train`, one
# evaluation pass over `test`, then saves a checkpoint and prints the mean
# training loss and both accuracies.
for epoch in range(EPOCHS):
    # ---- training pass ----------------------------------------------------
    model_fin.train()
    su_tr = 0       # correctly classified training samples this epoch
    tot_tr = 0      # training samples seen this epoch
    loss_list = []  # per-batch loss values

    train_pbar = tqdm(train, position=0, leave=True)
    for images, labels in train_pbar:
        # Use the configured device instead of hard-coded .cuda() calls so the
        # loop also runs on CPU-only machines.
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        # One value per sample: reshape to the label shape. torch.squeeze would
        # collapse a batch of size 1 to a 0-d tensor and break the loss.
        output = model_fin(images).reshape(labels.shape)
        loss = criterion(output, labels.float())
        loss.backward()
        optimizer.step()

        loss_list.append(loss.item())
        su_tr += (output.round() == labels).sum().item()
        tot_tr += labels.shape[0]

    # ---- evaluation pass ---------------------------------------------------
    model_fin.eval()
    su_te = 0
    tot_te = 0
    with torch.no_grad():
        for images, labels in test:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            output = model_fin(images).reshape(labels.shape)
            su_te += (output.round() == labels).sum().item()
            tot_te += labels.shape[0]

    # Save the whole module (not just the weights) after every epoch.
    torch.save(model_fin, MODEL_PATH)
    #torch.save(model_fin.state_dict(), MODEL_PATH)  # officially recommended

    # Guard the averages so an empty loader reports nan instead of crashing.
    mean_loss = sum(loss_list) / len(loss_list) if loss_list else float("nan")
    train_acc = su_tr / tot_tr if tot_tr else float("nan")
    test_acc = su_te / tot_te if tot_te else float("nan")
    print("loss is "+str(mean_loss)+" train accu "+str(train_acc)+" test accu "+str(test_acc))

In [None]:
def test_submit(model, loader=None):
    """Run ``model`` over a prediction loader and return the rounded 0/1 labels.

    Args:
        model: network that outputs one probability per image.
        loader: iterable of ``(images, ids)`` batches. Defaults to the
            notebook-level ``pred`` DataLoader.

    Returns:
        list[int]: one predicted label per image, in loader order.
    """
    if loader is None:
        loader = pred

    # Run on whatever device the model lives on rather than assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    pred_label = []
    with torch.no_grad():
        for images, _ids in tqdm(loader):
            output = model(images.to(device))
            # Flatten so any batch size works; int(tensor) only accepts a
            # single-element tensor.
            pred_label.extend(output.round().reshape(-1).int().tolist())

    return pred_label

def predict_CatandDog(path):
    """Print and plot the saved model's cat/dog prediction for one image.

    Args:
        path: either an image file, or a directory, in which case a random
            image from that directory is classified.
    """
    # SECURITY: torch.load unpickles the file, so only load checkpoints you trust.
    # map_location lets a checkpoint trained on GPU load on a CPU-only machine.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects whole-module checkpoints like this one. Confirm against the
    # installed version and pass weights_only=False if required.
    model = torch.load(MODEL_PATH, map_location=DEVICE)
    model.eval()

    # `path in "./test1/"` was a substring test: it matched "", "." or "t" and
    # rejected every directory other than the test folder. Check for a
    # directory explicitly instead.
    if os.path.isdir(path):
        test_set = CatandDogSet(path)
        img, _label, _img_path = test_set[np.random.randint(len(test_set))]
    else:
        img = Image.open(path)
    # Force three channels so grayscale / RGBA files are accepted too.
    img = img.convert("RGB")

    # Add the batch dimension without hard-coding the image size.
    img_ts = test_pred_transforms(img).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        output = model(img_ts)
    ch_label = "dog" if int(output.round().item()) == 1 else "cat"

    print(ch_label)
    plt.imshow(img)

In [None]:
# Load the sample submission and fill its `label` column with the model's
# predictions, then display the frame.
submit = pd.read_csv('../input/dogs-vs-cats/sampleSubmission.csv').assign(
    label=test_submit(model_fin)
)
submit

In [None]:
# Write the submission file without the DataFrame index column.
submit.to_csv(path_or_buf='./submit_dogcat.csv', index=False)

In [None]:
# Directory argument: classify a randomly chosen image from the test folder.
predict_CatandDog("./test1")

In [None]:
# File argument: classify this specific image.
predict_CatandDog("./test1/1234.jpg")