In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import time
import os
import numpy as np
import pickle
import sys

In [2]:
# Build a ResNet-50 initialised from the IMAGENET1K_V2 weights and replace its
# final fully-connected layer with a 2-output linear head.
pretrained_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2
resnet = torchvision.models.resnet50(weights=pretrained_weights)
resnet.fc = torch.nn.Linear(in_features=2048, out_features=2)

# Device index 0 is used for the model and, in later cells, for every batch.
my_gpu = torch.device(0)
# Last expression of the cell: moves the model and displays its structure.
resnet.to(my_gpu)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
# Compute normalizations
# For every image listed in trainval.txt, record its pixel count together with
# the per-channel mean and standard deviation of its RGB values scaled to
# [0, 1]. The per-image results are combined into dataset statistics elsewhere.
from PIL import Image
means = []   # one per-channel mean array (length 3) per image
stds = []    # one per-channel std array (length 3) per image
px = []      # number of pixels in each image
pxTot = 0    # running total of pixels over all images
with open('G:/datasets/oxford-iiit-pet/annotations/trainval.txt','r') as f:
    buff = f.read()
folder = 'G:/datasets/oxford-iiit-pet/images/'
# splitlines() plus the blank-line guard keeps the last entry even when the
# file has no trailing newline, and ignores stray empty lines.
for line in buff.splitlines():
    if not line.strip():
        continue
    # The image name is the first space-separated field of each line.
    imgName = line.split(' ')[0]+'.jpg'
    imgFname = os.path.join(folder,imgName)
    # The context manager releases the underlying file handle promptly.
    with Image.open(imgFname) as img:
        rgb = img.convert("RGB")
    # (H, W, 3) image flattened to (H*W, 3), one row per pixel.
    pxVals = np.asarray(rgb, dtype=np.float64).reshape(-1, 3)/255.0
    px += [pxVals.shape[0]]
    means += [np.mean(pxVals,axis=0)]
    stds += [np.std(pxVals,axis=0)]
    pxTot += pxVals.shape[0]

In [8]:
# Combine the per-image statistics into dataset-wide per-channel statistics.
# Each image contributes in proportion to its pixel count.
img_means = np.stack(means)                        # (num_images, 3)
img_stds = np.stack(stds)                          # (num_images, 3)
px_weights = np.asarray(px, dtype=np.float64)[:, None] / pxTot

# Pixel-weighted mean over all images.
meanSet = (px_weights * img_means).sum(axis=0)

# A weighted average of per-image stds is NOT the dataset std: it ignores how
# much the image means differ from one another. Pool the second moments
# instead: E[x^2] = sum_i w_i * (std_i^2 + mean_i^2), var = E[x^2] - mean^2.
# This is exact here because np.std defaults to the population std (ddof=0).
second_moment = (px_weights * (img_stds**2 + img_means**2)).sum(axis=0)
# Clamp at zero to guard against tiny negative values from rounding.
stdSet = np.sqrt(np.maximum(second_moment - meanSet**2, 0.0))

In [14]:
# Persist the dataset statistics to pets2.p as a two-element list [mean, std].
with open('pets2.p', 'wb') as stats_file:
    pickle.dump([meanSet, stdSet], stats_file)

In [5]:
# Read back the [mean, std] pair stored in pets2.p.
with open('pets2.p', 'rb') as stats_file:
    loaded_stats = pickle.load(stats_file)
meanSet, stdSet = loaded_stats

In [3]:
# Preprocessing: convert each image to a tensor, then normalise every channel
# with a fixed mean/std of 0.5. (Earlier experiments resized to 1024x1024, or
# normalised with meanSet.tolist() / stdSet.tolist() instead.)
to_tensor = torchvision.transforms.ToTensor()
normalize = torchvision.transforms.Normalize(mean=[.5,.5,.5],std=[.5,.5,.5])
img_transform = torchvision.transforms.Compose([to_tensor, normalize])

# Oxford-IIIT Pet with "binary-category" targets: the default split for
# training and the 'test' split for evaluation.
pet_options = dict(transform=img_transform, target_types="binary-category")
trainset = datasets.OxfordIIITPet('G:/datasets/', **pet_options)
testset = datasets.OxfordIIITPet('G:/datasets/', split='test', **pet_options)

# One image per batch; only the training loader shuffles.
trainloader = DataLoader(trainset, batch_size=1, shuffle=True)
testloader = DataLoader(testset, batch_size=1)

# SGD with momentum over all model parameters, trained with cross-entropy.
optim = torch.optim.SGD(resnet.parameters(), lr=1e-4, momentum=0.9)
crloss = torch.nn.CrossEntropyLoss()

In [5]:
# Display the dataset's bin_classes attribute (the class names shown in the cell output).
trainset.bin_classes

['Cat', 'Dog']

In [6]:
# Fine-tune the model for a fixed number of epochs. During training, the mean
# loss of each 100-step window is printed. After every epoch the model is
# switched to eval mode, scored for top-1 accuracy, and switched back to
# train mode.
loss_total = 0   # sum of losses in the current reporting window
step_total = 0   # number of steps in the current reporting window
epochs = 5
for epoch in range(epochs):
    for img,target in trainloader:
        img = img.to(my_gpu)
        # The target comes out of the DataLoader as a tensor already, so it
        # only needs to be moved to the device.
        target = target.to(my_gpu)
        optim.zero_grad()
        output = resnet(img)
        loss = crloss(output,target)
        loss.backward()
        optim.step()
        loss_total += loss.item()
        step_total += 1
        # Report the mean loss of the last 100 steps, then start a new window.
        # The window is not reset at epoch boundaries.
        if step_total % 100 == 0:
            print(loss_total/step_total)
            loss_total = 0
            step_total = 0
    resnet.eval()
    samples = 0
    correct = 0
    # NOTE(review): this scores the *training* loader, so the printed figure is
    # training accuracy; iterate testloader here for held-out accuracy.
    # Gradients are not needed for scoring, so skip building autograd graphs.
    with torch.no_grad():
        for img,target in trainloader:
            img = img.to(my_gpu)
            target = target.to(my_gpu)
            output = resnet(img)
            # Count per sample so the tally stays right for any batch size.
            samples += target.size(0)
            correct += (output.argmax(dim=1) == target).sum().item()
    print('=======Epoch '+str(epoch)+'========')
    print('% correct')
    print(correct/samples)
    print('======================')
    resnet.train()

0.663581845164299
0.6155605083703994
0.592000932097435
0.6751610872149467
0.5887317162752151
0.6257332482933998
0.6021158581972123
0.6141657182574272
0.6232419952750206
0.5744572883844375
0.5834768632054329
0.592637206017971
0.5436692699790001
0.5426294799149036
0.5202167256176472
0.5085837797820568
0.5502143274247646
0.5194624081254006
0.5159053406119347
0.4902373071014881
0.47680354952812193
0.4375767928361893
0.42169242836534976
0.39097883880138395
0.3687608067691326
0.3054849696904421
0.3373391476646066
0.30366690404713154
0.2442608793824911
0.2238437011092901
0.23065820999443531
0.21025106022134424
0.1888193991407752
0.17630351612344383
0.17873024562373757
0.1373212500102818
% correct
0.9657608695652173
0.11769979215227068
0.11443480278365314
0.08936834058724344
0.09485567497555167
0.10137063635978848
0.08264548687962815
0.08267131797038019
0.07293415494728833
0.06898615265032276
0.05897252506809309
0.0800251575105358
0.04611614447087049
0.06164936733199283
0.04368295985157602
0.0

In [5]:
# Save the model's state_dict to pets2.pth.
torch.save(resnet.state_dict(),'pets2.pth')

In [9]:
# Rebuild a ResNet-50 with a 2-class head and restore the weights saved in
# pets2.pth.
resnet = torchvision.models.resnet50(num_classes=2)
# map_location loads the tensors straight onto the target device, whatever
# device they were saved from. weights_only=True restricts unpickling to
# tensors and plain containers, which is all a state_dict needs.
sd = torch.load('pets2.pth', map_location=my_gpu, weights_only=True)
resnet.load_state_dict(sd)
# Last expression of the cell: moves the model and displays its structure.
resnet.to(my_gpu)

In [None]:
# Score the model on the test loader: prints top-1 accuracy, elapsed wall-clock
# seconds, and throughput in samples per second.
resnet.eval()
tStart = time.time()
samples = 0
correct = 0
# Gradients are not needed for scoring, so skip building autograd graphs.
with torch.no_grad():
    for img,target in testloader:
        img = img.to(my_gpu)
        # The target comes out of the DataLoader as a tensor already, so it
        # only needs to be moved to the device.
        target = target.to(my_gpu)
        output = resnet(img)
        # Count per sample so the tally stays right for any batch size.
        samples += target.size(0)
        # .item() copies the count to the host, which also waits for the GPU
        # work on this batch to finish before the clock is read below.
        correct += (output.argmax(dim=1) == target).sum().item()
tEnd = time.time()
print(correct/samples)
print(tEnd - tStart)
# Samples (not batches) per second; identical to the batch rate at batch_size=1.
print(samples/(tEnd-tStart))