In [75]:
import torch
from torch.utils.data import Dataset
import numpy as np
import json
import random
from PIL import Image
from matplotlib import cm
from IPython.display import display
import os

In [76]:
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)
print(torch.cuda.is_available())

False


In [77]:
data_dir = "../Project1/data"
image_dir = "../Project1/data/images"

In [78]:
class MyData(Dataset):
    def __init__(self):
        file = '../Project1/raw/data.json'
        f = open(file)
        self.data = json.load(f)
        f.close()
        self.labels = {} # filename: label
        self.box_w = []
        self.box_h = []
        self.images = {}
        self.bounding_boxes = {}
        self.x_train = []
        self.y_train = []
        self.gts = {}
        for key,val in self.data.items():
            try:
                imgPIL = Image.open('../Project1/raw/'+val['filename'])
                origwidth = imgPIL.size[0]
                origheight = imgPIL.size[1]
                imgPIL = imgPIL.resize((1280,1280),Image.LANCZOS)
                image = torch.Tensor(np.asarray(imgPIL)/255)
                self.images[val['filename']] = image
                gt = torch.zeros((image.shape[0:2]))
                box_data = [] # x_center, y_center, width, height (from 0 - 1)
                for k2,v2 in val['regions'].items():
                    rectdata = v2['shape_attributes']
                    x1 = int(np.floor(1280*float(rectdata['x'])/origwidth))
                    y1 = int(np.floor(1280*float(rectdata['y'])/origheight))
                    x2 = int(np.ceil(1280*float(rectdata['x']+rectdata['width'])/origwidth))
                    y2 = int(np.ceil(1280*float(rectdata['y']+rectdata['height'])/origheight))
                    gt[y1:y2,x1:x2] = 1
                    box_data.append([x1,y1,x2,y2])
                    self.box_w.append(x2-x1)
                    self.box_h.append(y2-y1)
                self.gts[val['filename']] = gt
                self.bounding_boxes[val['filename']] = box_data
                print('File: {}; number: {}'.format(val['filename'],len(val['regions'])))
            except IOError:
                print('File not found: {}'.format(val['filename']))
    def gen_train_data(self):
        i = 0
        for filename in self.images:
            image = self.images[filename]
            box_data = self.bounding_boxes[filename]
            gt = self.gts[filename]
            for x1,y1,x2,y2 in box_data:
                cropped = image[y1:min(y2,1280),x1:min(x2,1280),:]
                cropped_img = Image.fromarray(np.uint8((cropped.numpy())*255))
                cropped_img_path = os.path.join(image_dir,filename.split('.')[0]+"_"+str(i)+".jpg")
                i += 1
                cropped_img.save(cropped_img_path)
                self.x_train.append(cropped_img)
                self.y_train.append(1)
                self.labels[cropped_img_path] = 1
            max_w,min_w,max_h,min_h = max(self.box_w),min(self.box_w),max(self.box_h),min(self.box_h)
            step = 10
            for k in range(200):
                x = random.randint(0,1280-max_w)
                y = random.randint(0,1280-max_h)
                # randomly pick some windows that do not contain nodes
                x1,x2,y1,y2 = x,x+max_w,y,y+max_h
                if np.sum(np.array(gt[y1:y2,x1:x2])) == 0:
                    cropped = image[y1:y2,x1:x2,:]
                    cropped_img = Image.fromarray(np.uint8((cropped.numpy())*255))
                    cropped_img_path = os.path.join(image_dir,filename.split('.')[0]+"_"+str(i)+".jpg")
                    i += 1
                    # cropped_img.save(cropped_img_path) no need to save
                    self.x_train.append(cropped_img)
                    self.y_train.append(0)
                    self.labels[cropped_img_path] = 0
                else:
                    k -= 1

    def __len__(self):
        return len(self.imgs)
    def __getitem__(self,idx):
        d,g = self.imgs[idx], self.gts[idx].unsqueeze(0)
        return d, g, idx

In [79]:
data = MyData()

File: 005-9-01.jpg; number: 276
File: 022-5-02.jpg; number: 10
File: 22-14-01.jpg; number: 261
File: W005-4-05.jpg; number: 133
File: W005-08-03.jpg; number: 426
File: W005-08-04.jpg; number: 271
File: W005-08-06.jpg; number: 199
File: W005-14-02.jpg; number: 18
File not found: W022-02-07.jpg
File: W022-04-04.jpg; number: 237
File: W022-07-03.jpg; number: 280
File not found: W022-16-05.jpg
File not found: W022-19-06.jpg
File not found: W035-11-05.jpg
File not found: W035-18-03.jpg
File not found: W035-19-06.jpg
File not found: W035-19-07.jpg
File not found: W035-20-02.jpg


In [80]:
data.gen_train_data()

In [81]:
len(np.where(np.array(data.y_train)>0)[0])

2111

# Start Training

In [82]:
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import time
import copy

In [83]:
seed = 0
transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [275]:
from torch.nn.modules.pooling import MaxPool2d
class myNet(nn.Module):
    def __init__(self):
        super(myNet, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2),stride=2),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(4,4),stride=2),
            nn.Dropout(0.25),
            nn.Flatten(),
            nn.Linear(in_features=6272,out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256,out_features=128),
            nn.Tanh(),
            nn.Linear(in_features=128,out_features=1),
            nn.Sigmoid(),
            )
        
    def forward(self, x):
        return self.layer(x)

In [283]:
class TrainDataset(Dataset):
    def __init__(self):
        self.x = []
        self.y = []

    def get_data(self,x_data,y_data):
        for im in x_data:
            self.x.append(transform(im))
        for l in y_data:
            self.y.append(torch.tensor(l, dtype=torch.float32))
    
    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return len(self.y)

In [309]:
net = myNet()
criterion = nn.BCELoss()
optimizer_ft = optim.Adam(net.parameters(), lr=0.005)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
device = "cpu"

In [310]:
train_data = TrainDataset()
train_data.get_data(data.x_train,data.y_train)

In [311]:
batch_size = 128
numTrain = int(len(train_data)*0.6)
numVal = int(len(train_data)*0.2)
numTest = len(train_data)-numTrain-numVal

In [312]:
train, val, test = random_split(train_data, [numTrain, numVal, numTest], generator=torch.Generator().manual_seed(seed))
trainloader = DataLoader(train, batch_size=batch_size,shuffle=True)
valloader = DataLoader(val, batch_size=batch_size,shuffle=False)
testloader = DataLoader(test, batch_size=batch_size,shuffle=False)
torch.save(trainloader, os.path.join(data_dir,"trainloader.pt"))
torch.save(valloader, os.path.join(data_dir,"valloader.pt"))
torch.save(testloader, os.path.join(data_dir,"testloader.pt"))

In [316]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            running_loss = 0.0
            running_corrects = 0
            correct = 0
            if phase == 'train':
                model.train()  # Set model to training mode
                for inputs, labels in trainloader:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs).view(-1)
                        loss = criterion(outputs, labels)
                        # backward + optimize
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        correct +=  (outputs.detach().numpy().round() == labels.detach().numpy()).sum().item()
                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                print('train Accuracy:{:.4f}'.format(correct/len(trainloader.dataset)))
                scheduler.step()
                epoch_loss = running_loss / len(train_data)
            else:
                with torch.no_grad():
                    model.eval()   # Set model to evaluate mode
                    for inputs, labels in valloader:
                        inputs = inputs.to(device)
                        labels = labels.to(device)
                        # zero the parameter gradients
                        optimizer.zero_grad()
                        # forward
                        # track history if only in train
                        with torch.set_grad_enabled(phase == 'train'):
                            outputs = model(inputs).view(-1)
                            loss = criterion(outputs, labels)
                            correct +=  (outputs.detach().numpy().round() == labels.detach().numpy()).sum().item()
                        # statistics
                        running_loss += loss.item() * inputs.size(0)
                epoch_loss = running_loss / len(train_data)
                print('val Accuracy:{:.4f}'.format(correct/len(valloader.dataset)))
            # Iterate over data.

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))
            # deep copy the model
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [317]:
model = train_model(net, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Accuracy:0.9395
train Loss: 0.1603
val Accuracy:0.9648
val Loss: 0.0298

Epoch 1/9
----------
train Accuracy:0.9648
train Loss: 0.0761
val Accuracy:0.9700
val Loss: 0.0233

Epoch 2/9
----------
train Accuracy:0.9765
train Loss: 0.0552
val Accuracy:0.9752
val Loss: 0.0171

Epoch 3/9
----------
train Accuracy:0.9791
train Loss: 0.0434
val Accuracy:0.9817
val Loss: 0.0130

Epoch 4/9
----------
train Accuracy:0.9826
train Loss: 0.0330
val Accuracy:0.9869
val Loss: 0.0114

Epoch 5/9
----------
train Accuracy:0.9861
train Loss: 0.0323
val Accuracy:0.9778
val Loss: 0.0151

Epoch 6/9
----------
train Accuracy:0.9852
train Loss: 0.0285
val Accuracy:0.9804
val Loss: 0.0134

Epoch 7/9
----------
train Accuracy:0.9896
train Loss: 0.0277
val Accuracy:0.9883
val Loss: 0.0090

Epoch 8/9
----------
train Accuracy:0.9913
train Loss: 0.0217
val Accuracy:0.9896
val Loss: 0.0086

Epoch 9/9
----------
train Accuracy:0.9904
train Loss: 0.0215
val Accuracy:0.9896
val Loss: 0.0091



In [326]:
torch.save(model,os.path.join(data_dir,"model.pt"))

In [325]:
for inputs, labels in testloader:
    outputs = model(inputs.to(device))
    for i in range(len(outputs.detach().numpy().round())):
        print("pred:{},label:{}, correct:{}".format(outputs.detach().numpy().round()[i][0],labels.detach().numpy()[i],outputs.detach().numpy().round()[i][0]==labels.detach().numpy()[i]))

pred:1.0,label:1.0, correct:True
pred:0.0,label:0.0, correct:True
pred:0.0,label:0.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:1.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:1.0, correct:True
pred:1.0,label:1.0, correct:True
pred:0.0,label:0.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:1.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:0.0, correct:False
pred:0.0,label:0.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:0.0, correct:False
pred:1.0,label:1.0, correct:True
pred:0.0,label:0.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:1.0, correct:True
pred:1.0,label:1.0, correct:True
pred:1.0,label:1.0, correct:True
pred:1.0,label:0.0, correct:False
pred:1.0,label:0.0, correct:False
pred:1.0,label:1.0, correct:True
pred:1.0,label:1.0, correct:True
pred:0.0,label:0.0, correct:True
pred:0.0,label:0.0, correct:True
pred:1.0,label:1.0, correct:True
pred:1.0,label:1.0, correct:True
pred:1