In [1]:
import torch
from torch.utils.data import Dataset
import numpy as np
import json
import random
from PIL import Image
from matplotlib import cm
from IPython.display import display
import os

In [55]:
torch.manual_seed(10)
np.random.seed(10)
random.seed(10)
print(torch.cuda.is_available())

False


In [3]:
data_dir = "../Project1/data"
image_dir = "../Project1/data/images"

In [7]:
class MyData(Dataset):
    def __init__(self):
        file = '../Project1/raw/data.json'
        f = open(file)
        self.data = json.load(f)
        f.close()
        self.labels = {} # filename: label
        self.box_w = []
        self.box_h = []
        self.images = {}
        self.bounding_boxes = {}
        self.x_train = []
        self.y_train = []
        self.gts = {}
        for key,val in self.data.items():
            try:
                imgPIL = Image.open('../Project1/raw/'+val['filename'])
                origwidth = imgPIL.size[0]
                origheight = imgPIL.size[1]
                imgPIL = imgPIL.resize((1280,1280),Image.LANCZOS)
                image = torch.Tensor(np.asarray(imgPIL)/255)
                self.images[val['filename']] = image
                gt = torch.zeros((image.shape[0:2]))
                box_data = [] # x_center, y_center, width, height (from 0 - 1)
                for k2,v2 in val['regions'].items():
                    rectdata = v2['shape_attributes']
                    x1 = int(np.floor(1280*float(rectdata['x'])/origwidth))
                    y1 = int(np.floor(1280*float(rectdata['y'])/origheight))
                    x2 = int(np.ceil(1280*float(rectdata['x']+rectdata['width'])/origwidth))
                    y2 = int(np.ceil(1280*float(rectdata['y']+rectdata['height'])/origheight))
                    gt[y1:y2,x1:x2] = 1
                    box_data.append([x1,y1,x2,y2])
                    self.box_w.append(x2-x1)
                    self.box_h.append(y2-y1)
                self.gts[val['filename']] = gt
                self.bounding_boxes[val['filename']] = box_data
                print('File: {}; number: {}'.format(val['filename'],len(val['regions'])))
            except IOError:
                print('File not found: {}'.format(val['filename']))
    def gen_train_data(self):
        i = 0
        for filename in self.images:
            image = self.images[filename]
            box_data = self.bounding_boxes[filename]
            gt = self.gts[filename]
            for x1,y1,x2,y2 in box_data:
                cropped = image[y1:min(y2,1280),x1:min(x2,1280),:]
                cropped_img = Image.fromarray(np.uint8((cropped.numpy())*255))
                cropped_img_path = os.path.join(image_dir,filename.split('.')[0]+"_"+str(i)+".jpg")
                i += 1
                # cropped_img.save(cropped_img_path)
                self.x_train.append(cropped_img)
                self.y_train.append(1)
                self.labels[cropped_img_path] = 1
            max_w,min_w,max_h,min_h = max(self.box_w),min(self.box_w),max(self.box_h),min(self.box_h)
            step = 10
            for k in range(200):
                x = random.randint(0,1280-max_w)
                y = random.randint(0,1280-max_h)
                # randomly pick some windows that do not contain nodes
                x1,x2,y1,y2 = x,x+max_w,y,y+max_h
                if np.sum(np.array(gt[y1:y2,x1:x2])) == 0:
                    cropped = image[y1:y2,x1:x2,:]
                    cropped_img = Image.fromarray(np.uint8((cropped.numpy())*255))
                    cropped_img_path = os.path.join(image_dir,filename.split('.')[0]+"_"+str(i)+".jpg")
                    i += 1
                    # cropped_img.save(cropped_img_path) no need to save
                    self.x_train.append(cropped_img)
                    self.y_train.append(0)
                    self.labels[cropped_img_path] = 0
                else:
                    k -= 1

    def __len__(self):
        return len(self.imgs)
    def __getitem__(self,idx):
        d,g = self.imgs[idx], self.gts[idx].unsqueeze(0)
        return d, g, idx

In [8]:
data = MyData()

File: 005-9-01.jpg; number: 276
File: 022-5-02.jpg; number: 10
File: 22-14-01.jpg; number: 261
File: W005-4-05.jpg; number: 133
File: W005-08-03.jpg; number: 426
File: W005-08-04.jpg; number: 271
File: W005-08-06.jpg; number: 199
File: W005-14-02.jpg; number: 18
File not found: W022-02-07.jpg
File: W022-04-04.jpg; number: 237
File: W022-07-03.jpg; number: 280
File not found: W022-16-05.jpg
File not found: W022-19-06.jpg
File not found: W035-11-05.jpg
File not found: W035-18-03.jpg
File not found: W035-19-06.jpg
File not found: W035-19-07.jpg
File not found: W035-20-02.jpg


In [9]:
data.gen_train_data()

In [10]:
len(np.where(np.array(data.y_train)>0)[0])

2111

# Start Training

In [56]:
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import time
import copy

In [153]:
seed = 0
transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [181]:
from torch.nn.modules.pooling import MaxPool2d
class myNet(nn.Module):
    def __init__(self):
        super(myNet, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=4, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2),stride=2),
            nn.Conv2d(32, 64, kernel_size=2, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2),stride=2),
            nn.Conv2d(64, 128, kernel_size=2, stride=2, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2),stride=2),
            nn.Dropout(0.25),
            nn.Flatten(),
            nn.Linear(in_features=1152,out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512,out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128,out_features=1),
            nn.Sigmoid(),
            )
        
    def forward(self, x):
        return self.layer(x)

In [182]:
class TrainDataset(Dataset):
    def __init__(self):
        self.x = []
        self.y = []

    def get_data(self,x_data,y_data):
        for im in x_data:
            self.x.append(transform(im))
        for l in y_data:
            self.y.append(torch.tensor(l, dtype=torch.float32))
    
    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return len(self.y)

In [190]:
net = myNet()
criterion = nn.BCELoss()
optimizer_ft = optim.Adam(net.parameters(), lr=0.002)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
device = "cpu"

In [191]:
train_data = TrainDataset()
train_data.get_data(data.x_train,data.y_train)

In [192]:
batch_size = 64
numTrain = int(len(train_data)*0.6)
numVal = int(len(train_data)*0.2)
numTest = len(train_data)-numTrain-numVal

In [193]:
train, val, test = random_split(train_data, [numTrain, numVal, numTest], generator=torch.Generator().manual_seed(seed))
trainloader = DataLoader(train, batch_size=batch_size,shuffle=True)
valloader = DataLoader(val, batch_size=batch_size,shuffle=False)
testloader = DataLoader(test, batch_size=batch_size,shuffle=False)
torch.save(trainloader, os.path.join(data_dir,"trainloader.pt"))
torch.save(valloader, os.path.join(data_dir,"valloader.pt"))
torch.save(testloader, os.path.join(data_dir,"testloader.pt"))

In [198]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            running_loss = 0.0
            running_corrects = 0
            correct = 0
            if phase == 'train':
                model.train()  # Set model to training mode
                for inputs, labels in trainloader:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs).view(-1)
                        loss = criterion(outputs, labels)
                        # backward + optimize
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        correct +=  (outputs.detach().numpy().round() == labels.detach().numpy()).sum().item()
                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                print('train Accuracy:{:.4f}'.format(correct/len(trainloader.dataset)))
                scheduler.step()
                epoch_loss = running_loss / len(train_data)
            else:
                with torch.no_grad():
                    model.eval()   # Set model to evaluate mode
                    for inputs, labels in valloader:
                        inputs = inputs.to(device)
                        labels = labels.to(device)
                        # zero the parameter gradients
                        optimizer.zero_grad()
                        # forward
                        # track history if only in train
                        with torch.set_grad_enabled(phase == 'train'):
                            outputs = model(inputs).view(-1)
                            loss = criterion(outputs, labels)
                            correct +=  (outputs.detach().numpy().round() == labels.detach().numpy()).sum().item()
                        # statistics
                        running_loss += loss.item() * inputs.size(0)
                epoch_loss = running_loss / len(train_data)
                print('val Accuracy:{:.4f}'.format(correct/len(valloader.dataset)))
            # Iterate over data.

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))
            # deep copy the model
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # load best model weights
    # model.load_state_dict(best_model_wts)
    return model

In [199]:
model = train_model(net, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Accuracy:0.7567
train Loss: 0.3408
val Accuracy:0.8864
val Loss: 0.0490

Epoch 1/9
----------
train Accuracy:0.9669
train Loss: 0.0740
val Accuracy:0.9778
val Loss: 0.0163

Epoch 2/9
----------
train Accuracy:0.9765
train Loss: 0.0438
val Accuracy:0.9804
val Loss: 0.0140

Epoch 3/9
----------
train Accuracy:0.9852
train Loss: 0.0330
val Accuracy:0.9791
val Loss: 0.0126

Epoch 4/9
----------
train Accuracy:0.9856
train Loss: 0.0269
val Accuracy:0.9830
val Loss: 0.0103

Epoch 5/9
----------
train Accuracy:0.9865
train Loss: 0.0261
val Accuracy:0.9830
val Loss: 0.0100

Epoch 6/9
----------
train Accuracy:0.9869
train Loss: 0.0255
val Accuracy:0.9843
val Loss: 0.0097

Epoch 7/9
----------
train Accuracy:0.9874
train Loss: 0.0246
val Accuracy:0.9843
val Loss: 0.0095

Epoch 8/9
----------
train Accuracy:0.9878
train Loss: 0.0247
val Accuracy:0.9830
val Loss: 0.0094

Epoch 9/9
----------
train Accuracy:0.9900
train Loss: 0.0228
val Accuracy:0.9843
val Loss: 0.0092



In [202]:
torch.save(model,os.path.join("./model.pt"))

In [201]:
model.eval()
correct = 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs.to(device))
        # print(outputs.view(-1))
        for i in range(len(outputs.detach().numpy().round())):
            print("pred:{},label:{}, correct:{}".format(outputs.detach().numpy()[i][0],labels.detach().numpy()[i],outputs.detach().numpy().round()[i][0]==labels.detach().numpy()[i]))
            if outputs.detach().numpy().round()[i][0]==labels.detach().numpy()[i]:
                correct +=  1
    print('accuracy: {}'.format(correct/len(testloader.dataset)))

pred:0.9968127608299255,label:1.0, correct:True
pred:0.003394708037376404,label:0.0, correct:True
pred:0.00015211626305244863,label:0.0, correct:True
pred:0.010890219360589981,label:0.0, correct:True
pred:0.9958603978157043,label:1.0, correct:True
pred:0.015626968815922737,label:0.0, correct:True
pred:0.9957160353660583,label:1.0, correct:True
pred:0.9596471786499023,label:1.0, correct:True
pred:0.000875182799063623,label:0.0, correct:True
pred:0.0005397836212068796,label:0.0, correct:True
pred:0.9905224442481995,label:1.0, correct:True
pred:0.004358378238976002,label:0.0, correct:True
pred:0.0008140229620039463,label:0.0, correct:True
pred:4.833287312067114e-05,label:0.0, correct:True
pred:0.014332939870655537,label:0.0, correct:True
pred:0.0029386673122644424,label:0.0, correct:True
pred:0.9987315535545349,label:1.0, correct:True
pred:5.721901106880978e-05,label:0.0, correct:True
pred:5.245916690910235e-05,label:0.0, correct:True
pred:0.996148943901062,label:1.0, correct:True
pred:0.

In [197]:
model1 = torch.load("./model_old.pt")
model1.eval()
correct = 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model1(inputs.to(device))
        # print(outputs.view(-1))
        for i in range(len(outputs.detach().numpy().round())):
            print("pred:{},label:{}, correct:{}".format(outputs.detach().numpy()[i][0],labels.detach().numpy()[i],outputs.detach().numpy().round()[i][0]==labels.detach().numpy()[i]))
            if outputs.detach().numpy().round()[i][0]==labels.detach().numpy()[i]:
                correct +=  1
    print('accuracy: {}'.format(correct/len(testloader.dataset)))

pred:0.5711843371391296,label:1.0, correct:True
pred:0.17134639620780945,label:0.0, correct:True
pred:0.1855345368385315,label:0.0, correct:True
pred:0.19594837725162506,label:0.0, correct:True
pred:0.5717763900756836,label:1.0, correct:True
pred:0.2340906411409378,label:0.0, correct:True
pred:0.5712940692901611,label:1.0, correct:True
pred:0.571761965751648,label:1.0, correct:True
pred:0.19879205524921417,label:0.0, correct:True
pred:0.4566923975944519,label:0.0, correct:True
pred:0.5717451572418213,label:1.0, correct:True
pred:0.17292973399162292,label:0.0, correct:True
pred:0.500709593296051,label:0.0, correct:False
pred:0.4975651502609253,label:0.0, correct:True
pred:0.19486768543720245,label:0.0, correct:True
pred:0.5593975186347961,label:0.0, correct:False
pred:0.5071094036102295,label:1.0, correct:True
pred:0.4968653917312622,label:0.0, correct:True
pred:0.4987078905105591,label:0.0, correct:True
pred:0.5714699625968933,label:1.0, correct:True
pred:0.571524977684021,label:1.0, c