In [1]:
import torch
import torch.nn as nn
import torch.utils.data as udata
from torch.autograd import Variable
from torchvision import datasets, transforms, models
import numpy as np 
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw 
import cv2
import csv
import os
from classes import CNN

In [2]:
device = torch.device("cuda: 0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:
TRAIN_DIR = "data/train"
TRAIN_LABEL = "data/train.csv"
DATA_DIR = "data/data150.npy"

In [4]:
IMG_SIZE = 150
BATCH_SIZE = 1

In [5]:
annotation = np.load(os.path.join(TRAIN_DIR,"annotation.npy"),allow_pickle=True)

In [6]:
def visualize_train_data():
    a = np.random.randint(30000)
    fig = plt.figure(figsize=(20,30))
    for id, (name,label,bbox) in enumerate(annotation[a:a+10]):
        fig.add_subplot(5,2,id+1)

        img = cv2.imread(os.path.join(TRAIN_DIR,name))
        h,w,_ = img.shape

        targetSize = IMG_SIZE

        x_scale = targetSize/w
        y_scale = targetSize/h

        bx,by,bw,bh = bbox
        bx = int(np.round(bx * x_scale))
        by = int(np.round(by * y_scale))
        bw = int(np.round(bw * x_scale))
        bh = int(np.round(bh * y_scale))

        img = cv2.resize(img, (targetSize, targetSize))

        cv2.rectangle(img,(bx,by),(bx+bw,by+bh), color = (255,0,0),thickness = 1)
        plt.imshow(img)

    plt.show() 

In [7]:
class CNN(nn.Module):
    def __init__(self, inp, d, out):
        super(CNN, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(inp,d,kernel_size=3,stride = 1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2,2),
            
            nn.Conv2d(d,d*2,kernel_size=3,stride = 1, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2,2),
            
            nn.Conv2d(d*2,d*4,kernel_size=3,stride = 1, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2,2),
            
            nn.Conv2d(d*4,d*8,kernel_size=3,stride = 1, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2,2),
            
            nn.Conv2d(d*8,d*8,kernel_size=3,stride = 1, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2,2)
        
        )
      
        self.fc = nn.Sequential(
            nn.Linear(16384,256),
            nn.ReLU(inplace = True),
            nn.Dropout(0.5),
            
            nn.Linear(256,4096),
            nn.ReLU(inplace = True),
            nn.Dropout(0.5),
            
            nn.Linear(4096,256),
            nn.ReLU(inplace = True),
            nn.Dropout(0.5),
            
            nn.Linear(256,out)
        )
    
        
    def forward(self, x):
        x = self.conv(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return x

In [8]:
from tqdm import tqdm
data = []
def make_data():
    for id, (name,label,bbox) in tqdm(enumerate(annotation)):
        img = cv2.imread(os.path.join(TRAIN_DIR,name),0)
        h,w = img.shape
        label = [int(float(i)) for i in label]
        x_scale = IMG_SIZE/w
        y_scale = IMG_SIZE/h

        bx,by,bw,bh = bbox
        bx = int(np.round(bx * x_scale))
        by = int(np.round(by * y_scale))
        bw = int(np.round(bw * x_scale))
        bh = int(np.round(bh * y_scale))
        bb = (bx,by,bw,bh)
        img = cv2.resize(img,(IMG_SIZE,IMG_SIZE))
        data.append([np.array(img), bb, label])
        
MAKE_DATA = 0
if MAKE_DATA:     
    make_data()
    np.save("data/data150.npy",data)

In [9]:
data = np.load(DATA_DIR, allow_pickle=True)

In [10]:
traindata = [i[0] for i in data]
trainbbox = [i[1] for i in data]

In [11]:
traindata = np.array(traindata)
traindata = traindata/255

In [12]:
print(traindata)

[[[0.43137255 0.43137255 0.42352941 ... 1.         1.         1.        ]
  [0.44705882 0.43529412 0.43921569 ... 1.         1.         1.        ]
  [0.44313725 0.43921569 0.45490196 ... 1.         1.         1.        ]
  ...
  [0.05490196 0.08627451 0.0745098  ... 1.         1.         1.        ]
  [0.09019608 0.07843137 0.08627451 ... 1.         1.         1.        ]
  [0.08235294 0.07058824 0.06666667 ... 1.         1.         1.        ]]

 [[0.52941176 0.5254902  0.5254902  ... 0.34117647 0.35294118 0.36078431]
  [0.52941176 0.5254902  0.52156863 ... 0.34117647 0.35686275 0.36078431]
  [0.52941176 0.5254902  0.52156863 ... 0.34117647 0.35294118 0.35686275]
  ...
  [0.53333333 0.53333333 0.53333333 ... 0.42745098 0.43529412 0.43921569]
  [0.53333333 0.53333333 0.53333333 ... 0.42745098 0.43921569 0.44313725]
  [0.53333333 0.53333333 0.53333333 ... 0.42745098 0.43921569 0.44313725]]

 [[0.91372549 0.91372549 0.91764706 ... 0.91764706 0.91764706 0.91764706]
  [0.91372549 0.913725

In [13]:
traindata_tensor = torch.from_numpy(traindata).long()
trainbbox_tensor = torch.from_numpy(np.array(trainbbox))
trainset = udata.TensorDataset(traindata_tensor,trainbbox_tensor)

In [14]:
trainloader = udata.DataLoader(trainset, batch_size=BATCH_SIZE, num_workers=4)

In [15]:
def train(model, optimizer, error, trainloader):
    model.train()
    for (img,label) in trainloader:
        img_tensor = Variable(torch.LongTensor(img)).view(-1,1,IMG_SIZE,IMG_SIZE).to(device)
        label_tensor = Variable(torch.LongTensor(label)).to(device)
        outputs = model(img_tensor)
        loss = error(outputs, label_tensor)
        loss.backward()
        optimizer.step()
    return loss.data

In [16]:

#model = CNN(1,16,4).to(device)
model = models.alexnet(pretrained=True)
model = model.to(device)
#Freeze all convolution layer
for param in model.parameters():
    param.requires_grad = False

#Unfreeze Fully-connected layer, edit the last layer 
model.fc = nn.Linear(in_features=512, out_features=4).to(device)


optimizer = torch.optim.Adam(model.parameters(), lr = 0.002)
error = nn.MSELoss()

loss_list = []
def start_process(model,optimizer,error,trainloader):
    print("Start Training...")
    for epoch in range(100):
        loss = train(model,optimizer,error,trainloader)
        loss_list.append(loss.data)
        print("Epoch: {},  Loss: {}".format(epoch+1,loss.data))


In [17]:
start_process(model,optimizer,error,trainloader)

Start Training...


RuntimeError: Couldn't open shared file mapping: <torch_244_224215525>, error code: <1450>