In [1]:
import os
import os.path
import sys
import torch
import torch.utils.data as data
import numpy as np
from PIL import Image
import torchvision as tv

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
from model import CorrResNet18
# from dataset import imgcontroldataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only has 1 GPU set GPU 0 as default

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Hyper-parameters
num_epochs = 80
batch_size = 27
learning_rate = 0.001
arch = 'resnet18'
# load the pre-trained weights
model_file = './pre_trained/%s_places365.pth.tar' % arch
writer = SummaryWriter('./runs/exp1_129')

In [3]:
class CorrResNet18(nn.Module):

    def __init__(self, num_actions = 4, fine_tuning = False):
        super(CorrResNet18, self).__init__()
        resnet = models.resnet18(pretrained=False)
        for param in resnet.parameters():
            param.requires_grad = fine_tuning
        self.conv1 = resnet.conv1
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        self.maxpool = resnet.maxpool
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4
        self.avgpool = resnet.avgpool   #  output feature dimension 512 for input image dimension 224*224
        self.num_ftrs = resnet.fc.in_features
        self.linear_sensor = nn.Linear(5, 128)
        self.linear_final = nn.Linear(2*self.num_ftrs+2*128, num_actions)       

    def forward(self, img, meta): # image pairs are cat along the channel dimension [batch, 6, width, height]
        siam1 = []
        siam2 = []
        for i in range(2): # the siamese network architecture 
            # for image
            x = self.relu(self.bn1(self.conv1(img[:,(i*3):(i+1)*3, :, :])))
            x = self.maxpool(x)
            x = self.layer4(self.layer3(self.layer2(self.layer1(x))))
            x = self.avgpool(x)
            x = x.view(-1, self.num_ftrs)
            siam1.append(x)
            # for sensor data
            y = self.linear_sensor(meta[:,:,i])
            siam2.append(y)
        out = torch.cat((siam1[0], siam1[1], siam2[0], siam2[1]), dim = 1)
        out = self.linear_final(out)
        return out

In [4]:
model = CorrResNet18() #models.__dict__[arch](num_classes=365)
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)

In [5]:
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
del state_dict['fc.weight']
del state_dict['fc.bias']
state_dict["linear_sensor.weight"] = nn.init.xavier_uniform_(model.linear_sensor.weight.data).float()
state_dict["linear_sensor.bias"] = model.linear_sensor.bias.data.fill_(0.01).float()
state_dict["linear_final.weight"] = nn.init.xavier_uniform_(model.linear_final.weight.data).float()
state_dict["linear_final.bias"] = model.linear_final.bias.data.fill_(0.01).float()
model.load_state_dict(state_dict)

<All keys matched successfully>

In [6]:
class imgcontroldataset(data.Dataset):
    def __init__(self, txt_dir = '.', mode = 'train', image_size = (224, 224)):
        super(imgcontroldataset, self).__init__()
        self.datacsv = np.loadtxt(os.path.join(txt_dir, "%s.txt" % mode)).astype(int)
        self.image_size = image_size
        
    def __len__(self):
        return 3*self.datacsv.shape[0]  # augment the dataset for 3 times, randomness from randomcrop
        
    def __getitem__(self, idx):
        idx = idx % len(self.datacsv)
        
        img_cur_path = os.path.join('./train_data/image',str(self.datacsv[idx, 0]),str(self.datacsv[idx, 1]),str(self.datacsv[idx, 2])+'error.png')
        img_cur = Image.open(img_cur_path).convert('RGB')
        img_exp_path = os.path.join('./train_data/image',str(self.datacsv[idx, 0]),str(self.datacsv[idx, 1]),str(self.datacsv[idx, 2])+'after_cor.png')
        img_exp = Image.open(img_exp_path).convert('RGB')  
        meta_cur_path = os.path.join('./train_data/metadata',str(self.datacsv[idx, 0]),str(self.datacsv[idx, 1]),str(self.datacsv[idx, 2])+'error.txt')
        meta_cur = np.loadtxt(meta_cur_path)
        meta_exp_path = os.path.join('./train_data/metadata',str(self.datacsv[idx, 0]),str(self.datacsv[idx, 1]),str(self.datacsv[idx, 2])+'after_cor.txt')
        meta_exp = np.loadtxt(meta_exp_path)
        ctrl_path = os.path.join('./train_data/ctrl',str(self.datacsv[idx, 0]),str(self.datacsv[idx, 1]),str(self.datacsv[idx, 2])+'converted_nml.txt')
        ctrl = np.loadtxt(ctrl_path)
        
        # img transformation
        transform = tv.transforms.Compose([        
            tv.transforms.Resize(min(self.image_size)),    # fix the ratio and keep the smaller edge to 224 according to Relative **
            tv.transforms.RandomCrop(self.image_size),
            tv.transforms.ColorJitter(),  # randomly change hue, contrast illumination
            tv.transforms.ToTensor(),    
            tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),    # copied from pytorch tutorial imagenet
            ])
        img1 = transform(img_exp)
        img2 = transform(img_cur)
        img = torch.cat((img1, img2), dim = 0).float()  # The dimension 0 is the RGB channel
        meta = torch.cat((torch.from_numpy(meta_exp)[:, None], torch.from_numpy(meta_cur)[:, None]), dim = 1).float()
        ctrl = torch.from_numpy(ctrl).float()
        
        return img, meta, ctrl

In [7]:
# data loader
train_set = imgcontroldataset()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=16, shuffle=True, pin_memory=False)
val_set = imgcontroldataset(mode="val")
val_loader = torch.utils.data.DataLoader(val_set, batch_size=16, shuffle=False, pin_memory=False)


# Loss and optimizer
model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# training loop with log recorded
# For updating learning rate
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(train_loader)
curr_lr = learning_rate
running_loss = 0.0
model.train()
for epoch in range(num_epochs):
    for i, data in enumerate(train_loader):
        image = data[0].to(device)  
        meta = data[1].to(device)
        labels = data[2].to(device)
        
        
        # Forward pass
        outputs = model(image, meta)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if (i+1) % 16 == 0:
            
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

            # ...log the running loss
            writer.add_scalar('training loss',
                            running_loss / 16,
                            epoch * len(train_loader) + i)

            # ...log a Matplotlib Figure showing the model's predictions on a
            # random mini-batch
#             writer.add_figure('predictions vs. actuals',
#                             plot_classes_preds(model, (image, meta), labels),
#                             global_step=epoch * len(train_loader) + i)
    running_loss = 0.0
    
    
        
    # Decay learning rate
    if (epoch+1) % 16 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)
        torch.save(model.state_dict(), '%s_model.pth' % (epoch))

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for i, data in val_loader:
        image = data[0].to(device)  
        meta = data[1].to(device)
        labels = data[2].to(device)
        outputs = model(image, meta)
        loss = criterion(outputs, labels)

    print('MSE of the model on the test data: {} %'.format(loss))

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')

Epoch [1/80], Step [16/17] Loss: 0.0834
Epoch [2/80], Step [16/17] Loss: 0.0694
Epoch [3/80], Step [16/17] Loss: 0.0673
Epoch [4/80], Step [16/17] Loss: 0.0746
Epoch [5/80], Step [16/17] Loss: 0.0520
Epoch [6/80], Step [16/17] Loss: 0.0429
Epoch [7/80], Step [16/17] Loss: 0.0464
Epoch [8/80], Step [16/17] Loss: 0.0373
Epoch [9/80], Step [16/17] Loss: 0.0455
Epoch [10/80], Step [16/17] Loss: 0.0427
Epoch [11/80], Step [16/17] Loss: 0.0409
Epoch [12/80], Step [16/17] Loss: 0.0467
Epoch [13/80], Step [16/17] Loss: 0.0502
Epoch [14/80], Step [16/17] Loss: 0.0381
Epoch [15/80], Step [16/17] Loss: 0.0357
Epoch [16/80], Step [16/17] Loss: 0.0309
Epoch [17/80], Step [16/17] Loss: 0.0242
Epoch [18/80], Step [16/17] Loss: 0.0316
Epoch [19/80], Step [16/17] Loss: 0.0391
Epoch [20/80], Step [16/17] Loss: 0.0365
Epoch [21/80], Step [16/17] Loss: 0.0307
Epoch [22/80], Step [16/17] Loss: 0.0233
Epoch [23/80], Step [16/17] Loss: 0.0289
Epoch [24/80], Step [16/17] Loss: 0.0254
Epoch [25/80], Step [16/1

ValueError: too many values to unpack (expected 2)

In [12]:
1024+256

1280