In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
from tqdm import tqdm#_notebook as tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from functools import reduce
import os
from scipy.optimize import minimize
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision import transforms, utils

In [None]:
# Root directory of the dataset files; every data path below is built from it.
PATH = '../input/pku-autonomous-driving/'
# Show which files and folders the dataset directory contains.
os.listdir(PATH)

In [None]:
# `train` holds the labelled images; `test` is the sample submission, which is
# later used as the list of images to predict on.
train, test = (
    pd.read_csv(PATH + csv_name)
    for csv_name in ('train.csv', 'sample_submission.csv')
)

In [None]:
# Peek at the first rows of the training table.
train.head()

In [None]:
# Peek at the first rows of the sample-submission table.
test.head()

In [None]:
# 3x3 projection matrix: get_img_coords multiplies it with (x, y, z) points and
# divides by the third component to obtain pixel coordinates.
# NOTE(review): presumably the camera intrinsics (focal lengths on the
# diagonal, principal point in the last column) — confirm against the dataset docs.
camera_params = np.asarray(
    [
        [2304.5479, 0, 1686.2379],
        [0, 2305.8757, 1354.9849],
        [0, 0, 1],
    ],
    dtype=np.float32,
)

In [None]:
# This will take a prediction string from
# the training dataset and output an array of 
# dictionaries, where each dictionary contains 
# the information for a car in the given picture

def str2coords(input_str):
    """Parse a prediction string into one dictionary per car.

    The whitespace-separated numbers are consumed seven at a time and
    labelled id, yaw, pitch, roll, x, y, z (in that order). Every value,
    the id included, is converted to float.

    Returns:
        list of dicts, one per group of seven numbers.
    """
    field_names = ['id', 'yaw', 'pitch', 'roll', 'x', 'y', 'z']
    rows = np.array(input_str.split()).reshape([-1, 7])
    return [dict(zip(field_names, row.astype('float'))) for row in rows]

In [None]:
# This function takes the prediction string
# and then does the necessary math so that we
# get the x, y coordinates of the PIXEL corresponding
# to other cars in the image

def get_img_coords(input_str):
    """Project the car positions of a prediction string onto the image.

    Each (x, y, z) position is multiplied by ``camera_params`` and the first
    two components are divided by the third, which yields pixel coordinates.

    Returns:
        (img_xs, img_ys): two 1-D arrays with one pixel coordinate per car.
    """
    cars = str2coords(input_str)

    # 3 x N matrix: row 0 holds the x's, row 1 the y's, row 2 the z's.
    points = np.array([(car['x'], car['y'], car['z']) for car in cars]).T

    # N x 3 result: pixel coordinates multiplied by the z-coordinate, then z itself.
    projected = np.dot(camera_params, points).T

    # Divide the z factor back out to obtain the actual pixel positions.
    depth = projected[:, 2]
    return projected[:, 0] / depth, projected[:, 1] / depth
    
    

In [None]:
# Size (in pixels) that preprocess_image resizes every picture to.
IMG_WIDTH = 1024
IMG_HEIGHT = (IMG_WIDTH // 16) * 5
# Factor between the input image size and the mask / regression map size.
MODEL_SCALE = 8

In [None]:
def preprocess_image(img):
    """Crop, resize and rescale an image for the network.

    Only the bottom half of the rows is kept (the cars are in the lower part
    of the picture), the crop is resized to IMG_WIDTH x IMG_HEIGHT and the
    values are divided by 255.

    Returns:
        float32 array with the resized, rescaled crop.
    """
    first_row = img.shape[0] // 2
    lower_half = img[first_row:]
    resized = cv2.resize(lower_half, (IMG_WIDTH, IMG_HEIGHT))
    scaled = resized / 255
    return scaled.astype('float32')
    

An example

In [None]:
plt.figure(figsize=(14,14))
# cv2.imread returns the channels in BGR order while matplotlib expects RGB;
# convert first so the picture is shown with its true colours.
plt.imshow(cv2.cvtColor(
    cv2.imread(PATH + 'train_images/' + train.iloc[0]['ImageId'] + '.jpg'),
    cv2.COLOR_BGR2RGB))
# Overlay the projected position of every labelled car.
plt.scatter(*get_img_coords(train.iloc[0]['PredictionString']), color='red', s=100);

In [None]:
# Pixel coordinates (xs, ys) of the cars labelled in the first training row.
get_img_coords(train['PredictionString'][0])

In [None]:
def get_mask_and_regr(img, labels):
    """Build the car-position mask and the regression targets for one image.

    Args:
        img: the original (un-preprocessed) image; only its shape is used, to
            map pixel coordinates onto the down-scaled output grid.
        labels: prediction string describing the cars in the image.

    Returns:
        mask: float32 array of shape
            (IMG_HEIGHT // MODEL_SCALE, IMG_WIDTH // MODEL_SCALE) holding 1 in
            every grid cell that contains a car, 0 elsewhere.
        regr: float32 array of the same grid with a trailing axis of 6 pose
            values per car cell, in the order given by ``regr_names``
            (alphabetical: pitch, roll, x, y, yaw, z).
    """
    # Channel order of the regression map. It is the alphabetical order of the
    # pose keys; anything decoding the map must use the same order.
    regr_names = ['pitch', 'roll', 'x', 'y', 'yaw', 'z']

    grid_h = IMG_HEIGHT // MODEL_SCALE
    grid_w = IMG_WIDTH // MODEL_SCALE
    mask = np.zeros([grid_h, grid_w], dtype='float32')
    regr = np.zeros([grid_h, grid_w, len(regr_names)], dtype='float32')

    coords = str2coords(labels)
    xs, ys = get_img_coords(labels)
    half_height = img.shape[0] // 2

    for px, py, car in zip(xs, ys, coords):
        # Rows: the preprocessed image only keeps the bottom half, so shift by
        # half the original height before scaling down to the grid.
        row = (py - half_height) * IMG_HEIGHT / half_height / MODEL_SCALE
        row = np.round(row).astype('int')
        # Columns: plain rescale from the original width to the grid width.
        col = px * IMG_WIDTH / (img.shape[1]) / MODEL_SCALE
        col = np.round(col).astype('int')
        # Cars projected outside the cropped image are skipped.
        if 0 <= row < grid_h and 0 <= col < grid_w:
            mask[row, col] = 1
            regr[row, col] = [car[name] for name in regr_names]

    return mask, regr

In [None]:
# Make a mask for all the cars in the image,
# rather than the x and y pixel coordinates (why??)

def get_mask(img, labels):
    """Return only the car-position mask for an image.

    Args:
        img: the original (un-preprocessed) image.
        labels: prediction string describing the cars in the image.

    Returns:
        The mask computed by get_mask_and_regr; the regression map is discarded.
    """
    mask, _ = get_mask_and_regr(img, labels)
    return mask

In [None]:
# File-name templates; the `{}` placeholder is filled with an image id via `.format`.
train_images_dir, test_images_dir = (
    PATH + template
    for template in ('train_images/{}.jpg', 'test_images/{}.jpg')
)

In [None]:
# Hold out 1% of the labelled rows as a dev set; the fixed seed keeps the
# split identical between runs.
df_train, df_dev = train_test_split(
    train,
    test_size=0.01,
    random_state=42,
)
# The test table is used as-is.
df_test = test

PyTorch works with *Datasets*.  Here, we need to define a custom dataset.  

Custom datasets should override the `__len__` and `__getitem__` methods.

In [None]:
class CarDataset(Dataset):
    """Dataset yielding an (image, mask, regression map) triple per table row.

    Args:
        dataframe: table whose first column is the image id and whose second
            column is the prediction string of that image.
        root_dir: file-name template with one ``{}`` placeholder that is
            filled with the image id.
    """

    def __init__(self, dataframe, root_dir):
        self.df = dataframe
        self.root_dir = root_dir

    def __len__(self):
        """Number of rows (images) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` and build its training targets.

        Returns:
            [img, mask, regr]: the preprocessed image, the car-position mask
            and the regression map with the channel axis moved to the front.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Read only the requested row; converting the whole frame with
        # `.values` on every access would copy all rows each time.
        image_id = self.df.iloc[idx, 0]
        labels = self.df.iloc[idx, 1]

        img_name = self.root_dir.format(image_id)
        img0 = cv2.imread(img_name)
        if img0 is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(img_name))

        img = preprocess_image(img0)
        # The targets are computed from the ORIGINAL image shape.
        mask, regr = get_mask_and_regr(img0, labels)
        # (H, W, C) -> (C, H, W)
        regr = regr.transpose(2, 0, 1)
        return [img, mask, regr]
    

In [None]:
# The dev split comes from the labelled data, so it reads from the train image folder.
train_dataset = CarDataset(dataframe=df_train, root_dir=train_images_dir)
dev_dataset = CarDataset(dataframe=df_dev, root_dir=train_images_dir)
test_dataset = CarDataset(dataframe=df_test, root_dir=test_images_dir)

In [None]:
BATCH_SIZE = 4

# Only the training loader shuffles; the dev and test loaders keep the row order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

We need to build a mask and a regression map (`regr`) so that every output has the same size.

In [None]:
# Build the network input and the targets for the training row labelled 6.
img0 = cv2.imread(PATH + 'train_images/' + train.loc[6, 'ImageId'] + '.jpg')
img = preprocess_image(img0)

mask, regr = get_mask_and_regr(img0, train.loc[6, 'PredictionString'])

In [None]:
# Index triples (row, col, channel) of all strictly positive regression
# targets; cells holding negative or zero values are not listed.
points = np.argwhere(regr > 0)
# First such index triple (raises IndexError if there is none).
d = points[0]

In [None]:
# Show the grid row and column of the first listed point, if there is one.
for row, col, *_ in points[:1]:
    print(row, col)

In [None]:
plt.figure(figsize=(16,16))
plt.title('Processed image')
# The image was loaded with cv2 and is therefore in BGR order; reverse the
# channel axis so matplotlib (which expects RGB) shows the true colours.
plt.imshow(img[:, :, ::-1])
plt.show()

plt.figure(figsize=(16,16))
plt.title('Yaw values')
# The regression channels are in alphabetical order
# (pitch, roll, x, y, yaw, z), so index -2 selects yaw.
plt.imshow(regr[:,:,-2])
plt.show()

In [None]:
class ConvNet(nn.Module):
    """Small fully-convolutional network that outputs six regression maps.

    Three conv + ReLU + 2x2 max-pool stages are followed by dropout and a 1x1
    convolution producing 6 channels. For the 320 x 1024 input used in this
    notebook the output grid is 40 x 128.

    The output layer is linear (no activation): the regression targets can be
    negative, which a final ReLU could never produce.
    """

    def __init__(self):
        super().__init__()

        # NOTE(review): padding=2 with a 3x3 kernel enlarges the feature map by
        # 2 pixels per axis (padding=1 would keep the size). It is left as is
        # because changing it alters the output size for some input sizes.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.drop_out = nn.Dropout()

        # Kept as a Sequential so the parameter names (layer6.0.*) stay the same.
        self.layer6 = nn.Sequential(
            nn.Conv2d(128, 6, 1))

    def forward(self, x):
        """Map a (B, 3, H, W) image batch to a (B, 6, H', W') regression map."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.drop_out(out)
        out = self.layer6(out)
        return out

In [None]:
def criterion(prediction, mask, regr):
    """Masked L1 regression loss, averaged per car and then over the batch.

    Args:
        prediction: network output, shape (B, C, H, W).
        mask: car-position mask, shape (B, H, W); 1 where a car is located.
        regr: regression targets, same shape as ``prediction``.

    Returns:
        Scalar tensor: for every sample the absolute errors at the car cells
        are summed and divided by the number of car cells, then the
        per-sample values are averaged. A sample without any car cell
        contributes 0 instead of 0/0 = NaN.
    """
    abs_err = torch.abs(prediction - regr).sum(1)       # (B, H, W)
    per_sample = (abs_err * mask).sum(1).sum(1)         # (B,)
    # Clamp so an empty mask does not cause a division by zero.
    n_cars = mask.sum(1).sum(1).clamp(min=1)
    return (per_sample / n_cars).mean(0)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = ConvNet().to(device)

In [None]:
# Adam over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def train_model(epoch, history = None):
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Args:
        epoch: index of the current epoch; used in the log line and as the
            integer part of the ``history`` row label.
        history: optional DataFrame. When given, the loss of every batch is
            written to its ``train_loss`` column at row label
            ``epoch + batch_idx / len(train_loader)``.
    """
    # Switch to training mode on every call, so dropout is active even when
    # the model was put into eval mode (e.g. for inference) beforehand.
    model.train()

    last_loss = None
    for batch_idx, (img_batch, mask_batch, regr_batch) in enumerate(tqdm(train_loader)):
        # Images arrive channel-last (B, H, W, C); the conv layers expect (B, C, H, W).
        img_batch = img_batch.float().to(device).permute(0, 3, 1, 2)
        mask_batch = mask_batch.to(device)
        regr_batch = regr_batch.to(device)

        optimizer.zero_grad()
        loss = criterion(model(img_batch), mask_batch, regr_batch)
        loss.backward()
        optimizer.step()

        last_loss = loss.item()
        if history is not None:
            history.loc[epoch + batch_idx / len(train_loader), 'train_loss'] = last_loss

    # One summary line per epoch (loss of the last batch); tqdm already shows
    # per-batch progress, so printing inside the loop would only flood the output.
    if last_loss is not None:
        print('Train Epoch: {} \tLR: {:.6f}\tLoss: {:.6f}'.format(
            epoch,
            optimizer.state_dict()['param_groups'][0]['lr'],
            last_loss))

In [None]:
# Take the first sample of the first training batch. The built-in next() is
# used because DataLoader iterators do not reliably offer a `.next()` method.
data, mask, regr = [ x[0] for x in next(iter(train_loader)) ]

In [None]:
# Shapes of the image, the mask and the regression map of one sample.
for sample_part in (data, mask, regr):
    print(sample_part.shape)

In [None]:
# Number of full passes over the training loader.
n_epochs = 1

In [None]:
%%time
import gc

# Per-batch training losses, filled in by train_model (row label = fractional epoch).
history = pd.DataFrame()

for epoch in range(n_epochs):
    # Release cached GPU memory and collect garbage before each epoch.
    torch.cuda.empty_cache()
    gc.collect()
    train_model(epoch, history)

In [None]:
def extract_coords(prediction):
    """Turn one channel-first prediction map into a list of car dictionaries.

    Args:
        prediction: array of shape (6, H, W). The channels are read in
            alphabetical order (pitch, roll, x, y, yaw, z), which is the order
            in which get_mask_and_regr writes the training targets.

    Returns:
        list of dicts with the six pose values plus ``confidence`` for every
        grid cell whose channel-0 value is positive. The confidence is the
        sigmoid of that channel-0 value.

    NOTE(review): channel 0 serves both as a pose value and as the detection
    score here; there is no dedicated confidence channel — confirm this is intended.
    """
    col_names = sorted(['x', 'y', 'z', 'yaw', 'pitch', 'roll'])
    logits = prediction[0]
    coords = []
    for r, c in np.argwhere(logits > 0):
        car = dict(zip(col_names, prediction[:, r, c]))
        car['confidence'] = 1 / (1 + np.exp(-logits[r, c]))
        coords.append(car)

    return coords

In [None]:
def coords2str(coords, names=['yaw', 'pitch', 'roll', 'x', 'y', 'z', 'confidence']):
    """Serialise car dictionaries into one space-separated string.

    For every car the values of ``names`` are written in that order; a key
    missing from a car is written as 0.
    """
    return ' '.join(
        str(car.get(field, 0))
        for car in coords
        for field in names
    )

In [None]:
predictions = []

test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Inference mode: disables dropout.
model.eval()

with torch.no_grad():
    for img_batch, _, _ in tqdm(test_loader):
        # Images arrive channel-last; the network expects (B, C, H, W).
        output = model(img_batch.permute(0, 3, 1, 2).to(device))
        # Keep the output channel-first: extract_coords indexes each map as
        # prediction[channel, row, col].
        output = output.cpu().numpy()
        for out in output:
            predictions.append(coords2str(extract_coords(out)))

In [None]:
# Start from a fresh copy of the sample submission and put the predicted
# strings into its PredictionString column (row order = test loader order).
test = pd.read_csv(PATH + 'sample_submission.csv').assign(PredictionString=predictions)
test.to_csv('predictions.csv', index=False)
test.head()

In [None]:
# Inspect the prediction string written for the first test row.
test.loc[0, 'PredictionString']