# PREDICTION comparison using the saved models:

## resnet , WITH mask

## running only 3rd model = 'model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth'

### 1) Time

### 2) MSE for the outputs

In [0]:
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from functools import reduce
import os
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
from tqdm.auto import tqdm as tq
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision import transforms, utils

import albumentations as alb

In [2]:
# Gets the GPU if there is one, otherwise the cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
print(device)


cuda


In [0]:
USEMASK = True

KAGGLE = False
COLAB = True

# SPECIFY THE MODEL PATH BELOW BEFORE RUNNING

In [0]:
## KAGGLE and COLAB flags should not be True at same time - fail if this is case
assert not (KAGGLE and COLAB), "both KAGGLE and COLAB runs cannot be true"

In [5]:
if KAGGLE and not COLAB:          #  kaggle run
    HOMEDIR = r'../input/pku-autonomous-driving/'
    OUTDIR = r'./'
    model_path_dir = None
    
elif COLAB and not KAGGLE:         # google colab
    from google.colab import drive
    drive.mount('/content/drive')
    #
    HOMEDIR = r'/content/drive/My Drive/baidu/pku-autonomous-driving/'
    OUTDIR = r'./content/drive/My Drive/baidu/pku-autonomous-driving/outputROHIT/'
    model_path_dir = r'/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/'

elif not KAGGLE and not COLAB:
    HOMEDIR = r"/media/rohit/DATA/EverythingD/01SRH-BDBA Acads/CaseStudy2/Kaggle-PekingAutonomousDriving/pku-autonomous-driving/"
    OUTDIR = r"/media/rohit/DATA/EverythingD/01SRH-BDBA Acads/CaseStudy2/Kaggle-PekingAutonomousDriving/pku-autonomous-driving/output/"
    model_path_dir = r'/home/rohit/SRH/CaseStudy2/Models/Cent-Resnet18_WITHmask/'

#
if COLAB: print(f"COLAB")
if KAGGLE: print(f"KAGGLE")
if not (KAGGLE and COLAB): print(f"laptop, not CLOUD\n")
else: print(f"on CLOUD\n")

print(f"HOMEDIR =\n{HOMEDIR}\n\nOUTDIR=\n{OUTDIR}\n\nmodel_path_dir =\n{model_path_dir}\n")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
COLAB
laptop, not CLOUD

HOMEDIR =
/content/drive/My Drive/baidu/pku-autonomous-driving/

OUTDIR=
./content/drive/My Drive/baidu/pku-autonomous-driving/outputROHIT/

model_path_dir =
/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/



In [0]:
assert model_path_dir is not None, "model path directory is not specified"

# Basic info loading

## data, camera matrix specification


## train.csv   has   ImageId, PredictionString

## Prediction String    model# yaw pitch roll x y z  and the same for multiple cars

In [0]:
dfTrain = pd.read_csv(HOMEDIR + 'train.csv')
dfTest = pd.read_csv(HOMEDIR + 'sample_submission.csv')

# From camera.zip
camera_matrix = np.array([[2304.5479, 0,  1686.2379],
                          [0, 2305.8757, 1354.9849],
                          [0, 0, 1]], dtype=np.float32)

In [8]:
dfTrain.head()

Unnamed: 0,ImageId,PredictionString
0,ID_8a6e65317,16 0.254839 -2.57534 -3.10256 7.96539 3.20066 ...
1,ID_337ddc495,66 0.163988 0.192169 -3.12112 -3.17424 6.55331...
2,ID_a381bf4d0,43 0.162877 0.00519276 -3.02676 2.1876 3.53427...
3,ID_7c4a3e0aa,43 0.126957 -3.04442 -3.10883 -14.738 24.6389 ...
4,ID_8b510fad6,37 0.16017 0.00862796 -3.0887 -3.04548 3.4977 ...


In [9]:
bad_images_list = ["ID_1a5a10365","ID_4d238ae90","ID_408f58e9f","ID_bb1d991f6","ID_c44983aeb"]
for bad_id in bad_images_list:
    #plt.imshow( cv2.imread(HOMEDIR + 'train_images/' + bad_id + '.jpg')[:,:,::-1] )
    #plt.show()
    pass
print(f"full df len = {len(dfTrain)}")
drop_dfTrain = dfTrain.set_index("ImageId").drop( index=bad_images_list )
dfTrain = drop_dfTrain.reset_index()
print(f"post removal of bad entries len = {len(dfTrain)}")

full df len = 4262
post removal of bad entries len = 4257


# Set swtich for very small data run

In [0]:
TEST_SWITCH_ON = False

In [0]:
if TEST_SWITCH_ON:
    dfTrain = dfTrain[:20]
    dfTest  = dfTest[:6]

In [0]:
def str2coords(ps, names=['id', 'yaw', 'pitch', 'roll', 'x', 'y', 'z']):
# from the prediction string entries, create a dict of each car data
    coords = []
    for ps_entry in np.array(ps.split()).reshape([-1, 7]):
        dictval = dict(zip(names, ps_entry.astype('float')))
        coords.append(dictval)
        if 'id' in coords[-1]:
            coords[-1]['id'] = int(coords[-1]['id'])
    return coords

# 2D Visualization

In [0]:
def rotate(x, angle):
    x = x + angle
    x = x - (x + np.pi) // (2 * np.pi) * 2 * np.pi
    return x

In [0]:
def read_image(path):
    img = cv2.imread(path)
    #img = np.array(img[:, :, ::-1]) # alternative way to convert BGR to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
    return img

In [0]:
def convert_to_img_coords(ps):
# convert the camera coords x,y,z to the image coords
    coords = str2coords(ps)
    x_list = [c['x'] for c in coords]
    y_list = [c['y'] for c in coords]
    z_list = [c['z'] for c in coords]
    P = np.array(list(zip(x_list, y_list, z_list))).T
    img_p = np.dot(camera_matrix, P).T
    img_p[:, 0] /= img_p[:, 2]
    img_p[:, 1] /= img_p[:, 2]
    img_x_list = img_p[:, 0]
    img_y_list = img_p[:, 1]
    img_z_list = img_p[:, 2]
    return img_x_list, img_y_list

# 3D Visualization

In [0]:
from math import sin, cos

# convert euler angle to rotation matrix
def euler_to_Rot(yaw, pitch, roll):
    Y = np.array([[cos(yaw), 0, sin(yaw)],
                  [0, 1, 0],
                  [-sin(yaw), 0, cos(yaw)]])
    P = np.array([[1, 0, 0],
                  [0, cos(pitch), -sin(pitch)],
                  [0, sin(pitch), cos(pitch)]])
    R = np.array([[cos(roll), -sin(roll), 0],
                  [sin(roll), cos(roll), 0],
                  [0, 0, 1]])
    return np.dot(Y, np.dot(P, R))

In [0]:
def draw_line(image, points):
    color = (255, 0, 0)
    cv2.line(image, tuple(points[0][:2]), tuple(points[3][:2]), color, 16)
    cv2.line(image, tuple(points[0][:2]), tuple(points[1][:2]), color, 16)
    cv2.line(image, tuple(points[1][:2]), tuple(points[2][:2]), color, 16)
    cv2.line(image, tuple(points[2][:2]), tuple(points[3][:2]), color, 16)
    return image


def draw_points(image, points):
    for (p_x, p_y, p_z) in points:
        cv2.circle(image, (p_x, p_y), int(1000 / p_z), (0, 255, 0), -1)
#         if p_x > image.shape[1] or p_y > image.shape[0]:
#             print('Point', p_x, p_y, 'is out of image with shape', image.shape)
    return image

# Average ratios of the spanX, spanY and spanZ  by groups decided by me

GroupNo based on Z / X value bin.
GroupNo     Xspan       Yspan       Zspan       Yspan/Xspan     Zspan/Xspan
1           1.96        1.58        3.78		0.80            1.92
2           2.08        1.65        4.49        0.80            2.15
3           2.03        1.57        4.86        0.78            2.38

average of all these
GroupNo     Xspan       Yspan       Zspan       Yspan/Xspan     Zspan/Xspan
            1.00        0.79        2.15        0.79            2.15

In [0]:
def visualize(img, coords):
    ## want to visualize 
    x_l = 1.00
    y_l = 0.79
    z_l = 2.15
    
    img = img.copy()
    for point in coords:
        # Get values
        x, y, z = point['x'], point['y'], point['z']
        ## the yaw and pitch is interchanged in the data provided
        yaw, pitch, roll = -point['pitch'], -point['yaw'], -point['roll']
        # Math
        Rt = np.eye(4)
        t = np.array([x, y, z])
        Rt[:3, 3] = t
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T
        Rt = Rt[:3, :]
        P = np.array([[x_l, -y_l, -z_l, 1],
                      [x_l, -y_l, z_l, 1],
                      [-x_l, -y_l, z_l, 1],
                      [-x_l, -y_l, -z_l, 1],
                      [0, 0, 0, 1]]).T
        img_cor_points = np.dot(camera_matrix, np.dot(Rt, P))
        img_cor_points = img_cor_points.T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]
        img_cor_points = img_cor_points.astype(int)
        # Drawing
        img = draw_line(img, img_cor_points)
        img = draw_points(img, img_cor_points[-1:])
    
    return img

# Steps to preprocess input image

In [0]:
# Original dimensions of the Train and Test images
ORIG_W = 3384
ORIG_H = 2710

# The dimensions we want to use for processing: keeping ratio of width:height = 1:4
IMG_WIDTH = 2048
IMG_HEIGHT = 512
MARGIN_W = ORIG_W // 4  # 846

MODEL_SCALE = 8  # mask shrink rate

FX, FY = 2304.5479,  2305.8757
CX, CY = 1686.2379, 1354.9849
def XYZ2UV(x,y,z):
    u = FX * x / z + CX
    v = FY * y / z + CY
    return u,v
def UVZ2XY(u,v,z):
    x = z * (u - CX) / FX
    y = z * (v - CY) / FY
    return x,y

#
# u is horizontal dimension and v is vertical dimension
#
def VU2maskVU(v,u):  
    mask_V = (v - ORIG_H // 2) * IMG_HEIGHT / (ORIG_H // 2) / MODEL_SCALE
    mask_U = (u + MARGIN_W) * IMG_WIDTH  / (ORIG_W + 2*MARGIN_W) / MODEL_SCALE
    return mask_V, mask_U
def maskVU2VU(mask_v_float, mask_u_float):
    v = ORIG_H // 2 + mask_v_float * MODEL_SCALE / IMG_HEIGHT * (ORIG_H // 2)
    u = mask_u_float * MODEL_SCALE * (ORIG_W + 2*MARGIN_W) / IMG_WIDTH - MARGIN_W
    return v, u

## assertion usage
REGR_TARGETS = sorted( ["yaw","pitch_sin", "pitch_cos", "roll", "udiff", "vdiff", "z"] )
def _regr_preprocess(regr_dict, vdiff, udiff):
    """ vdiff(h orientation), udiff is regression target """
    regr_dict["vdiff"] = vdiff
    regr_dict["udiff"] = udiff

    # Roll
    regr_dict['roll'] = rotate(regr_dict['roll'], np.pi)
    
    # Pitch
    regr_dict['pitch_sin'] = sin(regr_dict['pitch'])
    regr_dict['pitch_cos'] = cos(regr_dict['pitch'])

    # Regress log(Z)
    regr_dict["z"] = np.log(regr_dict["z"])
    
    regr_dict.pop('x')
    regr_dict.pop('y')
    regr_dict.pop('pitch')
    regr_dict.pop('id')
    return regr_dict

def _regr_back(regr_dict, mask_V_pos, mask_U_pos):
    # convert log(z) back to z
    regr_dict["z"] = np.exp(regr_dict["z"])

    _v, _u = maskVU2VU( mask_V_pos + regr_dict["vdiff"], mask_U_pos + regr_dict["udiff"] )
    regr_dict["x"], regr_dict["y"] = UVZ2XY(_u, _v, regr_dict["z"])

    regr_dict['roll'] = rotate(regr_dict['roll'], -np.pi)

    ## Pitch
    pitch_sin = regr_dict['pitch_sin'] / np.sqrt(regr_dict['pitch_sin']**2 + regr_dict['pitch_cos']**2)
    pitch_cos = regr_dict['pitch_cos'] / np.sqrt(regr_dict['pitch_sin']**2 + regr_dict['pitch_cos']**2)
    regr_dict['pitch'] = np.arccos(pitch_cos) * np.sign(pitch_sin)
    
    return regr_dict

def preprocess_image(img):
    img = img[img.shape[0] // 2:]
    bg = np.ones_like(img) * img.mean(1, keepdims=True).astype(img.dtype)
    bg = bg[:, :MARGIN_W]
    img = np.concatenate([bg, img, bg], 1)
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
    return (img / 255).astype('float32')
def preprocess_mask_image(img):  # 上関数とといっしょに編集するように注意
    img = img[img.shape[0] // 2:]
    bg = np.zeros_like(img).astype(img.dtype)
    bg = bg[:, :img.shape[1] // 4]
    img = np.concatenate([bg, img, bg], 1)
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))  # linear interpolate
    return (img / 255).astype('float32')

# https://github.com/xingyizhou/CenterNet/blob/819e0d0dde02f7b8cb0644987a8d3a370aa8206a/src/lib/utils/image.py
# heatmap: H, W
# center : X(w direction), Y(H direction)
##################### mu_x = int(center[0] + 0.5) CAUSES BUG ##################################

def draw_msra_gaussian(heatmap, center, sigma):
    # tmp_size = sigma * 3
    tmp_size = np.ceil(sigma * 3).astype(int)  # tmp_size should be int for readability ( and to remove bug ? )
    mu_x = int(center[0])
    mu_y = int(center[1])
    w, h = heatmap.shape[0], heatmap.shape[1]
    ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
    br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
    if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0:
        return heatmap
    size = 2 * tmp_size + 1
    x = np.arange(0, size, 1, np.float32)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
    img_x = max(0, ul[0]), min(br[0], h)
    img_y = max(0, ul[1]), min(br[1], w)
    heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(
      heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]],
      g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
    return heatmap

def make_heatmap(m, v_arr, u_arr, z_arr):
    for v,u,z in zip(v_arr, u_arr, z_arr):
        # sigma = 1000 / 3.  / z / MODEL_SCALE
        sigma = 800 / 3.  / z / MODEL_SCALE
        m = draw_msra_gaussian(m, (u,v), sigma)
    return m
        

def get_mask_and_regr(img, labels):
    mask = np.zeros([IMG_HEIGHT // MODEL_SCALE, IMG_WIDTH // MODEL_SCALE], dtype='float32')
    regr = np.zeros([IMG_HEIGHT // MODEL_SCALE, IMG_WIDTH // MODEL_SCALE, 7], dtype='float32')
    coords = str2coords(labels)
    xs, ys = convert_to_img_coords(labels)
    z_arr = [e["z"] for e in coords]
    
    mask_V_arr_float, mask_U_arr_float = VU2maskVU( ys, xs )

    # use floor floowing paper
    mask_V_arr = np.floor( mask_V_arr_float ).astype('int')
    mask_U_arr = np.floor( mask_U_arr_float ).astype('int')
    mask_V_diff = mask_V_arr_float - mask_V_arr
    mask_U_diff = mask_U_arr_float - mask_U_arr

    # make heatmap
    mask = make_heatmap(mask, mask_V_arr, mask_U_arr, z_arr)
    
    for mask_V,mask_U, vdiff,udiff, regr_dict in zip(mask_V_arr,mask_U_arr,mask_V_diff,mask_U_diff, coords):
        if mask_V >= 0 and mask_V < IMG_HEIGHT // MODEL_SCALE and mask_U >= 0 and mask_U < IMG_WIDTH // MODEL_SCALE:
            regr_dict = _regr_preprocess(regr_dict, vdiff, udiff)
            regr[mask_V, mask_U] = [regr_dict[n] for n in sorted(regr_dict)]
    return mask, regr

In [0]:
def vis_mask(img, mask):
    _mm = np.repeat( np.repeat(mask, 8, axis=0), 8, axis=1 )[:,:, None]
    _mm = np.repeat(_mm, 3, axis=-1)
    _mm [:,:,1] = 0 ; _mm[:,:,2] = 0
    
    tmp =  np.clip( 0.8 * img + 0.4 * _mm, 0,1)
    tmp[ _mm[:,:,0]==1 ] = [0,1,1]
    plt.figure(figsize=(16,16))
    plt.imshow( tmp , alpha=0.3)
    plt.grid()
    plt.show()


# PyTorch Dataset

In [0]:
class CarDataset(Dataset):
    """Car dataset."""

    def __init__(self, dataframe, root_dir, mask_root_dir, training=True):
        self.df = dataframe
        self.root_dir = root_dir
        self.mask_root_dir = mask_root_dir  # ignore mask
        self.training = training

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        # Get image name
        idx, labels = self.df.values[idx]
        img_name = self.root_dir.format(idx)
        
        ## Read image
        img0 = cv2.imread(img_name)[:,:,::-1]
        img = preprocess_image(img0.astype(float))
        img = np.rollaxis(img, 2, 0)
        
        ## Read ignore mask
        ign_img0 = cv2.imread(self.mask_root_dir.format(idx), cv2.IMREAD_GRAYSCALE)
        if ign_img0 is None:  # where there is no mask image available
            ign_img0 = np.zeros((ORIG_H, ORIG_W), dtype='float32')

        ign_img = np.array(ign_img0).astype('float32') / 255.
        # ign_img = np.rollaxis(ign_img, 2, 0)
        ######################################################

        # ignore mask for CNN
        ign_img_for_feed = preprocess_mask_image(ign_img0)
        ign_img_for_feed = np.expand_dims(ign_img_for_feed, 0)  # h,w -> 1,h,w
        
        
        # Get mask and regression maps
        if self.training:
            mask, regr = get_mask_and_regr(img0, labels)
            regr = np.rollaxis(regr, 2, 0)
        else:
            mask, regr = 0, 0
        
        return [img, mask, regr, ign_img, ign_img_for_feed]

In [0]:
train_images_dir = HOMEDIR + 'train_images/{}.jpg'
test_images_dir = HOMEDIR + 'test_images/{}.jpg'
train_masks_dir = HOMEDIR + 'train_masks/{}.jpg'
test_masks_dir = HOMEDIR + 'test_masks/{}.jpg'

df_train, df_dev = train_test_split(dfTrain, test_size=0.1, random_state=1042)
df_test = dfTest

## Create objects of the class type Dataset -  one for each data set
train_dataset = CarDataset(df_train, train_images_dir, train_masks_dir)
dev_dataset = CarDataset(df_dev, train_images_dir, train_masks_dir)
test_dataset = CarDataset(df_test, test_images_dir, test_masks_dir)

# PyTorch Model - resnet WITH mask

In [0]:
class double_conv(nn.Module):
    '''(conv => BN => ReLU) * 2'''
    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x

class up(nn.Module):
    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()

        #  would be a nice idea if the upsampling could be learned too,
        #  but my machine do not have enough memory to handle all those weights
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2=None):
        x1 = self.up(x1)
        
        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, (diffX // 2, diffX - diffX//2,
                        diffY // 2, diffY - diffY//2))
        
        # for padding issues, see 
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        
        if x2 is not None:
            x = torch.cat([x2, x1], dim=1)
        else:
            x = x1
        x = self.conv(x)
        return x

def get_mesh(batch_size, shape_x, shape_y):
    mg_x, mg_y = np.meshgrid(np.linspace(0, 1, shape_y), np.linspace(0, 1, shape_x))
    mg_x = np.tile(mg_x[None, None, :, :], [batch_size, 1, 1, 1]).astype('float32')
    mg_y = np.tile(mg_y[None, None, :, :], [batch_size, 1, 1, 1]).astype('float32')
    mesh = torch.cat([torch.tensor(mg_x).to(device), torch.tensor(mg_y).to(device)], 1)
    return mesh

In [0]:
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1):
        super(BasicBlock, self).__init__()

        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.GroupNorm(16, planes)

        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.GroupNorm(16, planes)

        if stride != 1 or inplanes != planes:
            self.downsample = nn.Sequential(
                conv1x1(inplanes, planes, stride), nn.GroupNorm(16, planes))
        else:
            self.downsample = None


    def forward(self, x):
        identity = x

        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu(out, inplace=True)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.GroupNorm(16, planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.GroupNorm(16, planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.GroupNorm(16, planes * self.expansion)

        if stride != 1 or inplanes != planes * self.expansion:
            self.downsample = nn.Sequential(
                conv1x1(inplanes, planes * self.expansion, stride), 
                nn.GroupNorm(16, planes * self.expansion))
        else:
            self.downsample = None

    def forward(self, x):
        identity = x

        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = F.relu(self.bn2(self.conv2(out)), inplace=True)
        out = self.bn3(self.conv3(out))
 
        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu(out)

        return out


class ResNetFeatures(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, input_channels=3):
        super(ResNetFeatures, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.GroupNorm(16, 64)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)


        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):

        layers = []
        layers.append(block(self.inplanes, planes, stride))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)


    def forward(self, x):
        conv1 = F.relu(self.bn1(self.conv1(x)), inplace=True)
        conv1 = F.max_pool2d(conv1, 3, stride=2, padding=1)

        feats4 = self.layer1(conv1)
        feats8 = self.layer2(feats4)
        feats16 = self.layer3(feats8)
        feats32 = self.layer4(feats16)

        return feats8, feats16, feats32



def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNetFeatures(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        _load_pretrained(model, model_zoo.load_url(model_urls['resnet18']))
    return model



def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNetFeatures(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        _load_pretrained(model, model_zoo.load_url(model_urls['resnet34']))
    return model


def _load_pretrained(model, pretrained):
    model_dict = model.state_dict()
    pretrained = {k : v for k, v in pretrained.items() if k in model_dict}
    model_dict.update(pretrained)
    model.load_state_dict(model_dict)

In [0]:
class CentResnet(nn.Module):
    '''Mixture of previous classes'''
    def __init__(self, n_classes):
        super(CentResnet, self).__init__()
        self.base_model = resnet18(pretrained=False, input_channels=6)
        
        # Lateral layers convert resnet outputs to a common feature size
        self.lat8 = nn.Conv2d(128, 256, 1)
        self.lat16 = nn.Conv2d(256, 256, 1)
        self.lat32 = nn.Conv2d(512, 256, 1)
        self.bn8 = nn.GroupNorm(16, 256)
        self.bn16 = nn.GroupNorm(16, 256)
        self.bn32 = nn.GroupNorm(16, 256)

        if USEMASK:
            self.conv0 = double_conv(5 + 1, 64)
        else:
            self.conv0 = double_conv(5, 64)
            
        self.conv1 = double_conv(64, 128)
        self.conv2 = double_conv(128, 512)
        self.conv3 = double_conv(512, 1024)
        
        self.mp = nn.MaxPool2d(2)
        
        self.up1 = up(1282 , 512) #+ 1024
        self.up2 = up(512 + 512, 256)
        self.outc = nn.Conv2d(256, n_classes, 1)
        
    
    def forward(self, x):
        batch_size = x.shape[0]
        mesh1 = get_mesh(batch_size, x.shape[2], x.shape[3])
        x0 = torch.cat([x, mesh1], 1)
        x1 = self.mp(self.conv0(x0))
        x2 = self.mp(self.conv1(x1))
        x3 = self.mp(self.conv2(x2))
        x4 = self.mp(self.conv3(x3))
        
        #feats = self.base_model.extract_features(x)
                # Run frontend network
        if USEMASK:
            ## feats8, feats16, feats32 = self.base_model(x[:,0:3])  ## use first 3 channel. this may not be proper way
            feats8, feats16, feats32 = self.base_model(x0)  ## C=6 : rgb(3)+mask(1)+mesh(2)
        else:
            feats8, feats16, feats32 = self.base_model(x)

        lat8 = F.relu(self.bn8(self.lat8(feats8)))
        lat16 = F.relu(self.bn16(self.lat16(feats16)))
        lat32 = F.relu(self.bn32(self.lat32(feats32)))
        
        # Add positional info
        mesh2 = get_mesh(batch_size, lat32.shape[2], lat32.shape[3])
        feats = torch.cat([lat32, mesh2], 1)
        #print(feats.shape)
        #print (x4.shape)
        x = self.up1(feats, x4)
        x = self.up2(x, x3)
        x = self.outc(x)
        return x

In [0]:
def infer_image(img, ign_mask_for_feed):  # shape:[B,C,H,W]
    return model( torch.from_numpy( np.concatenate((img, ign_mask_for_feed),axis=1) ).to(device) )

In [0]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

######## def postprocess_heatmap(logits, thresh=0.60):  # V6+ignMask, LB0.062
######## def postprocess_heatmap(logits, thresh=0.50):  # V6+ignMask, LB0.066
######## def postprocess_heatmap(logits, thresh=0.40):  # V6+ignMask, LB0.065
######## def postprocess_heatmap(logits, thresh=0.30):  # V6+ignMask, LB0.063

### def postprocess_heatmap(logits, thresh=0.4):
def postprocess_heatmap(logits, thresh=0.45):
    prob = sigmoid(logits)
    mp2d = torch.nn.MaxPool2d(3, stride=1, padding=1, dilation=1, return_indices=False, ceil_mode=False)
    out = mp2d( torch.Tensor([[prob]]) ).numpy()[0][0]
    return (prob == out) & (prob > thresh)

In [0]:
DISTANCE_THRESH_CLEAR = 2


In [0]:
def clear_duplicates(coords):
    for c1 in coords:
        xyz1 = np.array([c1['x'], c1['y'], c1['z']])
        for c2 in coords:
            xyz2 = np.array([c2['x'], c2['y'], c2['z']])
            distance = np.sqrt(((xyz1 - xyz2)**2).sum())
            if distance < DISTANCE_THRESH_CLEAR:
                if c1['confidence'] < c2['confidence']:
                    c1['confidence'] = -1
    return [c for c in coords if c['confidence'] > 0]

In [0]:
def extract_coords(prediction, ign_mask):
    assert ign_mask.shape[0] == ORIG_H   #
    logits = prediction[0]
    regr_output = prediction[1:]
    points_mat = postprocess_heatmap(logits) 
    points = np.argwhere( points_mat > 0 )
    
    col_names = sorted(REGR_TARGETS)  # vdiff,udiff,z,yaw,pitch_sin,pitch_cos,roll
    coords = []
    for r, c in points:           
        regr_dict = dict(zip(col_names, regr_output[:, r, c]))
        
        # use heatmap-peak (V,U) position
        regr_backed = _regr_back(regr_dict, r, c)
        
        _U, _V = XYZ2UV(regr_backed["x"], regr_backed["y"], regr_backed["z"])
        _U, _V = int(_U), int(_V)
        if _V>=0 and _V<ORIG_H and _U>=0 and _U<ORIG_W and ign_mask[_V,_U] > 0.5:  # floor(u), floor(v)
            # print("point is in ignore_mask")
            continue

        coords.append(regr_backed)
        coords[-1]['confidence'] = 1 / (1 + np.exp(-logits[r, c]))

        coords = clear_duplicates(coords)
    return coords

# Load each model and find prediction time

In [31]:
import datetime
time_start = datetime.datetime.now()
print(time_start)
time_end = datetime.datetime.now()
print(time_end)

time_delta = time_end - time_start

print(time_delta)
print(type(time_delta))

time_avg = time_delta / 2
print(time_avg)

2020-03-09 18:23:05.459245
2020-03-09 18:23:05.460032
0:00:00.000787
<class 'datetime.timedelta'>
0:00:00.000394


In [32]:
import datetime

import gc
torch.cuda.empty_cache()
gc.collect()

0

In [33]:
print(f"{os.listdir(model_path_dir)}\n\n")
dict_models = {}
for eachmodel in os.listdir(model_path_dir):
    dict_models[eachmodel] = model_path_dir + eachmodel

print(f"{dict_models}")

['model_kagg_rohit-center-resnet-1_ep4_loss_bct_mask.pth', 'model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth', 'model_kagg_manmeet-center-resnet-1_ep10_loss_focal_mask.pth', 'model_kagg_manmeet_centernet_resnet_ep_6_loss_focal_mask_20200301_0200.pth']


{'model_kagg_rohit-center-resnet-1_ep4_loss_bct_mask.pth': '/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg_rohit-center-resnet-1_ep4_loss_bct_mask.pth', 'model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth': '/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth', 'model_kagg_manmeet-center-resnet-1_ep10_loss_focal_mask.pth': '/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg_manmeet-center-resnet-1_ep10_loss_focal_mask.pth', 'model_kagg_manmeet_centernet_resnet_ep_6_loss_focal_mask_20200301_0200.pth': '/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg_manmeet_centernet_resnet_ep_6_loss_focal_mask

In [34]:
dict_models.pop('model_kagg_rohit-center-resnet-1_ep4_loss_bct_mask.pth')
dict_models.pop('model_kagg_manmeet-center-resnet-1_ep10_loss_focal_mask.pth')
dict_models.pop('model_kagg_manmeet_centernet_resnet_ep_6_loss_focal_mask_20200301_0200.pth')

'/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg_manmeet_centernet_resnet_ep_6_loss_focal_mask_20200301_0200.pth'

In [0]:
## running only 3rd model = 'model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth'

In [36]:
print(f"{dict_models}")

{'model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth': '/content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth'}


In [37]:
len(df_dev)

426

In [38]:
print(f"USEMASK set as = {USEMASK}\n\n")

#NUMBER_IMAGES_TO_PREDICT = 2  # for testing
NUMBER_IMAGES_TO_PREDICT = len(df_dev)  # uncomment to use full 426 values of dev

print(f"NUMBER_IMAGES_TO_PREDICT = {NUMBER_IMAGES_TO_PREDICT}\n")

time_start = None
time_end = None
time_delta = None
time_avg = None

model_idx=1

for each_model_name, each_model_path in dict_models.items():
    #
    print(f"\n--------- For model number #{model_idx} ---------\n")
    #
    torch.cuda.empty_cache()
    gc.collect()
    model = None
    saved_model = None
    time_start = None
    time_end = None
    time_avg = None
    #
    ## Load the saved model
    saved_model = CentResnet(8).to(device)
    saved_model.load_state_dict(torch.load(each_model_path))
    model = saved_model
    #saved_model.eval()
    #type(model)
    #
    time_start = datetime.datetime.now()
    for idx in range(NUMBER_IMAGES_TO_PREDICT):
        img, heatmap, regr, ign_mask, ign_mask_for_feed = dev_dataset[idx]
        mask = (heatmap >= 1).astype(float)
    
        if USEMASK:
            output = infer_image(img[None], ign_mask_for_feed[None])
            output = output.data.cpu().numpy()
        else:
            output = model(torch.tensor(img[None]).to(device)).data.cpu().numpy()
    #
    time_end = datetime.datetime.now()
    time_delta = time_end - time_start
    time_avg = time_delta / NUMBER_IMAGES_TO_PREDICT
    
    print(f"model = {each_model_name}\nTotal time_delta = {time_delta}\naverage time per image = {time_avg}\n\npicked from location = {each_model_path}")
    
    model_idx += 1

print(f"\n\nDONE\n\n")

USEMASK set as = True


NUMBER_IMAGES_TO_PREDICT = 426


--------- For model number #1 ---------

model = model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth
Total time_delta = 0:14:26.332962
average time per image = 0:00:02.033645

picked from location = /content/drive/My Drive/baidu/Models/Cent-Resnet18_WITHmask/model_kagg-rohit-center-resnet-1_ep4-loss-focal-mask.pth


DONE


