In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install timm

In [None]:
!pip install pyheif

In [None]:
!pip install catboost

In [None]:
import cv2
import torch
import urllib.request
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import pyheif
import pandas as pd
from torchvision.io import read_image
from torchvision import transforms as T
import torchvision
from torchvision.ops import box_iou
import catboost
from torchvision.utils import draw_bounding_boxes

In [None]:
!mkdir /content/heic_images

In [None]:
depth_maps_path = '/content/drive/MyDrive/Depth_Maps_with_MiDaS/'
train_path = '/content/drive/MyDrive/Distance_calculation_dataset/train/'
test_path = '/content/drive/MyDrive/Distance_calculation_dataset/test/'

# Cars detection with detectron2

In [None]:
!python -m pip install pyyaml==5.1
#!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install git+https://github.com/facebookresearch/detectron2.git@5aeb252b194b93dc2879b4ac34bc51a31b5aee13

In [None]:
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
import cv2

In [None]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

In [None]:
import math
#select the index of the car to which the distance should be calculated
def get_car_box_idx(img, boxes):
    w, h = img.shape[1], img.shape[0]
    is_center_point_inside = [box[2]>w/2 and box[0]<w/2 and box[3]>h/2 and box[1]<h/2 
                              for box in boxes]
    squares = [min(w/2-box[0],box[2]-w/2)*min(box[3]-h/2,h/2-box[1]) for box in boxes]                          
    dist_list = np.array([(((boxes[i][0]-w/2)**2 + (boxes[i][1]-h/2)**2)
                 + ((boxes[i][2]-w/2)**2 + (boxes[i][3]-h/2)**2)) /
                 (abs(boxes[i][2]-boxes[i][0])*abs(boxes[i][3]-boxes[i][1])+is_center_point_inside[i]*squares[i]**2)
                 for i in range(len(boxes))])
    return np.argmin(dist_list)

In [None]:
def get_features_with_detectron2(model, data_path):
    """
        get the coordinates of the cars bounding boxes and confidence of model
    """
    dataframe=pd.DataFrame([],columns=['x_min_car', 'y_min_car', 'x_max_car', 'y_max_car', 'confidence'])
    for img_name in tqdm(os.listdir(data_path)):
        img_path = data_path + img_name
        if 'heic' in img_path:
            heif_file = pyheif.read(img_path)
            img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw")
            img.save('/content/heic_images/'+img_name.split('.')[0]+'h.jpg')
            img_path = '/content/heic_images/'+img_name.split('.')[0]+'h.jpg'
        input = cv2.imread(img_path)
        input_img = read_image(img_path)
        outputs = predictor(input)
        all_boxes = [box for box in outputs['instances'].pred_boxes.to(torch.device('cpu'))]
        pred_classes = outputs['instances'].pred_classes
        car_boxes = torch.tensor([all_boxes[idx].tolist() 
                                  for idx in torch.argwhere(pred_classes==2).reshape(-1)])
        transform = T.ToPILImage()
        if len(car_boxes) > 0:
              car_box_idx = get_car_box_idx(input, car_boxes)
              #image output
              #annotated_img = transform(draw_bounding_boxes(input_img, car_boxes[car_box_idx].unsqueeze(0), colors = ['green', 'white', 'blue','yellow', 'red', 'brown','pink','violet', 
              #                                                                                                       'grey', 'cyan', 'black', 'orange'], width=5))
              #annotated_img.save('/content/drive/MyDrive/cars_detection_by_detectron2_test/'+img_name.split('.')[0]+'.jpg')
              dataframe.loc[img_name] = torch.cat((car_boxes[car_box_idx], 
                                                   torch.tensor([outputs['instances'].scores[car_box_idx].to(torch.device('cpu'))])), dim=0).detach().numpy()
    return dataframe

In [None]:
car_boxes_train_df = get_features_with_detectron2(predictor, train_path)

## Load MiDaS to get depth maps

In [None]:
model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)

midas = torch.hub.load("intel-isl/MiDaS", model_type)


In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

In [None]:
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# Get relatives distances with MiDaS

In [None]:
def get_relative_distances_to_cars(midas, data_path, transform, boxes_df):
    """
        > get a representation of images in the form of depth maps
        > separate the matrix of pixels inside the bounding box corresponding to the car
        > find the median value of the pixels in the resulting matrix
    """
    result_dataframe=pd.DataFrame([],columns=['median_depth_value'])
    device = torch.device('cuda')
    midas.to(device)
    midas.eval()
    for img_name in tqdm(os.listdir(data_path)):
        img_path=data_path+img_name
        if 'heic' in img_path:
            heif_file = pyheif.read(img_path)
            img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw")
            img.save('/content/heic_images/'+img_name.split('.')[0]+'h.jpg')
            img_path = '/content/heic_images/'+img_name.split('.')[0]+'h.jpg'
        
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_batch = transform(img).to(device)

        with torch.no_grad():
            prediction = midas(input_batch)

            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
        #get depth maps
        output = prediction.cpu().numpy()
        if img_name in boxes_df.index:
            boxes_coords=boxes_df.loc[img_name]
            y_max=int(boxes_coords['y_max_car'])
            y_min=int(boxes_coords['y_min_car'])
            x_max=int(boxes_coords['x_max_car'])
            x_min=int(boxes_coords['x_min_car'])
            #get necessary matrix
            car_depth_array=output[y_min:y_max, x_min:x_max]
            #count median value
            result_dataframe.loc[img_name]=np.median(np.ravel(car_depth_array))
    return result_dataframe

In [None]:
car_depths_df=get_relative_distances_to_cars(midas, train_path, transform, car_boxes_train_df)

In [None]:
#load targets
distances_df = pd.read_csv('/content/drive/MyDrive/Distance_calculation_dataset/train.csv', sep=';', index_col=None)
distances_df = distances_df.set_index([distances_df['image_name']])

# Car plates detection

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
model8epochs = torchvision.models.detection.maskrcnn_resnet50_fpn()
in_features = model8epochs.roi_heads.box_predictor.cls_score.in_features
num_classes = 2
model8epochs.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

hidden_layer = 256
in_features_mask = model8epochs.roi_heads.mask_predictor.conv5_mask.in_channels
model8epochs.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
#load pretrained model
model8epochs.load_state_dict(torch.load('/content/drive/MyDrive/mask-rcnn_pretrained3_carplate_dataset.pt', map_location=torch.device('cpu')))

In [None]:
def get_features_from_carplate_detector(data_path, model, car_boxes_df):
    """
        > get the coordinates of the car plates bounding boxes and confidence of model
    """
    model.eval()
    device=torch.device("cuda")
    model.to(device)
    dataframe=pd.DataFrame([],columns=['x_min', 'y_min', 'x_max', 'y_max', 'carplate_conf'])
    for img_name in tqdm(os.listdir(data_path)):
        img_path = data_path + img_name
        if 'heic' in img_path:
            heif_file = pyheif.read(img_path)
            img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw")
        else: 
            img = Image.open(img_path).convert('RGB')
        transform = T.ToTensor()
        input = transform(img).unsqueeze(0)
        input=input.to(device)
        output=model(input)
        if len(output[0]['boxes'])==0:
            print(img_name)
            continue
        
        carplate_box_idx=get_carplate_box_idx(data_path, img_name, output, car_boxes_df)
        dataframe.loc[img_name] = torch.cat((output[0]['boxes'][carplate_box_idx].to(torch.device('cpu')),
                   torch.tensor([output[0]['scores'][carplate_box_idx]])), dim=0).detach().numpy()
    return dataframe

In [None]:
import math
def get_carplate_box_idx(data_path, img_name, output, car_boxes_df):
    """
        select necessary car plate box using intersection over union with
        choosen car bounding box. If car detector had problems with recognition
        we choose car plate which is closer to the center
    """
    if img_name in car_boxes_df.index:
          car_box=torch.tensor(car_boxes_df.loc[img_name][:4]).unsqueeze(0)
          iou_list = [float(box_iou(car_box, box.unsqueeze(0))) 
                            for box in output[0]['boxes'].to(torch.device('cpu'))]
          return np.argmax(iou_list)
    else:
          img = cv2.imread(data_path+img_name)
          w, h = img.shape[1], img.shape[0]
          dist_list = np.array([float(((box[0]-w/2)**2 + (box[1]-h/2)**2
                 + (box[2]-w/2)**2 + (box[3]-h/2)**2)/(abs(box[2]-box[0])*abs(box[3]-box[1]))) 
                 for box in output[0]['boxes'].to(torch.device('cpu'))])
          return np.argmin(dist_list)

In [None]:
carplate_boxes_df = get_features_from_carplate_detector(train_path,  model8epochs, car_boxes_train_df)

In [34]:
# concatenate all features and target columns
train_dataframe = train_dataframe = pd.concat([car_depths_df, distances_df, car_boxes_train_df, carplate_boxes_df], axis=1).drop(columns=['image_name'])

# Train catboost regressor

In [35]:
# create synthetic features
train_dataframe['car_width']=train_dataframe['x_max_car']-train_dataframe['x_min_car']
train_dataframe['width']=train_dataframe['x_max']-train_dataframe['x_min']
train_dataframe['height']=train_dataframe['y_max']-train_dataframe['y_min']

Look at the correlation of the columns in the data

In [36]:
corr_df = train_dataframe.corr()
corr_df.style.background_gradient(cmap='coolwarm')

Unnamed: 0,median_depth_value,distance,x_min_car,y_min_car,x_max_car,y_max_car,confidence,x_min,y_min,x_max,y_max,carplate_conf,car_width,width,height
median_depth_value,1.0,-0.769165,-0.60382,-0.629889,0.680166,0.736896,0.041683,-0.392762,0.440013,0.49023,0.545335,0.254249,0.803653,0.7909,0.758147
distance,-0.769165,1.0,0.697868,0.723782,-0.593474,-0.661854,-0.119263,0.420358,-0.374551,-0.491369,-0.494162,-0.290825,-0.800483,-0.815161,-0.786777
x_min_car,-0.60382,0.697868,1.0,0.65514,-0.283374,-0.651427,0.192751,0.631332,-0.426289,-0.327902,-0.546103,-0.250493,-0.769247,-0.838796,-0.818421
y_min_car,-0.629889,0.723782,0.65514,1.0,-0.614188,-0.465608,0.00268,0.38093,-0.102914,-0.491212,-0.25036,-0.202599,-0.789483,-0.781879,-0.757085
x_max_car,0.680166,-0.593474,-0.283374,-0.614188,1.0,0.780789,0.040031,-0.175597,0.422447,0.671166,0.529709,0.063266,0.830746,0.778483,0.756886
y_max_car,0.736896,-0.661854,-0.651427,-0.465608,0.780789,1.0,-0.1048,-0.426127,0.680597,0.556335,0.77914,0.140806,0.898328,0.88108,0.868826
confidence,0.041683,-0.119263,0.192751,0.00268,0.040031,-0.1048,1.0,0.054451,-0.070524,-0.027431,-0.078943,0.180833,-0.08521,-0.071545,-0.081566
x_min,-0.392762,0.420358,0.631332,0.38093,-0.175597,-0.426127,0.054451,1.0,-0.262006,0.373359,-0.329422,-0.262571,-0.469749,-0.489415,-0.473637
y_min,0.440013,-0.374551,-0.426289,-0.102914,0.422447,0.680597,-0.070524,-0.262006,1.0,0.332388,0.983365,0.040841,0.516289,0.532639,0.514527
x_max,0.49023,-0.491369,-0.327902,-0.491212,0.671166,0.556335,-0.027431,0.373359,0.332388,1.0,0.421896,-0.068493,0.623974,0.626263,0.619665


In [None]:
model_regressor = catboost.CatBoostRegressor(num_boost_round=6000, loss_function='RMSE')
model_regressor.fit(train_dataframe.drop(columns=['distance']), 
                    train_dataframe['distance'])

# Make predictions

Get features columns and make predictions with catboost regressor

In [None]:
car_boxes_test_df = get_features_with_detectron2(predictor, test_path)

In [None]:
car_depths_df_test=get_relative_distances_to_cars(midas, test_path, transform, car_boxes_test_df)

In [None]:
carplate_boxes_test_df = get_features_from_carplate_detector(test_path,  model8epochs, car_boxes_test_df)

In [41]:
test_dataframe = pd.concat([car_boxes_test_df, car_depths_df_test, carplate_boxes_test_df], axis=1)
test_dataframe['car_width']=test_dataframe['x_max_car']-test_dataframe['x_min_car']
test_dataframe['width']=test_dataframe['x_max']-test_dataframe['x_min']
test_dataframe['height']=test_dataframe['y_max']-test_dataframe['y_min']

In [42]:
predictions = model_regressor.predict(test_dataframe)

# Create and save submission file

In [43]:
test_dataframe['distance'] = predictions
test_dataframe = test_dataframe.reset_index(level=0)
test_dataframe.rename(columns={'index':'image_name'}, inplace=True)

In [None]:
sample_solution_df = test_dataframe[['image_name', 'distance']]
sample_solution_df.loc[len(sample_solution_df)]=pd.Series({'image_name':'img_2674.heic','distance':4.8838})

In [45]:
sample_solution_df.to_csv('solution.csv', sep=';', index=False)