<a href="https://colab.research.google.com/github/ko74dev/AI/blob/main/dist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Определение расстояния до впереди идущего автомобиля на основе изображений

## Устанавливаем недостающие пакеты на colab

In [1]:
!pip install pyheif
!pip install transformers
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyheif
  Downloading pyheif-0.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 19.1 MB/s 
Installing collected packages: pyheif
Successfully installed pyheif-0.7.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.2-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 28.3 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 57.8 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)
[K     |████████████████████████████████| 120 kB 59.2 MB/s 
Installing collected packages: tokenizers, 

###  Импорт необходимых пакетов

In [2]:
import pandas as pd
import torch
from PIL import Image
import pyheif 
from tqdm.notebook import tqdm
from transformers import YolosFeatureExtractor, YolosForObjectDetection
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor

## Подготовка датасета для обучения

Скачиваем и распаковываем предостпвленнвый dataset для обучения

In [3]:
# или 
# !wget https://lodmedia.hb.bizmrg.com/case_files/791333/train_dataset_train.zip
# ! unzip train_dataset_train.zip -x
# или
from google.colab import drive
drive.mount('/content/drive')
! unzip /content/drive/MyDrive/train_dataset_train.zip -x

Mounted at /content/drive
Archive:  /content/drive/MyDrive/train_dataset_train.zip
   creating: train/
  inflating: train/img_1596.jpg      
  inflating: train/img_1600.jpg      
  inflating: train/img_1601.jpg      
  inflating: train/img_1603.jpg      
  inflating: train/img_1605.jpg      
  inflating: train/img_1606.jpg      
  inflating: train/img_1607.jpg      
  inflating: train/img_1608.jpg      
  inflating: train/img_1612.jpg      
  inflating: train/img_1613.jpg      
  inflating: train/img_1615.jpg      
  inflating: train/img_1616.jpg      
  inflating: train/img_1618.jpg      
  inflating: train/img_1619.jpg      
  inflating: train/img_1620.jpg      
  inflating: train/img_1621.jpg      
  inflating: train/img_1623.jpg      
  inflating: train/img_1626.jpg      
  inflating: train/img_1627.jpg      
  inflating: train/img_1628.jpg      
  inflating: train/img_1631.jpg      
  inflating: train/img_1634.jpg      
  inflating: train/img_1635.jpg      
  inflating: train/img_

Читаем имена файлов и расстояния в датафрейм

In [101]:
train_labels_df = pd.read_csv('train.csv', sep=';', index_col=None)
train_labels_df.tail(2)

Unnamed: 0,image_name,distance
528,img_2944.jpg,5.2
529,img_2945.jpg,5.54


Для определения местоположения автомобиля на изображении будем использовать трансформер YOLOS

In [110]:
extractor = YolosFeatureExtractor.from_pretrained("hustvl/yolos-base", 
                                                  do_resize=True, size=672)

model = YolosForObjectDetection.from_pretrained("hustvl/yolos-base")

### Извлечение данных из изображений

In [111]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval();

In [112]:
train_data = []                                     # Массив с извлекаемыми данными

for img_name in tqdm(train_labels_df.image_name):   # Перебираем имена файлов
    if 'heic' in img_name:                          # Чтение heic изображения
        heif_file = pyheif.read(f'train/{img_name}')
        img = Image.frombytes(heif_file.mode, heif_file.size, 
                              heif_file.data, "raw", 
                              heif_file.mode, heif_file.stride)
    else:                                           # Чтение др-х изображений
        img = Image.open(f'train/{img_name}')
    # Подготовка данных для модели
    inputs = extractor(images=img, return_tensors="pt").pixel_values
    # Вывод модели для текущего изображения
    outputs = model(inputs.to(device))
    # Применение softmax к выводу для задачи классификации
    # Выбор только 3-го класса (автомобилей)
    prob = outputs.logits.softmax(-1)[0, :, 3]
    # # Из всех прогнозов выбираем максимальный
    # max_prob = prob.argmax()
    # # Добавление параметров обрамляющей рамки и вероятность прогноза к результату
    # train_data.append(outputs.pred_boxes[0, max_prob].cpu().tolist() + 
    #                   [prob[max_prob].cpu().item()])

    # Из всех прогнозов выбираем правдоподобные
    keep = prob > .33
    pred_boxes = outputs.pred_boxes[0, keep]
    # в центре внимания
    if len(pred_boxes)>0:
      max_prob = (pred_boxes[:, 0] - .5).abs().argmin()
      prob = prob[keep][max_prob]
      train_data.append(pred_boxes[max_prob].cpu().tolist() + [prob.cpu().item()])
    else:
      prob = outputs.logits.softmax(-1)[0, :, 3]
      max_prob = prob.argmax()
      train_data.append(outputs.pred_boxes[0, max_prob].cpu().tolist() + 
                        [prob[max_prob].cpu().item()])
    del inputs


  0%|          | 0/530 [00:00<?, ?it/s]

In [113]:
from torch import no_grad, cuda
import gc
cuda.empty_cache()
gc.collect()
with no_grad():
  for m in model.children():
    m.cuda()
    m.eval()
    # x = m(x)
    m.cpu()
    cuda.empty_cache()

cuda.empty_cache()
gc.collect()

0

Поместим извдеченные данные в датафрейм и объеденим с train_labels_df

In [114]:
len(train_data)

530

In [115]:
train_labels_df = pd.concat([train_labels_df,
            pd.DataFrame(train_data, columns = ['x_c', 'y_c', 'dx', 'dy', 'conf'], index=train_labels_df.index)],
                            axis=1)

In [116]:
# train_labels_df = train_labels_df.iloc[:,[0,1,7,8,9,10,11]]

train_labels_df.tail(2)

Unnamed: 0,image_name,distance,x_c,y_c,dx,dy,conf
528,img_2944.jpg,5.2,0.455869,0.490569,0.133825,0.155531,0.99743
529,img_2945.jpg,5.54,0.468002,0.515713,0.123505,0.149222,0.977843


In [117]:
train_labels_df.to_csv('/content/drive/MyDrive/train_Yb672.csv')

### расширение данных
Наш датасет маленький, проведем расширение данных за счет симметричного отражения по горизогтали

In [118]:
train_data_aug = train_labels_df.copy()
train_data_aug['x_c'] = 1-train_labels_df['x_c']

train_labels_df = pd.concat([train_labels_df, train_data_aug], axis=0, ignore_index=True)
train_labels_df.tail(2)

Unnamed: 0,image_name,distance,x_c,y_c,dx,dy,conf
1058,img_2944.jpg,5.2,0.544131,0.490569,0.133825,0.155531,0.99743
1059,img_2945.jpg,5.54,0.531998,0.515713,0.123505,0.149222,0.977843


### Добавим новые признаки

In [119]:
train_labels_df['r'] = train_labels_df.dx / train_labels_df.dy
train_labels_df['d'] = train_labels_df.x_c**2 + .25

## Обучение итоговой модели
### Разделим данные для обучения и валидации

In [120]:
X_train, X_val, y_train, y_val = train_test_split(
                        train_labels_df[['x_c', 'y_c', 'dx', 'dy', 'conf', 'r','d']],
                        train_labels_df['distance'],
                        test_size=0.15, random_state=7)

### Обучение модели

In [121]:
cbr = CatBoostRegressor(eval_metric='R2', random_seed=0, iterations=10000)

cbr.fit(X_train,  y_train, eval_set=(X_val, y_val), early_stopping_rounds=100,
       verbose = 100)

Learning rate set to 0.012297
0:	learn: 0.0174450	test: 0.0122747	best: 0.0122747 (0)	total: 2.24ms	remaining: 22.4s
100:	learn: 0.7680315	test: 0.7863465	best: 0.7863465 (100)	total: 188ms	remaining: 18.5s
200:	learn: 0.8838580	test: 0.8973329	best: 0.8973329 (200)	total: 373ms	remaining: 18.2s
300:	learn: 0.9125054	test: 0.9193762	best: 0.9193762 (300)	total: 555ms	remaining: 17.9s
400:	learn: 0.9271897	test: 0.9276115	best: 0.9276115 (400)	total: 739ms	remaining: 17.7s
500:	learn: 0.9372853	test: 0.9327953	best: 0.9327953 (500)	total: 934ms	remaining: 17.7s
600:	learn: 0.9450092	test: 0.9366396	best: 0.9366396 (600)	total: 1.12s	remaining: 17.5s
700:	learn: 0.9511135	test: 0.9396793	best: 0.9396793 (700)	total: 1.31s	remaining: 17.3s
800:	learn: 0.9565098	test: 0.9426448	best: 0.9426448 (800)	total: 1.49s	remaining: 17.2s
900:	learn: 0.9609887	test: 0.9452625	best: 0.9452625 (900)	total: 1.68s	remaining: 17s
1000:	learn: 0.9644000	test: 0.9476523	best: 0.9476523 (1000)	total: 1.9s	r

<catboost.core.CatBoostRegressor at 0x7f2f9170d9d0>

In [None]:
0.9912549682
0.9615683365
0.94343543

In [89]:
cbr.feature_importances_

array([ 2.59855376, 13.69615139, 35.84991544, 29.83916996,  6.18016802,
        9.04219195,  2.79384948])

In [136]:
cbr.get_all_params()

{'nan_mode': 'Min',
 'eval_metric': 'R2',
 'iterations': 10000,
 'sampling_frequency': 'PerTree',
 'leaf_estimation_method': 'Newton',
 'od_pval': 0,
 'grow_policy': 'SymmetricTree',
 'penalties_coefficient': 1,
 'boosting_type': 'Plain',
 'model_shrink_mode': 'Constant',
 'feature_border_type': 'GreedyLogSum',
 'bayesian_matrix_reg': 0.10000000149011612,
 'force_unit_auto_pair_weights': False,
 'l2_leaf_reg': 3,
 'random_strength': 1,
 'od_type': 'Iter',
 'rsm': 1,
 'boost_from_average': True,
 'model_size_reg': 0.5,
 'pool_metainfo_options': {'tags': {}},
 'subsample': 0.800000011920929,
 'use_best_model': True,
 'od_wait': 100,
 'random_seed': 0,
 'depth': 6,
 'posterior_sampling': False,
 'border_count': 254,
 'classes_count': 0,
 'auto_class_weights': 'None',
 'sparse_features_conflict_fraction': 0,
 'leaf_estimation_backtracking': 'AnyImprovement',
 'best_model_min_trees': 1,
 'model_shrink_rate': 0,
 'min_data_in_leaf': 1,
 'loss_function': 'RMSE',
 'learning_rate': 0.0122969998

In [143]:
cbr1 = CatBoostRegressor(eval_metric='R2', random_seed=1, iterations=10000,
                         learning_rate=.005, l2_leaf_reg=5, depth=7)

cbr1.fit(X_train,  y_train, eval_set=(X_val, y_val), early_stopping_rounds=50,
       verbose = 100, init_model=cbr)

0:	learn: 0.9997758	test: 0.9912555	best: 0.9912555 (0)	total: 3.33ms	remaining: 33.3s
100:	learn: 0.9997805	test: 0.9912646	best: 0.9912649 (98)	total: 303ms	remaining: 29.7s
200:	learn: 0.9997852	test: 0.9912753	best: 0.9912753 (199)	total: 601ms	remaining: 29.3s
300:	learn: 0.9997894	test: 0.9912809	best: 0.9912809 (300)	total: 911ms	remaining: 29.3s
400:	learn: 0.9997939	test: 0.9912897	best: 0.9912899 (398)	total: 1.21s	remaining: 29s
500:	learn: 0.9997979	test: 0.9912987	best: 0.9912988 (493)	total: 1.51s	remaining: 28.7s
600:	learn: 0.9998016	test: 0.9913072	best: 0.9913076 (598)	total: 1.83s	remaining: 28.6s
700:	learn: 0.9998056	test: 0.9913146	best: 0.9913146 (698)	total: 2.14s	remaining: 28.4s
800:	learn: 0.9998098	test: 0.9913213	best: 0.9913215 (793)	total: 2.45s	remaining: 28.1s
900:	learn: 0.9998133	test: 0.9913285	best: 0.9913285 (900)	total: 2.76s	remaining: 27.9s
1000:	learn: 0.9998169	test: 0.9913369	best: 0.9913369 (1000)	total: 3.06s	remaining: 27.5s
1100:	learn: 0

<catboost.core.CatBoostRegressor at 0x7f2f919a1d50>

In [None]:
0.9919964962

## Подготовка данных для отправки на тестирование

In [23]:
# или 
# !wget https://lodmedia.hb.bizmrg.com/case_files/791333/test_dataset_test.zip
# ! unzip train_dataset_train.zip -x
# или
! unzip /content/drive/MyDrive/test_dataset_test.zip -x

Archive:  /content/drive/MyDrive/test_dataset_test.zip
   creating: test/
  inflating: test/img_1597.jpg       
  inflating: test/img_1598.jpg       
  inflating: test/img_1599.jpg       
  inflating: test/img_1602.jpg       
  inflating: test/img_1604.jpg       
  inflating: test/img_1609.jpg       
  inflating: test/img_1610.jpg       
  inflating: test/img_1611.jpg       
  inflating: test/img_1617.jpg       
  inflating: test/img_1622.jpg       
  inflating: test/img_1625.jpg       
  inflating: test/img_1629.jpg       
  inflating: test/img_1630.jpg       
  inflating: test/img_1632.jpg       
  inflating: test/img_1633.jpg       
  inflating: test/img_1636.jpg       
  inflating: test/img_1638.jpg       
  inflating: test/img_1639.jpg       
  inflating: test/img_1642.jpg       
  inflating: test/img_1644.jpg       
  inflating: test/img_1646.jpg       
  inflating: test/img_1648.jpg       
  inflating: test/img_1651.jpg       
  inflating: test/img_1652.jpg       
  inflating: t

In [144]:
sample_solution = pd.read_csv('sample_solution.csv', sep=';')
sample_solution.tail(2)

Unnamed: 0,image_name,distance
519,img_2571.jpg,0.0
520,img_2674.heic,0.0


In [123]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval();

test_data = []                                     # Массив с извлекаемыми данными

for img_name in tqdm(sample_solution.image_name):   
    if 'heic' in img_name:                          # Чтение heic изображения
        heif_file = pyheif.read(f'test/{img_name}')
        img = Image.frombytes(heif_file.mode, heif_file.size, 
                              heif_file.data, "raw", 
                              heif_file.mode, heif_file.stride)
    else:                                           # Чтение др-х изображений
        img = Image.open(f'test/{img_name}')
    inputs = extractor(images=img, return_tensors="pt")
    outputs = model(**inputs.to(device))
    prob = outputs.logits.softmax(-1)[0, :, 3].cpu()
    # max_prob = prob.argmax()
    # test_data.append(outputs.pred_boxes[0, max_prob].cpu().tolist() + 
    #                   [prob[max_prob].item()])
    
    keep = prob > .33
    pred_boxes = outputs.pred_boxes[0, keep]
    # в центре внимания
    if len(pred_boxes)>0:
      max_prob = (pred_boxes[:, 0] - .5).abs().argmin()
      prob = prob[keep][max_prob]
      test_data.append(pred_boxes[max_prob].cpu().tolist() + [prob.cpu().item()])
    else:
      prob = outputs.logits.softmax(-1)[0, :, 3]
      max_prob = prob.argmax()
      test_data.append(outputs.pred_boxes[0, max_prob].cpu().tolist() + 
                        [prob[max_prob].cpu().item()])
    del inputs

  0%|          | 0/521 [00:00<?, ?it/s]

In [124]:
len(test_data)

521

In [145]:
test_df = pd.DataFrame(test_data, columns = ['x_c', 'y_c', 'dx', 'dy', 'conf'])
# test_df.to_csv('/content/drive/MyDrive/test_Yb672.csv')

In [146]:
test_df['r'] = test_df.dx / test_df.dy
test_df['d'] = test_df.x_c**2 + .25

In [127]:
test_df.shape

(521, 7)

In [147]:
sample_solution['distance'] = cbr1.predict(test_df[['x_c', 'y_c', 'dx', 'dy', 'conf', 'r', 'd']])
test_df.tail(2)

Unnamed: 0,x_c,y_c,dx,dy,conf,r,d
519,0.452857,0.494657,0.42895,0.564746,0.991061,0.759545,0.455079
520,0.08209,0.3426,0.121342,0.084786,0.967029,1.431157,0.256739


In [148]:
sample_solution.to_csv('/content/drive/MyDrive/sub1-2.csv', sep=';', index=False)

In [149]:
sample_solution.tail(2)

Unnamed: 0,image_name,distance
519,img_2571.jpg,2.331276
520,img_2674.heic,3.317729
