In [1]:
import numpy as np 
import pandas as pd
import cv2
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
import warnings
# Suppress every warning raised from here on (blanket 'ignore' filter).
warnings.filterwarnings('ignore')
# Let pandas render up to 500 columns and 500 rows before truncating output.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
import os
import gc

In [2]:
# Fix NumPy's global random state so any NumPy-based randomness is repeatable.
SEED = 42
np.random.seed(SEED)

# Names of the two label columns in public.csv.
TARGETS = ['North', 'East']

# Input locations.
# PATH       - directory that contains public.csv
# TRAIN_PATH - directory that contains the train images
# TEST_PATH  - directory that contains the test images
PATH = '/kaggle/input/kddbr-2022/'
TRAIN_PATH = '/kaggle/input/kddbr-2022/train/train'
TEST_PATH = '/kaggle/input/kddbr-2022/test/test'

In [3]:
# Read the metadata table that lists every image file.
df = pd.read_csv(os.path.join(PATH, 'public.csv'))

# Turn bare file names into full paths: rows that have a 'North' value are
# prefixed with the train directory, rows without one with the test directory.
image_dir = df['North'].notna().map({True: TRAIN_PATH, False: TEST_PATH})
df['Filename'] = image_dir + '/' + df['Filename']
df.head()

Unnamed: 0,Filename,Altitude,Delta,North,East
0,/kaggle/input/kddbr-2022/train/train/00003e3b9...,178.829834,-0.065231,-0.386045,0.929772
1,/kaggle/input/kddbr-2022/train/train/0001261e2...,207.921478,-0.080688,0.635584,0.152819
2,/kaggle/input/kddbr-2022/train/train/0002ac0d7...,178.048431,0.021576,-1.228229,-0.499388
3,/kaggle/input/kddbr-2022/train/train/0004289ee...,201.084625,0.505981,-1.739709,-0.699928
4,/kaggle/input/kddbr-2022/train/train/0004d0b59...,187.550201,-0.328156,-0.169798,2.828752


# Feature Extraction

In [4]:
from tqdm import tqdm

# Match SIFT keypoints between the left part (columns 0-119) and the right part
# (columns 120 onward) of every image listed in df['Filename'].
#
# Produces two module-level lists:
#   feature_list - one row per match that passes the ratio test:
#                  [file name, x1, y1, x2, y2, best distance, second-best distance]
#   errors       - full paths of images that could not be read or that have
#                  fewer than two keypoints on either side
#
# NOTE(review): an image whose matches all fail the ratio test adds no rows to
# feature_list and is NOT recorded in errors, so len(errors) under-counts the
# images that end up without features.

# The detector and the matcher do not depend on the image, so they are built
# once here instead of once per loop iteration.
sift = cv2.SIFT_create()

# FLANN parameters
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=3)
search_params = dict(checks=50)   # or pass empty dictionary
flann = cv2.FlannBasedMatcher(index_params, search_params)

# Ratio test: keep a match only when its best distance is clearly smaller than
# the distance to the second-best candidate.
ratio_thresh = 0.70

feature_list = []
errors = []
for file in tqdm(df['Filename']):

    img = cv2.imread(file)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None rather
        # than raising; record it instead of crashing on the slice below.
        errors.append(file)
        continue

    imgL = img[:, :120, :]
    imgR = img[:, 120:, :]

    img1 = cv2.cvtColor(imgL, cv2.COLOR_BGR2GRAY)
    img2 = cv2.cvtColor(imgR, cv2.COLOR_BGR2GRAY)

    # find the keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    # k=2 matching needs at least two descriptors on each side.
    if (len(kp1) < 2) or (len(kp2) < 2):
        errors.append(file)
        continue

    matches = flann.knnMatch(des1, des2, k=2)

    # Feature list
    filename = file.split('/')[-1]
    for pair in matches:
        # Skip queries for which fewer than two neighbours came back; the ratio
        # test cannot be evaluated without a second-best candidate.
        if len(pair) < 2:
            continue
        m, n = pair
        if m.distance < ratio_thresh * n.distance:
            # Get the matching keypoints for each of the images
            img1_idx = m.queryIdx
            img2_idx = m.trainIdx
            # x - columns
            # y - rows
            # Get the coordinates
            (x1, y1) = kp1[img1_idx].pt
            (x2, y2) = kp2[img2_idx].pt
            feature_list.append([filename, x1, y1, x2, y2, m.distance, n.distance])

100%|██████████| 146262/146262 [47:11<00:00, 51.65it/s]


In [5]:
# Report how many images were skipped during extraction, then put the collected
# matches into a table with one row per match.
print(f'Errors: {len(errors)}')
vec_columns = ['Filename', 'x1', 'y1', 'x2', 'y2', 'd1', 'd2']
df_vecs = pd.DataFrame(data=feature_list, columns=vec_columns)
display(df_vecs)

Errors: 18960


Unnamed: 0,Filename,x1,y1,x2,y2,d1,d2
0,00003e3b9e5336685200ae85d21b4f5e.jpg,41.320774,23.368038,38.417023,22.075544,41.158230,379.491760
1,00003e3b9e5336685200ae85d21b4f5e.jpg,50.112000,14.717394,47.119362,13.866834,138.974823,404.693726
2,00003e3b9e5336685200ae85d21b4f5e.jpg,89.959831,96.846848,87.096352,95.666275,59.749477,528.926270
3,00003e3b9e5336685200ae85d21b4f5e.jpg,89.959831,96.846848,87.096352,95.666275,73.191528,528.499756
4,00003e3b9e5336685200ae85d21b4f5e.jpg,98.109146,37.000824,95.250710,35.858925,100.024994,401.827087
...,...,...,...,...,...,...,...
6058435,fffae68750a8bd5e6ba46b25ce7030de.jpg,107.966751,53.006016,113.788498,50.450706,192.255035,338.068054
6058436,fffae68750a8bd5e6ba46b25ce7030de.jpg,107.966751,53.006016,113.788498,50.450706,120.983467,366.187103
6058437,fffae68750a8bd5e6ba46b25ce7030de.jpg,109.485153,57.519825,114.831146,54.571133,203.948517,405.579834
6058438,fffae68750a8bd5e6ba46b25ce7030de.jpg,109.842178,49.165081,115.855515,46.714527,201.434860,327.218567


In [6]:
# Fraction of rows without a 'North' value whose file is listed in `errors`.
is_unlabelled = df['North'].isna()
in_errors = df['Filename'].isin(errors)
len(df[in_errors & is_unlabelled]) / len(df[is_unlabelled])

0.17406552670313097

In [7]:
# Fraction of rows with a 'North' value whose file is listed in `errors`.
is_labelled = df['North'].notna()
in_errors = df['Filename'].isin(errors)
len(df[in_errors & is_labelled]) / len(df[is_labelled])

0.10282688998257171

# Feature Engineering

In [8]:
# Per-match displacement between the two keypoints (second minus first).
df_vecs['x'] = df_vecs['x2'].sub(df_vecs['x1'])
df_vecs['y'] = df_vecs['y2'].sub(df_vecs['y1'])

# Squared length of the displacement vector (no square root is taken).
df_vecs['res'] = df_vecs['x'].pow(2).add(df_vecs['y'].pow(2))

# Ratios: best vs. second-best match distance, and first vs. second coordinate
# along each axis.
df_vecs['d_ratio'] = df_vecs['d1'].div(df_vecs['d2'])
df_vecs['x_ratio'] = df_vecs['x1'].div(df_vecs['x2'])
df_vecs['y_ratio'] = df_vecs['y1'].div(df_vecs['y2'])

In [9]:
%%time
df_agg = df_vecs.groupby('Filename').agg(['mean', 'std', 'min', 'max'])
df_agg.columns = ['_'.join(x) for x in df_agg.columns]
print(df_agg.shape)
df_agg

(125808, 48)
CPU times: user 3.37 s, sys: 287 ms, total: 3.66 s
Wall time: 3.66 s


Unnamed: 0_level_0,x1_mean,x1_std,x1_min,x1_max,y1_mean,y1_std,y1_min,y1_max,x2_mean,x2_std,x2_min,x2_max,y2_mean,y2_std,y2_min,y2_max,d1_mean,d1_std,d1_min,d1_max,d2_mean,d2_std,d2_min,d2_max,x_mean,x_std,x_min,x_max,y_mean,y_std,y_min,y_max,res_mean,res_std,res_min,res_max,d_ratio_mean,d_ratio_std,d_ratio_min,d_ratio_max,x_ratio_mean,x_ratio_std,x_ratio_min,x_ratio_max,y_ratio_mean,y_ratio_std,y_ratio_min,y_ratio_max
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1
00003e3b9e5336685200ae85d21b4f5e.jpg,82.413720,25.982775,41.320774,108.751274,56.230375,44.240026,11.455006,113.377670,79.533052,26.023670,38.417023,105.862633,55.059159,44.201747,10.161905,112.118355,81.517429,32.037378,41.158230,138.974823,430.339988,67.901233,379.491760,528.926270,-2.880668,0.060120,-2.992638,-2.794250,-1.171216,0.153395,-1.293100,-0.850560,9.693263,0.236973,9.474593,10.102312,0.194457,0.086039,0.108456,0.343407,1.041613,0.019501,1.027287,1.075585,1.044985,0.042155,1.011232,1.127250
000053b1e684c9e7ea73727b2238ce18.jpg,17.871966,5.317353,11.575399,23.943300,30.301484,18.397724,8.315718,47.437920,17.988740,4.967158,12.082825,23.552248,30.547530,18.129178,9.160202,47.561325,183.810083,44.374174,121.688126,230.698944,361.057910,28.111034,328.179840,397.067993,0.116774,0.386239,-0.391052,0.507425,0.246047,0.455382,-0.415493,0.844484,0.359418,0.302830,0.103524,0.866076,0.508432,0.118783,0.370797,0.677367,0.988566,0.024692,0.958004,1.016604,0.976710,0.040877,0.907809,1.011658
0001261e2060303a06ba6c64d676d639.jpg,29.791870,1.233735,28.367275,30.504168,29.033858,0.688712,28.238602,29.431486,29.586434,1.227276,28.169298,30.295002,30.043530,0.657744,29.284033,30.423279,52.765196,7.193823,46.314144,60.522724,367.926137,30.927856,334.623657,395.746124,-0.205436,0.006460,-0.209166,-0.197977,1.009672,0.030968,0.991793,1.045431,1.062309,0.060459,1.027403,1.132121,0.144298,0.023976,0.117030,0.162082,1.006946,0.000071,1.006904,1.007028,0.966367,0.001790,0.964300,0.967400
00029153d12ae1c9abe59c17ff2e0895.jpg,72.404167,32.212779,7.557237,112.702415,51.835195,32.203391,2.274911,111.906761,76.165145,31.393610,11.505971,115.413086,55.334445,32.286373,4.371530,113.730225,157.904673,56.390772,33.241539,260.040375,360.557536,31.641218,278.019775,425.874390,3.760978,1.228896,2.055489,6.340912,3.499250,1.863582,1.504486,8.965546,31.324445,27.625130,8.149811,118.842970,0.438840,0.153950,0.091165,0.694961,0.919905,0.087350,0.582905,0.978215,0.901645,0.089170,0.511670,0.986339
0004289ee1c7b8b08c77e19878106ae3.jpg,60.715979,31.178627,3.223390,114.755554,63.724199,28.716607,12.655911,114.372795,63.107041,30.725853,6.265714,116.554626,57.736026,28.830212,7.275239,108.868721,100.385628,54.629779,28.548204,245.252930,353.318984,36.660983,142.007050,457.592621,2.391062,0.517975,1.268173,4.007366,-5.988173,0.436724,-8.967373,-5.004376,42.031924,7.427977,28.280440,85.853937,0.285768,0.152585,0.078890,0.696958,0.932495,0.082682,0.510356,0.988724,1.165340,0.156381,1.048623,1.900664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fffe56994e6be696722539f962415cd2.jpg,84.066643,20.691192,69.435760,98.697525,64.282482,7.479308,58.993813,69.571152,106.593895,0.000000,106.593895,106.593895,80.176788,0.000000,80.176788,80.176788,176.455879,22.060025,160.857086,192.054672,289.474869,19.712781,275.535828,303.413910,22.527252,20.691192,7.896370,37.158134,15.894306,7.479308,10.605637,21.182976,1002.138802,694.474576,511.071121,1493.206484,0.613590,0.117992,0.530157,0.697023,0.788663,0.194112,0.651405,0.925921,0.801759,0.093285,0.735797,0.867722
fffe9f664c2ddba4a37bcd35936c7422.jpg,48.309702,35.200664,6.691044,100.191414,84.597187,21.828678,46.923447,110.758789,48.880739,35.086697,7.320388,100.638252,79.583611,21.700714,42.182049,105.515327,97.246745,71.535849,29.291637,245.721390,388.514327,59.417007,265.890961,463.031311,0.571036,0.250688,0.145199,1.009403,-5.013576,0.291582,-5.335190,-4.381027,25.596450,2.995255,19.555204,29.096373,0.254170,0.179130,0.067841,0.621349,0.968529,0.034303,0.914029,0.997680,1.068527,0.023136,1.049694,1.112403
fffee5badc626a2ab8086120712e5639.jpg,70.190686,5.970808,61.314877,75.793106,92.215009,13.550944,75.504784,109.747337,78.425362,5.903372,69.647491,84.012665,95.807223,13.523355,79.120369,113.304955,88.408943,49.763751,43.680660,173.383972,390.144435,15.478293,379.337585,419.380493,8.234676,0.077023,8.131104,8.332615,3.592214,0.054220,3.517509,3.674339,80.721279,1.527616,79.161018,82.933240,0.224042,0.117286,0.110581,0.413429,0.894422,0.009238,0.880360,0.902163,0.961851,0.005591,0.954303,0.968601
ffffda8ada4827900cbf65fd20281080.jpg,54.820404,31.770942,4.171877,116.725494,63.047979,29.666390,3.494223,109.967545,55.025719,31.729802,4.598378,116.912842,67.980034,29.878652,8.111012,115.458939,93.086136,46.763980,27.422619,260.051910,352.884910,31.171321,253.227173,420.424774,0.205315,0.206051,-1.608612,0.651852,4.932054,0.381137,4.134842,5.813086,24.553987,3.762546,17.099981,33.856638,0.265454,0.132512,0.073726,0.687240,0.992467,0.016073,0.907250,1.084755,0.900074,0.081914,0.430612,0.954765


In [10]:
# Write the per-file aggregates to disk; the index (Filename) is written too,
# since to_csv keeps the index by default.
output_csv = 'vecs_v3_fe.csv'
df_agg.to_csv(output_csv)