In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from collections import Counter
from scipy.ndimage.interpolation import rotate

## Rotation
In the big-data-derby-2022 competition, information about a racehorse's speed and average curvature is critical. 

This information can be calculated as numerical approximations from the latitude and the longitude.

However, the tracks are skewed (rotated) with respect to the longitude/latitude axes.

A skewed path introduces rounding error along the x and y axes, so I think the coordinates need to be rotated.


* This notebook introduces a heuristic method for finding that rotation.

In [None]:
# Load the tracking table and order its rows by track, race date, race number,
# program number and trakus_index; the index is rebuilt after sorting so it
# runs 0..n-1 in that order.
sort_keys = ['track_id','race_date','race_number','program_number','trakus_index']
df = (
    pd.read_csv("../input/big-data-derby-2022/nyra_tracking_table.csv")
    .sort_values(by=sort_keys)
    .reset_index(drop=True)
)
df.head(10)

# Skew

In [None]:
# One scatter panel per track showing the raw longitude (x) / latitude (y)
# samples, so the skew of each track relative to the axes is visible.
plt.figure(figsize=(15, 5))
for panel, track_id in enumerate(['AQU','BEL','SAR'], start=1):
    print(f"track_id :{track_id}")
    on_track = df.track_id == track_id
    position = df.loc[on_track, ['longitude','latitude']].to_numpy()
    plt.subplot(1, 3, panel)
    plt.title(f"track_id :{track_id}")
    # Only every 100th row is drawn to keep the scatter plot light.
    thinned = position[::100]
    plt.scatter(thinned[:, 0], thinned[:, 1])
plt.show()

# Find a rotation angle

In [None]:
def get_feature(image):
    """Score how wide an image is relative to how tall it is.

    The image is summed along each axis to get a per-column and a per-row
    profile. Columns whose sum exceeds ``mean - std`` of the column profile
    and rows whose sum exceeds the mean of the row profile are counted.

    Parameters
    ----------
    image : 2-D numpy array.

    Returns
    -------
    The column count divided by the row count, or ``0.`` when no row
    qualifies (which avoids dividing by zero).
    """
    col_profile = image.sum(axis=0)
    row_profile = image.sum(axis=1)

    # Columns use a looser threshold (mean - std) than rows (mean).
    col_threshold = col_profile.mean() - col_profile.std()
    n_cols = (col_profile > col_threshold).sum()
    n_rows = (row_profile > row_profile.mean()).sum()

    if n_rows == 0:
        return 0.
    return n_cols/n_rows

In [None]:
# Build, for every track, the raw (longitude, latitude) array and a binary
# occupancy image of the visited positions.
track_id_list = ['AQU','BEL','SAR']
position_list = []        # one (n_rows, 2) float array per track
position_image_list = []  # one 2-D 0/1 image per track, same order

for track_id in track_id_list:
    position = df.loc[(df.track_id==track_id), ['longitude','latitude']].to_numpy()

    # Snap the coordinates to an integer grid: scale by 1e5 and round, so one
    # pixel corresponds to a step of 1e-5 in the input coordinates.
    rounded_pos = (position*1e5).round().astype('int')
    x_max, y_max = rounded_pos.max(axis=0)
    x_min, y_min = rounded_pos.min(axis=0)

    # Rows index latitude (y), columns index longitude (x); the image is just
    # large enough to hold the bounding box of the rounded positions.
    pos_img = np.zeros([y_max - y_min + 1, x_max - x_min + 1])

    # Mark every visited cell in a single vectorised assignment. Repeated
    # positions simply write the same 1 again, so no de-duplication is needed.
    pos_img[rounded_pos[:, 1] - y_min, rounded_pos[:, 0] - x_min] = 1

    position_list.append(position)
    position_image_list.append(pos_img)

In [None]:
# plot pos_img
# Show each track's occupancy image side by side; origin='lower' puts row 0
# at the bottom of the panel.
plt.figure(figsize=(12,4))
panels = zip(track_id_list, position_image_list)
for panel, (track_id, pos_img) in enumerate(panels, start=1):
    plt.subplot(1, 3, panel)
    plt.title(f"track_id :{track_id}")
    plt.imshow(pos_img, origin='lower')
plt.show()

In [None]:
# Sweep candidate rotation angles (degrees) and, for every track image, keep
# the angle at which get_feature() of the rotated image is largest.
angle_space = np.linspace(0, 180, 1000)

mean_angle_list = []       # selected angle per track
list_of_feature_list = []  # full score curve per track (for plotting)
for pos_img in position_image_list:
    feature_list = []
    for angle in tqdm(angle_space):
        rotated_pos_img = rotate(pos_img, angle)
        feature_list.append(get_feature(rotated_pos_img))

    feature_list = np.array(feature_list)

    # get_feature() returns a ratio of two integer counts, so several angles
    # can tie for the maximum exactly. Averaging a plateau of neighbouring
    # angles is what we want, but tied maxima need not be adjacent (the sweep
    # contains both 0 and 180, which describe the same orientation up to a
    # flip), and averaging across such a gap would yield an angle far from
    # every maximum. Keep only the first contiguous run of tied maxima.
    max_idx = np.flatnonzero(feature_list == feature_list.max())
    run_breaks = np.flatnonzero(np.diff(max_idx) > 1)
    if run_breaks.size:
        max_idx = max_idx[:run_breaks[0] + 1]
    mean_angle = angle_space[max_idx].mean()

    mean_angle_list.append(mean_angle)
    list_of_feature_list.append(feature_list)
print(mean_angle_list)

In [None]:
# plot
# Top row: each occupancy image rotated by its selected angle.
# Bottom row: the score curve over the angle sweep for the same track.
plt.figure(figsize=(18,8))

per_track = zip(track_id_list, position_image_list, mean_angle_list, list_of_feature_list)
for col, (track_id, pos_img, mean_angle, feature_list) in enumerate(per_track, start=1):
    plt.subplot(2, 3, col)
    plt.title(f"track_id :{track_id}")
    plt.imshow(rotate(pos_img, mean_angle), origin='lower')

    plt.subplot(2, 3, col + 3)
    plt.title(f"features with respect to rotation angles")
    plt.plot(feature_list)
    # The curve is plotted against sample index, so relabel every tenth of
    # the axis with the corresponding angle from angle_space.
    tick_step = len(feature_list)//10
    label_step = len(angle_space)//10
    tick_positions = np.arange(0, len(feature_list), tick_step)
    tick_labels = angle_space[::label_step].round(2)
    plt.xticks(tick_positions, tick_labels, rotation=45)

plt.tight_layout()
plt.show()

# Rotate the array

In [None]:
def rotate_arr_clockwise(arr, angle):
    """Rotate 2-D points clockwise about the origin.

    Parameters
    ----------
    arr : array whose rows are (x, y) points.
    angle : rotation angle in degrees.

    Returns
    -------
    Array of the rotated points, one (x, y) row per input row.
    """
    theta = np.pi/180*angle  # degrees -> radians
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # Clockwise rotation matrix: (1, 0) maps to (cos, -sin).
    clockwise = np.array([[cos_t, sin_t],
                          [-sin_t, cos_t]])
    rotated_columns = np.dot(clockwise, arr.T)
    return rotated_columns.T

In [None]:
# Pair every track id with the rotation angle selected for it
# (both lists are in the same track order).
angle_items = {
    track: angle
    for track, angle in zip(track_id_list, mean_angle_list)
}
angle_items

In [None]:
# Rotate each track's raw positions by its selected angle and plot the result.
# The sweep only resolves the angle up to a half turn, so the direction is
# fixed by requiring the first row's x to be no greater than the mean x.
plt.figure(figsize=(18, 4))
for idx, track_id in enumerate(list(angle_items)):
    mean_angle = angle_items[track_id]
    position = position_list[idx]
    rotated_position = rotate_arr_clockwise(position, mean_angle)

    # If the first sample's x lies to the right of the mean x, turn the track
    # by a further 180 degrees and record the updated angle.
    first_x = rotated_position[0, 0]
    center_x = rotated_position[:, 0].mean()
    if first_x > center_x:
        mean_angle += 180
        angle_items[track_id] = mean_angle
        rotated_position = rotate_arr_clockwise(position, mean_angle)

    plt.subplot(1, 3, idx+1)
    plt.title(f"track_id: {track_id}")
    plt.scatter(rotated_position[:, 0], rotated_position[:, 1], s=1)
    plt.grid(color='lightgray', linestyle='--', linewidth=1, alpha=0.5)

plt.show()
print(f"final rotate angle: {angle_items}")

This is a heuristic method based on the idea of minimizing the vertical extent and maximizing the horizontal extent of the rotated track.

Thanks.