# Feature Engineering: Relative Position

## Imports

In [17]:
import pandas as pd
import numpy as np
import sympy
import plotly.express as px

## Read In

In [3]:
complete = pd.read_csv(
    '../data/nyra_2019_complete_target.csv',
    dtype = {
        'program_number': str,
        'track_condition': str,
        'race_type':str,
        'post_time':str,
        'initial_projected_position':int
    }
)

## Relative Position

In [8]:
complete.dtypes

track_id                               object
race_date                              object
race_number                             int64
program_number                         object
trakus_index                            int64
latitude                              float64
longitude                             float64
distance_id                             int64
course_type                            object
track_condition                        object
run_up_distance                         int64
race_type                              object
purse                                 float64
post_time                              object
weight_carried                          int64
jockey                                 object
odds                                    int64
position_at_finish                      int64
segment_distance                      float64
segment_angle                         float64
segment_speed                         float64
segment_acceleration              

In [45]:
aqu8 = (
    complete
        .loc[
            (complete['track_id'] == 'AQU')
            &
            (complete['race_date'] == '2019-01-01')
            &
            (complete['race_number'] == 8)
        ]
)

In [174]:
df = (
    complete
        .loc[
            (complete['track_id'] == 'AQU')
            &
            (complete['race_date'] == '2019-01-01')
            &
            (complete['race_number'] == 8)
            &
            (complete['trakus_index'].isin([12, 13]))
        ]
)
px.scatter(
    df,
    x='longitude',
    y='latitude',
    color='program_number',
    hover_data=['trakus_index'],
    
)

In [173]:
aqu8 = aqu8.sort_values(['program_number', 'trakus_index'])

In [226]:
l = list(
        zip(
            list(zip(aqu8['longitude'], aqu8['latitude'])), 
            list(zip(aqu8['longitude'].shift(-1), aqu8['latitude'].shift(-1)))
    )
)
aqu8['vector_startend'] = l

In [227]:
# for now, set df as the 12th trakus of aqu8
df = aqu8[aqu8['trakus_index'] == 12].reset_index(drop=True)
px.scatter(
    df,
    x='longitude',
    y='latitude',
    color='program_number',
    hover_data=['trakus_index'],
    
)

In [228]:
# the vector column is a collection of position pairs:
# (0) the starting position of the current trakus
# (1) the starting position of the next trakus
print(df.loc[0, 'vector_startend'])
print(df.loc[1, 'vector_startend'])
print(np.array(df.loc[1, 'vector_startend'][0]) - np.array(df.loc[0, 'vector_startend'][0]))

((-73.8302651610951, 40.6671073313047), (-73.8302516496949, 40.6671389865046))
((-73.8302441353679, 40.6671036226205), (-73.8302283013935, 40.6671358757166))
[ 2.10257272e-05 -3.70868420e-06]


In [241]:
def unit_vectorize(position_pair):
    distance = [position_pair[1][0] - position_pair[0][0], position_pair[1][1] - position_pair[0][1]]
    norm = np.sqrt(distance[0] ** 2 + distance[1] ** 2)
    direction = [distance[0] / norm, distance[1] / norm]
    return direction

def get_relative_position(reference_startend, position_to_transform, pos_from_startend=False, print_components=False):
    unit_vector = unit_vectorize(reference_startend)
    orthogonal_basis = [unit_vector[1], -unit_vector[0]]
    linear_transformation = np.array([[orthogonal_basis[0],unit_vector[0]], [orthogonal_basis[1],unit_vector[1]]])
    if pos_from_startend:
        relative_pretransform = np.array(position_to_transform[0]) - np.array(reference_startend[0])
    else:
        relative_pretransform = np.array(position_to_transform) - np.array(reference_startend[0])
    # relative_position = np.matmul(relative_pretransform, linear_transformation)
    # relative_position = np.matmul(linear_transformation, relative_pretransform)
    relative_position = np.matmul(np.linalg.inv(linear_transformation), relative_pretransform)
    if print_components:
        print("unit vector:", unit_vector)
        print("orthogonal basis:", orthogonal_basis)
        print("linear transformation:\n", linear_transformation)
        print("relative pretransform", relative_pretransform)


    return relative_position


In [239]:
# here's a test case.

get_relative_position(
    reference_startend=((1, -2), (1, 5)),
    position_to_transform=[(1,-3), (-2, 3)],
    pos_from_startend=True,
    print_components=True
)



unit vector: [0.0, 1.0]
orthogonal basis: [1.0, -0.0]
linear transformation:
 [[ 1.  0.]
 [-0.  1.]]
relative pretransform [ 0 -1]


array([ 0., -1.])

In [252]:
get_relative_position(
    reference_startend=[(-73.8302651610951, 40.6671073313047), (-73.8302516496949, 40.6671389865046)],
    position_to_transform=[(-73.8302441353679, 40.6671036226205), (-73.8302283013935, 40.6671358757166)],
    pos_from_startend=True,
    print_components=True
)

unit vector: [0.3925660434211893, 0.9197238180848819]
orthogonal basis: [0.9197238180848819, -0.3925660434211893]
linear transformation:
 [[ 0.91972382  0.39256604]
 [-0.39256604  0.91972382]]
relative pretransform [ 2.10257272e-05 -3.70868420e-06]


array([2.07937656e-05, 4.84302134e-06])

In [247]:
from itertools import product, permutations, combinations

def get_relative_positions_simple(reference_startend, positions_list):
    return [get_relative_position(reference_startend, pos) for pos in positions_list]

def get_relative_positions(positions_list):
    #return [print(res, trans) for (res, trans) in product(positions_list, positions_list)]
    return [[get_relative_position(ref, trans)[0] for trans in positions_list] for ref in positions_list]
    # return [get_relative_position(ref, trans)[0] for (ref, trans) in product(positions_list, positions_list)]


In [249]:
df.head(6)

Unnamed: 0,track_id,race_date,race_number,program_number,trakus_index,latitude,longitude,distance_id,course_type,track_condition,...,odds,position_at_finish,segment_distance,segment_angle,segment_speed,segment_acceleration,initial_projected_position,actual_vs_initial_projected_finish,vectors,vector_startend
0,AQU,2019-01-01,8,1,12,40.667107,-73.830265,800,D,GD,...,540,6,3.696,4.061762e-07,14.784,0.3,4,-2.0,"((40.6671073313047, -73.8302651610951), (40.66...","((-73.8302651610951, 40.6671073313047), (-73.8..."
1,AQU,2019-01-01,8,2,12,40.667104,-73.830244,800,D,GD,...,420,3,3.824,4.256623e-07,15.296,0.32,3,0.0,"((40.6671036226205, -73.8302441353679), (40.66...","((-73.8302441353679, 40.6671036226205), (-73.8..."
2,AQU,2019-01-01,8,3,12,40.667126,-73.830229,800,D,GD,...,150,1,4.005,4.407823e-07,16.02,0.28,1,0.0,"((40.6671259738659, -73.8302294213282), (40.66...","((-73.8302294213282, 40.6671259738659), (-73.8..."
3,AQU,2019-01-01,8,4,12,40.667121,-73.830223,800,D,GD,...,900,2,4.03,4.480271e-07,16.12,0.9,5,3.0,"((40.6671213595187, -73.8302226045564), (40.66...","((-73.8302226045564, 40.6671213595187), (-73.8..."
4,AQU,2019-01-01,8,5,12,40.66709,-73.830216,800,D,GD,...,1400,4,3.757,4.145631e-07,15.028,1.2,6,2.0,"((40.6670896804936, -73.8302164289571), (40.66...","((-73.8302164289571, 40.6670896804936), (-73.8..."
5,AQU,2019-01-01,8,6,12,40.667094,-73.830133,800,D,GD,...,245,5,3.969,4.099851e-07,15.876,0.172,2,-3.0,"((40.6670938162518, -73.8301327061395), (40.66...","((-73.8301327061395, 40.6670938162518), (-73.8..."


In [251]:
get_relative_positions(
    df['vector_startend']
)

[[array([-5.30411692e-06, -1.24267566e-05]),
  array([ 4.86799488e-06, -1.46165320e-05]),
  array([ 1.30626349e-05, -3.63058534e-06]),
  array([ 1.60116761e-05, -5.98716953e-06]),
  array([ 2.01377661e-05, -2.18936574e-05]),
  array([ 6.68927819e-05, -2.09772326e-05])],
 [array([-1.55624973e-05, -1.22552806e-05]),
  array([-6.97783398e-06, -1.42135350e-05]),
  array([ 2.3779977e-07, -4.8939614e-06]),
  array([ 2.6294078e-06, -6.9173848e-06]),
  array([ 6.42878221e-06, -2.05032410e-05]),
  array([ 4.76288924e-05, -2.00319982e-05])],
 [array([-2.40720115e-05, -2.22640631e-05]),
  array([-1.39624822e-05, -2.44447490e-05]),
  array([-5.80633505e-06, -1.35244797e-05]),
  array([-2.87930299e-06, -1.58679415e-05]),
  array([ 1.23401557e-06, -3.16829978e-05]),
  array([ 4.77706357e-05, -3.07842272e-05])],
 [array([-2.58000731e-05, -2.02738188e-05]),
  array([-1.69463311e-05, -2.22714556e-05]),
  array([-9.56437485e-06, -1.26693648e-05]),
  array([-7.07840199e-06, -1.47493037e-05]),
  array([-3

1. Scale reference vector to unit vector [a, b].
2. Use [b, -a] as orthogonal basis.
3. Create linear transformation [[b,a], [-a,b]].
4. Use np.matmul(lintrans, pos_vector) where the position vector is the other horse's position at the start of the trakus_index.