In [1]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

from tqdm import tqdm

# Initialize

In [2]:
limbs_3d = {
        'left_hand':22, 'left_wrist':20, 'left_elbow':18, 'left_shoulder':16, 'left_back':13,
        'right_hand':23, 'right_wrist':21, 'right_elbow':19, 'right_shoulder':17, 'right_back':14,
        'nose':15, 'neck':12, 'upper_back':9, 'back':6, 'lower_back':3, 'hip':0,
        'left_hip':1, 'left_knee':4, 'left_ankle':7, 'left_foot': 10,
        'right_hip':2, 'right_knee':5, 'right_ankle':8, 'right_foot': 11
}
limbs_3d_r = {i: j for j, i in limbs_3d.items()}

limbs = {
        'nose':0, 'left_eye':1, 'right_eye':2, 'left_ear':3, 'right_ear':4, 'left_shoulder':5, 'right_shoulder':6, 'left_elbow':7,
        'right_elbow':8, 'left_wrist':9, 'right_wrist':10, 'left_hip':11, 'right_hip':12, 'left_knee':13, 'right_knee':14,
        'left_ankle':15, 'right_ankle':16
}
limbs_r = {i: j for j, i in limbs.items()}

In [3]:
data = json.load(open('data/tab/joined/data.json', 'r'))

# Pose

In [4]:
dfs_to_concat = []
for key in data.keys():
    dfs_to_concat.append(pd.DataFrame([sum(i, []) for i in data[key]['pose']], columns=sum([[limbs_3d_r[i] + '_x', limbs_3d_r[i] + '_y', limbs_3d_r[i] + '_z'] for i in range(len(limbs_3d_r.keys()))], [])))
    dfs_to_concat[-1]['it'] = dfs_to_concat[-1].index
    dfs_to_concat[-1]['key'] = int(key)
df_pose = pd.concat(dfs_to_concat)
df_pose

Unnamed: 0,hip_x,hip_y,hip_z,left_hip_x,left_hip_y,left_hip_z,right_hip_x,right_hip_y,right_hip_z,lower_back_x,...,right_wrist_y,right_wrist_z,left_hand_x,left_hand_y,left_hand_z,right_hand_x,right_hand_y,right_hand_z,it,key
0,-0.001809,-0.223389,0.028214,0.061646,-0.128174,0.039612,-0.074707,-0.136963,0.028122,0.000241,...,-0.142456,-0.021500,0.170532,-0.062073,-0.119873,-0.228394,-0.058990,-0.039093,0,101
1,-0.001815,-0.223511,0.028214,0.062439,-0.128418,0.034882,-0.074280,-0.136841,0.029800,0.001024,...,-0.138672,-0.009529,0.177856,-0.060913,-0.113525,-0.219482,-0.054565,-0.024857,1,101
2,-0.001818,-0.223633,0.028229,0.062683,-0.128784,0.037323,-0.073853,-0.136475,0.027710,-0.000484,...,-0.143677,-0.018478,0.185791,-0.063843,-0.101807,-0.227051,-0.060059,-0.035950,2,101
3,-0.001815,-0.223511,0.028229,0.063049,-0.129028,0.037476,-0.073608,-0.136230,0.029251,-0.000496,...,-0.137939,-0.018539,0.180420,-0.057037,-0.101379,-0.222778,-0.053772,-0.033539,3,101
4,-0.001812,-0.223511,0.028214,0.062042,-0.128052,0.034393,-0.074646,-0.136963,0.027756,0.001047,...,-0.136230,-0.012108,0.181641,-0.060669,-0.115173,-0.229126,-0.052124,-0.026688,4,101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,-0.001804,-0.223633,0.028229,0.048187,-0.122803,0.052460,-0.086060,-0.151001,0.049042,0.018387,...,-0.355957,0.058228,0.601562,-0.256592,-0.087646,-0.420898,-0.297852,0.025848,96,824
97,-0.001808,-0.223633,0.028229,0.048920,-0.122131,0.047394,-0.085327,-0.150513,0.050171,0.018829,...,-0.372803,0.049927,0.571289,-0.237793,-0.122925,-0.430420,-0.321777,0.011765,97,824
98,-0.001817,-0.223755,0.028214,0.051331,-0.122559,0.041473,-0.082092,-0.149658,0.057678,0.018661,...,-0.383057,0.044373,0.516602,-0.232544,-0.162720,-0.395996,-0.338623,-0.003744,98,824
99,-0.001817,-0.223755,0.028198,0.055756,-0.124756,0.039886,-0.078735,-0.145752,0.056488,0.013779,...,-0.383545,0.052826,0.472168,-0.226929,-0.195312,-0.422852,-0.337158,0.006870,99,824


In [5]:
dfs_to_concat = []
for key in data.keys():
    dfs_to_concat.append(pd.DataFrame([sum(i, []) for i in data[key]['pose2d']], columns=sum([[limbs_r[i] + '_x', limbs_r[i] + '_y'] for i in range(len(limbs_r.keys()))], [])))
    dfs_to_concat[-1]['it'] = dfs_to_concat[-1].index
    dfs_to_concat[-1]['key'] = int(key)
df_pose2d = pd.concat(dfs_to_concat)
df_pose2d

Unnamed: 0,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,right_ear_y,...,left_knee_x,left_knee_y,right_knee_x,right_knee_y,left_ankle_x,left_ankle_y,right_ankle_x,right_ankle_y,it,key
0,0.435400,0.202266,0.432199,0.204362,0.431600,0.201046,0.435162,0.207141,0.433978,0.198157,...,0.550969,0.203969,0.549909,0.192513,0.589988,0.201803,0.590075,0.190985,0,101
1,0.435419,0.202231,0.432162,0.204321,0.431575,0.201054,0.435229,0.207127,0.433980,0.198160,...,0.550709,0.203791,0.549772,0.192504,0.590181,0.201830,0.590478,0.191117,1,101
2,0.435456,0.202155,0.432200,0.204225,0.431666,0.200960,0.435188,0.207013,0.434019,0.198173,...,0.549749,0.203757,0.549362,0.192892,0.589601,0.201756,0.589827,0.191051,2,101
3,0.435413,0.202251,0.432158,0.204302,0.431643,0.201056,0.435148,0.207073,0.433988,0.198233,...,0.549443,0.204023,0.549338,0.192921,0.589370,0.201797,0.589060,0.191047,3,101
4,0.435647,0.202126,0.432381,0.204124,0.431887,0.200902,0.435461,0.207053,0.434270,0.198251,...,0.549596,0.204240,0.548724,0.193009,0.589418,0.201786,0.589089,0.191032,4,101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,0.360017,0.174144,0.354298,0.176176,0.354513,0.172204,0.347414,0.182513,0.346921,0.170761,...,0.495233,0.177928,0.493216,0.158024,0.562170,0.174712,0.524769,0.142037,96,824
97,0.360729,0.173987,0.355614,0.176223,0.355457,0.172287,0.348542,0.182443,0.347419,0.171107,...,0.494910,0.177903,0.493123,0.158569,0.561833,0.174944,0.541903,0.142753,97,824
98,0.360532,0.174439,0.355316,0.176641,0.355156,0.172786,0.349060,0.182652,0.347704,0.171105,...,0.494466,0.178032,0.487683,0.160282,0.561123,0.175033,0.551653,0.146107,98,824
99,0.360743,0.175249,0.355745,0.177509,0.355601,0.173205,0.349775,0.182937,0.348929,0.170933,...,0.492917,0.178264,0.484859,0.160745,0.561552,0.175104,0.549982,0.150488,99,824


## Transforming data into angular motion

In [6]:
angle_ids = {
    'right_knee': ['right_ankle', 'right_hip'],
    'left_knee': ['left_ankle', 'left_hip'],
    'right_hip': ['right_knee', 'right_shoulder'],
    'left_hip': ['left_knee', 'left_shoulder'],
    'right_shoulder': ['right_hip', 'right_elbow'],
    'left_shoulder': ['left_hip', 'left_elbow'],
    'right_elbow': ['right_shoulder', 'right_wrist'],
    'left_elbow': ['left_shoulder', 'left_wrist'],
    'right_ankle':['right_foot', 'right_knee'],
    'left_ankle':['left_foot', 'left_knee'],
    'hip':['left_hip', 'right_hip'],
    'neck':['nose', 'hip']
    }

In [7]:
for angle_id in angle_ids.keys():
    df_pose['va_x'] = df_pose[angle_ids[angle_id][0] + '_x'] - df_pose[angle_id + '_x']
    df_pose['va_y'] = df_pose[angle_ids[angle_id][0] + '_y'] - df_pose[angle_id + '_y']
    df_pose['va_z'] = df_pose[angle_ids[angle_id][0] + '_z'] - df_pose[angle_id + '_z']
    df_pose['vb_x'] = df_pose[angle_ids[angle_id][1] + '_x'] - df_pose[angle_id + '_x']
    df_pose['vb_y'] = df_pose[angle_ids[angle_id][1] + '_y'] - df_pose[angle_id + '_y']
    df_pose['vb_z'] = df_pose[angle_ids[angle_id][1] + '_z'] - df_pose[angle_id + '_z']

    df_pose[angle_id + '_angle'] = np.arccos((df_pose.va_x * df_pose.vb_x + df_pose.va_y * df_pose.vb_y + df_pose.va_z * df_pose.vb_z) / (np.sqrt((df_pose.va_x ** 2) + (df_pose.va_y ** 2) + (df_pose.va_z ** 2)) + np.sqrt((df_pose.vb_x ** 2) + (df_pose.vb_y ** 2) + (df_pose.vb_z ** 2))))
    df_pose[angle_id + '_angle'] = df_pose[angle_id + '_angle'].rolling(5).mean().diff().rolling(5).mean()

In [8]:
df = df_pose[['key', 'it'] + [angle_id + '_angle' for angle_id in angle_ids.keys()]].copy().dropna()

In [9]:
for angle_id in list(angle_ids.keys())[:-4]:
    df_pose2d['va_x'] = df_pose2d[angle_ids[angle_id][0] + '_x'] - df_pose2d[angle_id + '_x']
    df_pose2d['va_y'] = df_pose2d[angle_ids[angle_id][0] + '_y'] - df_pose2d[angle_id + '_y']
    df_pose2d['vb_x'] = df_pose2d[angle_ids[angle_id][1] + '_x'] - df_pose2d[angle_id + '_x']
    df_pose2d['vb_y'] = df_pose2d[angle_ids[angle_id][1] + '_y'] - df_pose2d[angle_id + '_y']

    df_pose2d[angle_id + '_angle'] = np.arccos((df_pose2d.va_x * df_pose2d.vb_x + df_pose2d.va_y * df_pose2d.vb_y) / (np.sqrt((df_pose2d.va_x ** 2) + (df_pose2d.va_y ** 2)) + np.sqrt((df_pose2d.vb_x ** 2) + (df_pose2d.vb_y ** 2))))
    df_pose2d[angle_id + '_angle'] = df_pose2d[angle_id + '_angle'].rolling(5).mean().diff().rolling(5).mean()

In [10]:
df2d = df_pose2d[['key', 'it'] + [angle_id + '_angle' for angle_id in list(angle_ids.keys())[:-4]]].copy().dropna()

# Ball

In [11]:
dfs_to_concat = []
data_speed = []
for key in list(data.keys()):
    df_ = pd.DataFrame(data[key]['ball'], columns=['y0','x0', 'y1', 'x1'])
    df_['it'] = df_.index
    df_['key'] = int(key)

    df_['x'] = (df_.x0 + df_.x1) / 2
    df_['y'] = -(df_.y0 + df_.y1) / 2 + 1
    df_['area'] = (df_.x0 - df_.x1) * (df_.y0 - df_.y1)
    
    dfs_to_concat.append(df_)

    # Calculating ball speed
    ## Remove iterations where the ball is inside the goal
    df_ = df_.loc[df_.x < 0.75]
    ## Remove iterations where the size of the ball reduces substantially (ball occluded)
    df_ = df_[~((df_.x.rolling(10000, min_periods=0).max() > 0.66) & (df_.area < (df_.area.shift(1) * 0.8)))]
    ## Remove iterations where the ball is close to the goal and then goes back
    df_ = df_[~((df_.x.rolling(10000, min_periods=0).max() > 0.66) & (df_.x < df_.x.rolling(10000, min_periods=0).max()))]
    ## Remove iterations that hit the bar and do not go back nor inside the goal
    df_ = df_[~((df_.x.rolling(10000, min_periods=0).max() > 0.66) & (df_.x.diff() < 0.005))]

    data_speed.append([int(key), len(df_), 6.128825336065631 / (len(df_)/90), 6.128825336065631 / (len(df_)/90) * 3.6])

df_ball_speed = pd.DataFrame(data_speed, columns=['key', 'n_frames', 'speed_ms', 'speed_kmh'])
df_ball = pd.concat(dfs_to_concat)
df_ball

Unnamed: 0,y0,x0,y1,x1,it,key,x,y,area
0,0.620972,0.157457,0.651683,0.175482,0,101,0.166470,0.363672,0.000554
1,0.614591,0.164870,0.651677,0.184015,1,101,0.174443,0.366866,0.000710
2,0.611307,0.174949,0.648353,0.193216,2,101,0.184082,0.370170,0.000677
3,0.607573,0.183516,0.644642,0.201501,3,101,0.192509,0.373893,0.000667
4,0.605590,0.191744,0.637863,0.211083,4,101,0.201414,0.378273,0.000624
...,...,...,...,...,...,...,...,...,...
29,0.038967,0.627171,0.096917,0.660418,29,824,0.643794,0.932058,0.001927
30,0.022772,0.651661,0.083165,0.685025,30,824,0.668343,0.947032,0.002015
31,0.008603,0.675427,0.067768,0.708301,31,824,0.691864,0.961814,0.001945
32,0.000474,0.698928,0.053271,0.732160,32,824,0.715544,0.973127,0.001755


In [12]:
df_ball_speed

Unnamed: 0,key,n_frames,speed_ms,speed_kmh
0,101,35,15.759837,56.735412
1,102,33,16.714978,60.173921
2,103,30,18.386476,66.191314
3,104,31,17.793364,64.056110
4,105,28,19.699796,70.919265
...,...,...,...,...
140,820,33,16.714978,60.173921
141,821,36,15.322063,55.159428
142,822,29,19.020492,68.473773
143,823,23,23.982360,86.336496


# Merging data sets

In [13]:
df = df.merge(df_ball_speed, on='key')
df

Unnamed: 0,key,it,right_knee_angle,left_knee_angle,right_hip_angle,left_hip_angle,right_shoulder_angle,left_shoulder_angle,right_elbow_angle,left_elbow_angle,right_ankle_angle,left_ankle_angle,hip_angle,neck_angle,n_frames,speed_ms,speed_kmh
0,101,9,-0.000180,0.000101,0.000079,0.000058,0.000146,0.000306,-0.000445,-0.000161,-0.000026,0.000105,-1.789370e-06,0.000002,35,15.759837,56.735412
1,101,10,-0.000299,0.000119,0.000047,0.000118,0.000124,0.000288,-0.000471,-0.000220,-0.000096,0.000074,-8.076696e-07,0.000027,35,15.759837,56.735412
2,101,11,-0.000300,0.000225,0.000088,0.000062,0.000051,0.000280,-0.000297,-0.000133,-0.000118,0.000114,-1.710774e-06,0.000022,35,15.759837,56.735412
3,101,12,-0.000238,0.000315,0.000284,0.000048,0.000110,0.000290,-0.000368,-0.000210,-0.000124,0.000142,-3.486316e-06,-0.000010,35,15.759837,56.735412
4,101,13,-0.000207,0.000353,0.000431,0.000154,0.000215,0.000293,-0.000499,-0.000305,-0.000124,0.000149,-4.004232e-06,-0.000023,35,15.759837,56.735412
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29286,824,96,0.007982,-0.000443,0.001467,-0.000672,0.002591,-0.000401,-0.005403,-0.002836,0.000988,0.000458,5.764859e-06,-0.000561,33,16.714978,60.173921
29287,824,97,0.013764,0.001653,0.000968,-0.001239,0.002698,-0.000362,-0.005345,-0.002048,0.001451,0.000841,3.756489e-06,-0.000924,33,16.714978,60.173921
29288,824,98,0.018620,0.002776,0.000629,-0.001393,0.002556,-0.000373,-0.006343,-0.001336,0.001895,0.001036,-7.426050e-07,-0.001312,33,16.714978,60.173921
29289,824,99,0.022439,0.002986,0.000129,-0.001747,0.002920,-0.001233,-0.006740,-0.002141,0.002417,0.001158,-3.602402e-06,-0.001614,33,16.714978,60.173921


In [14]:
df2d = df2d.merge(df_ball_speed, on='key')
df2d

Unnamed: 0,key,it,right_knee_angle,left_knee_angle,right_hip_angle,left_hip_angle,right_shoulder_angle,left_shoulder_angle,right_elbow_angle,left_elbow_angle,n_frames,speed_ms,speed_kmh
0,101,9,-0.000047,-0.000024,0.000064,0.000011,0.000024,0.000007,-0.000037,0.000022,35,15.759837,56.735412
1,101,10,-0.000049,-0.000009,0.000048,0.000038,0.000048,0.000023,-0.000048,0.000021,35,15.759837,56.735412
2,101,11,-0.000065,0.000010,0.000025,0.000062,0.000049,0.000008,-0.000042,0.000031,35,15.759837,56.735412
3,101,12,-0.000052,0.000036,0.000010,0.000072,0.000082,0.000004,-0.000047,0.000035,35,15.759837,56.735412
4,101,13,-0.000049,0.000042,0.000008,0.000068,0.000120,0.000022,-0.000049,0.000023,35,15.759837,56.735412
...,...,...,...,...,...,...,...,...,...,...,...,...,...
29286,824,96,0.000165,-0.000002,0.000583,-0.000093,0.000551,0.000146,-0.000425,-0.000098,33,16.714978,60.173921
29287,824,97,0.000063,0.000081,0.000697,-0.000106,0.000672,0.000153,-0.000439,-0.000134,33,16.714978,60.173921
29288,824,98,0.002065,0.000141,0.000438,-0.000114,0.000674,0.000127,-0.000502,-0.000114,33,16.714978,60.173921
29289,824,99,0.003346,0.000181,0.000170,-0.000110,0.000690,0.000047,-0.000535,-0.000043,33,16.714978,60.173921


In [15]:
from tsfresh import extract_relevant_features

features_filtered_direct = extract_relevant_features(df[[angle_id + '_angle' for angle_id in angle_ids.keys()] + ['key', 'it']], df_ball_speed.set_index('key')['speed_kmh'], column_id='key', column_sort='it')
features = features_filtered_direct.columns.to_list()
processed_data = features_filtered_direct.join(df_ball_speed.set_index('key')['speed_kmh']).copy()
processed_data

Feature Extraction: 100%|██████████| 20/20 [01:55<00:00,  5.79s/it]


Unnamed: 0,right_knee_angle__c3__lag_3,right_knee_angle__c3__lag_2,right_ankle_angle__time_reversal_asymmetry_statistic__lag_1,right_ankle_angle__last_location_of_maximum,right_knee_angle__last_location_of_maximum,"right_knee_angle__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""var""","right_knee_angle__fft_coefficient__attr_""imag""__coeff_76","right_ankle_angle__fft_coefficient__attr_""imag""__coeff_47",right_ankle_angle__absolute_maximum,right_ankle_angle__maximum,...,right_ankle_angle__fourier_entropy__bins_2,"left_knee_angle__fft_coefficient__attr_""real""__coeff_67","right_ankle_angle__fft_coefficient__attr_""angle""__coeff_49","right_shoulder_angle__fft_coefficient__attr_""imag""__coeff_47","left_knee_angle__fft_coefficient__attr_""real""__coeff_87","neck_angle__agg_autocorrelation__f_agg_""mean""__maxlag_40",right_shoulder_angle__friedrich_coefficients__coeff_2__m_3__r_30,"right_hip_angle__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.4","right_ankle_angle__fft_coefficient__attr_""real""__coeff_27",speed_kmh
101,-3.821865e-08,-5.559073e-08,6.075525e-11,0.993750,0.993750,2.354432e-07,0.000204,0.000356,0.001940,0.001940,...,0.196694,0.000542,130.040748,0.000536,-0.000145,-0.047572,-0.043028,0.000253,-0.001358,56.735412
102,-1.343656e-08,-2.275096e-08,1.656098e-11,0.995951,0.995951,6.457984e-08,-0.004519,-0.000123,0.001732,0.001732,...,0.113939,-0.000531,-46.085758,-0.000133,-0.001150,0.023011,-0.013916,0.000062,-0.001236,60.173921
103,-3.993737e-08,-5.435266e-08,3.641498e-11,1.000000,1.000000,2.109684e-07,0.001744,-0.000111,0.001952,0.001952,...,0.115774,-0.000367,38.296882,0.000598,-0.000145,-0.002018,0.006092,0.000227,-0.000186,66.191314
104,-5.483482e-08,-2.489835e-08,9.957453e-11,1.000000,1.000000,3.581329e-07,0.000846,0.000318,0.002750,0.002750,...,0.132691,-0.001516,126.085356,0.000546,-0.000338,-0.006354,0.003895,0.000116,-0.000460,64.056110
105,-9.020689e-08,-6.472868e-08,6.575856e-11,1.000000,1.000000,1.134702e-07,0.000006,-0.000021,0.002526,0.002526,...,0.099623,-0.004002,-142.474294,0.000033,-0.001415,0.007886,0.020536,0.000126,0.001834,70.919265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
820,-2.562788e-08,-1.773447e-08,4.456715e-11,1.000000,1.000000,1.301407e-07,0.002511,0.000041,0.002219,0.002219,...,0.121692,-0.002565,-163.627301,0.000789,-0.001306,0.020816,-0.031657,0.000128,0.001614,60.173921
821,-6.384805e-09,-1.247727e-09,2.818682e-12,0.004902,0.004902,9.964272e-09,-0.001409,-0.000600,0.001739,0.001739,...,0.131692,-0.001132,-80.417624,-0.000235,0.001107,0.050664,-0.030535,0.000071,0.001408,55.159428
822,-8.966834e-08,-9.916125e-08,1.281585e-10,1.000000,1.000000,3.498354e-07,0.000127,0.000688,0.002459,0.002459,...,0.210283,0.001216,163.903231,0.000396,-0.000145,0.044829,-0.013386,0.000067,-0.000521,68.473773
823,-5.238346e-08,2.673947e-08,1.224520e-10,1.000000,0.992308,3.847432e-07,0.000127,0.000033,0.002691,0.002691,...,0.228632,0.000039,165.635456,0.000687,-0.000145,0.026994,0.017002,0.000109,-0.000182,86.336496


In [16]:
features_filtered_direct = extract_relevant_features(df2d[[angle_id + '_angle' for angle_id in list(angle_ids.keys())[:3]] + ['key', 'it']], df_ball_speed.set_index('key')['speed_kmh'], column_id='key', column_sort='it')
features2d = features_filtered_direct.columns.to_list()
processed_data2d = features_filtered_direct.join(df_ball_speed.set_index('key')['speed_kmh']).copy()
processed_data2d

Feature Extraction: 100%|██████████| 20/20 [00:29<00:00,  1.48s/it]


Unnamed: 0,right_knee_angle__mean_abs_change,"right_knee_angle__change_quantiles__f_agg_""mean""__isabs_True__qh_1.0__ql_0.0","right_hip_angle__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""max""","right_knee_angle__change_quantiles__f_agg_""var""__isabs_False__qh_1.0__ql_0.0","right_knee_angle__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.0","right_knee_angle__change_quantiles__f_agg_""var""__isabs_True__qh_1.0__ql_0.0","right_hip_angle__agg_linear_trend__attr_""slope""__chunk_len_10__f_agg_""max""","left_knee_angle__agg_linear_trend__attr_""slope""__chunk_len_10__f_agg_""max""","right_hip_angle__change_quantiles__f_agg_""mean""__isabs_True__qh_1.0__ql_0.6","right_hip_angle__change_quantiles__f_agg_""var""__isabs_False__qh_1.0__ql_0.6",...,"right_knee_angle__fft_coefficient__attr_""real""__coeff_76",right_knee_angle__quantile__q_0.3,"right_hip_angle__fft_coefficient__attr_""real""__coeff_32","right_knee_angle__agg_linear_trend__attr_""intercept""__chunk_len_10__f_agg_""var""","right_knee_angle__fft_coefficient__attr_""real""__coeff_75","right_hip_angle__fft_coefficient__attr_""imag""__coeff_24","right_hip_angle__fft_coefficient__attr_""angle""__coeff_31","right_knee_angle__fft_coefficient__attr_""imag""__coeff_30","right_knee_angle__agg_linear_trend__attr_""slope""__chunk_len_50__f_agg_""mean""",speed_kmh
101,0.000106,0.000106,0.000008,7.430840e-08,0.000080,6.388484e-08,0.000030,0.000034,0.000046,4.663695e-09,...,-0.002151,-0.000049,0.000352,-1.008321e-06,-0.001434,-0.001628,-49.271159,0.002474,0.000394,56.735412
102,0.000145,0.000145,0.000004,1.675277e-07,0.000100,1.464441e-07,0.000010,0.000011,0.000037,2.776108e-09,...,0.003500,-0.000053,0.000037,-3.688240e-07,0.003570,0.000291,172.852640,0.016775,-0.000035,60.173921
103,0.000144,0.000144,0.000011,1.020460e-07,0.000104,8.142738e-08,0.000024,0.000032,0.000038,2.529109e-09,...,-0.000495,-0.000103,0.000092,-6.331981e-07,-0.000230,-0.001179,-56.956425,0.000835,0.000397,66.191314
104,0.000125,0.000125,0.000007,1.092190e-07,0.000094,9.372537e-08,0.000011,0.000026,0.000046,3.441994e-09,...,0.001256,-0.000111,-0.000254,2.571643e-07,0.001053,-0.002074,-123.543623,0.000474,0.000092,64.056110
105,0.000138,0.000138,0.000009,9.979957e-08,0.000096,8.110484e-08,0.000017,0.000023,0.000039,3.424636e-09,...,-0.001660,-0.000088,0.000221,-2.509961e-07,-0.001676,-0.001732,-54.557504,-0.000703,-0.000065,70.919265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
820,0.000104,0.000104,0.000005,6.144984e-08,0.000077,5.057896e-08,0.000012,0.000014,0.000029,1.817427e-09,...,0.000522,-0.000099,0.002031,7.536153e-08,0.000241,-0.000023,-47.877218,0.002330,-0.000047,60.173921
821,0.000085,0.000085,0.000004,5.055843e-08,0.000059,4.344704e-08,0.000009,0.000025,0.000033,2.568244e-09,...,0.001137,-0.000069,0.000151,1.928456e-07,0.001206,-0.003120,37.773667,-0.003708,0.000426,55.159428
822,0.000145,0.000145,0.000007,1.178481e-07,0.000096,9.791212e-08,0.000028,0.000030,0.000039,3.891305e-09,...,0.000029,-0.000084,-0.000003,-5.360029e-07,0.000005,-0.000227,-81.055694,0.002890,-0.000128,68.473773
823,0.000234,0.000234,0.000016,4.048625e-07,0.000104,3.502072e-07,0.000030,0.000042,0.000037,3.621367e-09,...,0.000029,-0.000084,-0.000009,2.412889e-07,0.000005,-0.000001,-94.655694,0.001758,-0.000237,86.336496


In [17]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

normalizer1 = MinMaxScaler()
processed_data['norm_speed_kmh'] = normalizer1.fit_transform(processed_data[['speed_kmh']])
normalizer2 = MinMaxScaler()
processed_data2d['norm_speed_kmh'] = normalizer2.fit_transform(processed_data2d[['speed_kmh']])

In [18]:
kf = KFold(n_splits=20)
all_predictions = {
    'linear_regression':[],
    'knn':[],
    'grad_boost':[],
    'random_forest':[],
    'neural_net':[],
    }
for train, test in tqdm(kf.split(processed_data)):
    train_data = processed_data.iloc[train,:]
    test_data = processed_data.iloc[test,:]

    train_x = train_data[features]
    train_y = train_data['norm_speed_kmh']
    test_x = test_data[features]
    test_y = test_data['norm_speed_kmh']

    # Linear Regression
    model = LinearRegression(n_jobs=-1)
    model.fit(train_x, train_y)
    all_predictions['linear_regression'] += model.predict(test_x).tolist()
    # K-Nearest Neighbors
    model = KNeighborsRegressor(n_neighbors=5, n_jobs=-1)
    model.fit(train_x, train_y)
    all_predictions['knn'] += model.predict(test_x).tolist()
    # Gradient Boosting
    model = GradientBoostingRegressor(n_estimators=1000)
    model.fit(train_x, train_y)
    all_predictions['grad_boost'] += model.predict(test_x).tolist()
    # Random Forest
    model = RandomForestRegressor(n_estimators=1000, n_jobs=-1)
    model.fit(train_x, train_y)
    all_predictions['random_forest'] += model.predict(test_x).tolist()

20it [09:17, 27.87s/it]


In [19]:
f = open('results/model_importances.csv', 'w')
f.write('feature,feature_importance\n')
for feat, feat_imp in zip(features, model.feature_importances_):
    f.write(f'{feat},{feat_imp}\n')
f.close()

In [20]:
kf = KFold(n_splits=20)
all_predictions2d = {
    'linear_regression':[],
    'knn':[],
    'grad_boost':[],
    'random_forest':[],
    'neural_net':[],
    }
for train, test in tqdm(kf.split(processed_data2d)):
    train_data = processed_data2d.iloc[train,:]
    test_data = processed_data2d.iloc[test,:]

    train_x = train_data[features2d]
    train_y = train_data['norm_speed_kmh']
    test_x = test_data[features2d]
    test_y = test_data['norm_speed_kmh']

    # Linear Regression
    model = LinearRegression(n_jobs=-1)
    model.fit(train_x, train_y)
    all_predictions2d['linear_regression'] += model.predict(test_x).tolist()
    # K-Nearest Neighbors
    model = KNeighborsRegressor(n_neighbors=5, n_jobs=-1)
    model.fit(train_x, train_y)
    all_predictions2d['knn'] += model.predict(test_x).tolist()
    # Gradient Boosting
    model = GradientBoostingRegressor(n_estimators=1000)
    model.fit(train_x, train_y)
    all_predictions2d['grad_boost'] += model.predict(test_x).tolist()
    # Random Forest
    model = RandomForestRegressor(n_estimators=1000, n_jobs=-1)
    model.fit(train_x, train_y)
    all_predictions2d['random_forest'] += model.predict(test_x).tolist()

20it [03:07,  9.39s/it]


In [21]:
scores = {} # r-squared scores dict for all algorithms
scores2d = {}
correlations = {} # correlations dict for all algorithms
correlations2d = {}
for algo in ['linear_regression', 'knn', 'grad_boost', 'random_forest']:
    # 3D
    processed_data['pred_norm_' + algo] = all_predictions[algo]
    processed_data['pred_' + algo] = normalizer1.inverse_transform(processed_data[['pred_norm_' + algo]])
    scores[algo] = round(r2_score(processed_data['speed_kmh'], processed_data['pred_' + algo]), 2)
    correlations[algo] = round(np.corrcoef(processed_data['speed_kmh'], processed_data['pred_' + algo])[0][1], 2)

    # 2D
    processed_data2d['pred_norm_' + algo] = all_predictions2d[algo]
    processed_data2d['pred_' + algo] = normalizer2.inverse_transform(processed_data2d[['pred_norm_' + algo]])
    scores2d[algo] = round(r2_score(processed_data2d['speed_kmh'], processed_data2d['pred_' + algo]), 2)
    correlations2d[algo] = round(np.corrcoef(processed_data2d['speed_kmh'], processed_data2d['pred_' + algo])[0][1], 2)

In [22]:
scores, correlations

({'linear_regression': -1.92,
  'knn': -0.03,
  'grad_boost': 0.44,
  'random_forest': 0.44},
 {'linear_regression': 0.4,
  'knn': 0.26,
  'grad_boost': 0.67,
  'random_forest': 0.68})

In [23]:
scores2d, correlations2d

({'linear_regression': -8.33,
  'knn': 0.11,
  'grad_boost': 0.22,
  'random_forest': 0.29},
 {'linear_regression': 0.37,
  'knn': 0.4,
  'grad_boost': 0.48,
  'random_forest': 0.54})

In [24]:
processed_data.to_csv('results/res_3d_wneck.csv')
processed_data2d.to_csv('results/res_2d_wneck.csv')