In [1]:
# configure auto-reload and add libs to path
# autoreload mode 2 re-imports every loaded module before each cell runs, so edits to
# library source are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

import os
import sys

# Resolve the fastai checkout (four directories up) to an absolute path and make it
# importable; the membership test avoids piling up duplicates when the cell is re-run.
fastai_lib_path = os.path.abspath('../../../../fastai')
if fastai_lib_path not in sys.path:
    sys.path.append(fastai_lib_path)

In [2]:
# core imports
import json, requests
from datetime import timedelta

from fastai.imports import *
from fastai.structured import *

from pandas_summary import DataFrameSummary
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

from sklearn import metrics

In [3]:
# pandas and plotting config
# Raise both display limits to 1000 so long/wide frames are rendered without truncation.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

# draw matplotlib figures directly in the notebook's cell output
%matplotlib inline

In [4]:
# Directory that holds the CSV files used by this notebook.
PATH = 'data'

# Competition year; it prefixes every data file name below.
comp_yr = 2017

# The Blue Alliance API settings. The auth key is read from the TBA_AUTH_KEY
# environment variable so that the credential is never stored in the notebook;
# export it before starting the kernel. An empty key is used when it is unset.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
tba_key = os.environ.get('TBA_AUTH_KEY', '')
tba_header = { 'X-TBA-Auth-Key': tba_key }
tba_url = 'https://thebluealliance.com/api/v3'

# CSV files for the selected competition year, all located under PATH.
team_match_scores_path = f'{PATH}/{comp_yr}-team-match-scores-raw.csv'
team_event_stats_path = f'{PATH}/{comp_yr}-team-event-stats-raw.csv'
scouting_reports_path = f'{PATH}/{comp_yr}-scouting-reports-raw.csv'

In [5]:
# Load the scouting reports and add 'scored_at_dt', a datetime column obtained by
# parsing 'scored_at' as epoch milliseconds.
scouting_reports_df = (
    pd.read_csv(scouting_reports_path, low_memory=False)
      .assign(scored_at_dt=lambda reports: pd.to_datetime(reports['scored_at'], unit='ms'))
)
print(len(scouting_reports_df))
scouting_reports_df.head()

1235


Unnamed: 0,asdf,dddd,event_id,rating_overall_gear_efficiency,rating_overall_gear_efficiency_auto,rating_overall_gear_placement_auto,rating_overall_pilot_competency,rating_overall_robot_stability,rating_scoring_airship_climb,rating_scoring_base_line_made_auto,rating_scoring_gears_made,rating_scoring_gears_made_auto,rating_scoring_high_goals_made,rating_scoring_high_goals_made_auto,rating_scoring_low_goals_made,rating_scoring_low_goals_made_auto,scored_at,scored_by,team_id,scored_at_dt
0,,,2017cada,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1490371000000.0,"{'email': 'tylerselinka1@gmail.com', 'name': '...",frc3013,2017-03-24 15:58:55.807
1,,,2017cada,2.0,,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1490371000000.0,"{'email': 'connorrocker@gmail.com', 'name': 'C...",frc2551,2017-03-24 16:00:49.849
2,,,2017cada,2.0,,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1490371000000.0,"{'email': 'brian.titcomb27@gmail.com', 'name':...",frc692,2017-03-24 16:00:57.239
3,,,2017cada,4.0,,0.0,,4.0,1.0,,3.0,0.0,0.0,0.0,0.0,0.0,1490371000000.0,"{'email': 'seanlinden7934@gmail.com', 'name': ...",frc1323,2017-03-24 16:01:17.301
4,,,2017cada,2.0,,0.0,4.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1490371000000.0,"{'email': 'sca2019@gmail.com', 'name': 'Seth A...",frc4698,2017-03-24 16:01:27.737


In [6]:
# Order the reports chronologically and add the two bookkeeping columns that the
# matching loops fill in: 'match_found' (0 until a match is linked) and 'match_key'.
# To restrict the run to a single event, filter on event_id here, e.g.
# scouting_reports_df[scouting_reports_df.event_id == '2017casd'].
scouting_reports_df = (
    scouting_reports_df
    .sort_values('scored_at_dt')
    .assign(match_found=0, match_key=None)
)
len(scouting_reports_df)

1235

In [7]:
# Load the per-team match rows.
matches_df = pd.read_csv(team_match_scores_path, low_memory=False, index_col=None)

# These four columns are parsed as epoch seconds and replaced by datetimes.
datetime_cols = ['actual_time', 'post_result_time', 'predicted_time', 'time']
matches_df = matches_df.assign(
    **{col: pd.to_datetime(matches_df[col], unit='s') for col in datetime_cols})

# Iteration order of the matching loops follows this sort (oldest actual_time first).
matches_df = matches_df.sort_values('actual_time')
len(matches_df)

5417

In [8]:
# Link scouting reports to matches using each match's *predicted* start time.
# A report is attributed to a match row when it is for the same team and its
# timestamp lies strictly inside (predicted_time - 2 min, predicted_time + 8 min).
# No 'match_found' filter is applied here, so a report that falls inside several
# windows keeps the match that is visited last.
# NOTE(review): confirm that "last match wins" is intended for overlapping windows.
for index, row in matches_df.iterrows():
    match_time = row['predicted_time']
    if pd.isna(match_time):
        # Without a timestamp neither window comparison can be true.
        continue
    match_window_begin = match_time - timedelta(minutes=2)
    match_window_end = match_time + timedelta(minutes=8)

    # One boolean mask selects every report of this team inside the window; the
    # previous per-report inner loop re-derived exactly this set of rows.
    in_window = (
        (scouting_reports_df['team_id'] == row['team_key']) &
        (match_window_begin < scouting_reports_df['scored_at_dt']) &
        (scouting_reports_df['scored_at_dt'] < match_window_end))
    if not in_window.any():
        continue

    # Assign the columns one at a time: unambiguous, and each column keeps its dtype.
    scouting_reports_df.loc[in_window, 'match_found'] = 1
    scouting_reports_df.loc[in_window, 'match_key'] = row['match_key']

In [9]:
# Link scouting reports to matches using each match's *actual* start time.
# Only reports whose 'match_found' is still 0 are touched, so a report that already
# has a match keeps it. A report qualifies when it is for the same team and its
# timestamp lies strictly inside (actual_time - 2 min, actual_time + 8 min).
for index, row in matches_df.iterrows():
    match_time = row['actual_time']
    if pd.isna(match_time):
        # Without a timestamp neither window comparison can be true.
        continue
    match_window_begin = match_time - timedelta(minutes=2)
    match_window_end = match_time + timedelta(minutes=8)

    # One boolean mask selects every still-unmatched report of this team inside
    # the window; the previous per-report inner loop re-derived exactly this set.
    unmatched_in_window = (
        (scouting_reports_df['match_found'] == 0) &
        (scouting_reports_df['team_id'] == row['team_key']) &
        (match_window_begin < scouting_reports_df['scored_at_dt']) &
        (scouting_reports_df['scored_at_dt'] < match_window_end))
    if not unmatched_in_window.any():
        continue

    # Assign the columns one at a time: unambiguous, and each column keeps its dtype.
    scouting_reports_df.loc[unmatched_in_window, 'match_found'] = 1
    scouting_reports_df.loc[unmatched_in_window, 'match_key'] = row['match_key']

In [10]:
# Display the reports whose 'match_found' flag is still 0, i.e. not linked to a match.
scouting_reports_df.loc[scouting_reports_df['match_found'] == 0]

Unnamed: 0,asdf,dddd,event_id,rating_overall_gear_efficiency,rating_overall_gear_efficiency_auto,rating_overall_gear_placement_auto,rating_overall_pilot_competency,rating_overall_robot_stability,rating_scoring_airship_climb,rating_scoring_base_line_made_auto,rating_scoring_gears_made,rating_scoring_gears_made_auto,rating_scoring_high_goals_made,rating_scoring_high_goals_made_auto,rating_scoring_low_goals_made,rating_scoring_low_goals_made_auto,scored_at,scored_by,team_id,scored_at_dt,match_found,match_key
861,,,2017flwp,2.0,,,,2.0,0.0,1.0,3.0,0.0,10.0,0.0,0.0,5.0,1488653000000.0,"{'email': 'wgilliam@ohmeow.com', 'name': 'ohme...",frc125,2017-03-04 18:43:18.611,0,
862,,,2017flwp,1.0,2.0,,,1.0,1.0,1.0,0.0,2.0,20.0,5.0,0.0,15.0,1488653000000.0,"{'email': 'wgilliam@ohmeow.com', 'name': 'ohme...",frc125,2017-03-04 18:45:26.255,0,
863,,,2017flwp,,,,,,,,0.0,0.0,50.0,0.0,0.0,0.0,1488678000000.0,"{'email': 'wgilliam@ohmeow.com', 'name': 'ohme...",frc263,2017-03-05 01:39:32.875,0,
864,,,2017flwp,5.0,,,,5.0,1.0,1.0,0.0,0.0,30.0,20.0,0.0,0.0,1488678000000.0,"{'email': 'wgilliam@ohmeow.com', 'name': 'ohme...",frc263,2017-03-05 01:40:53.050,0,
719,,,2017casd,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1489250000000.0,"{'email': 'noonbui@gmail.com', 'name': 'noonbu...",frc6515,2017-03-11 16:34:31.859,0,
858,,,2017casd,,,,3.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.0,1489697000000.0,"{'email': 'spencer@klawans.me', 'name': 'Spenc...",frc2102,2017-03-16 20:51:24.273,0,
859,,,2017casd,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1489774000000.0,"{'email': 'spencer@klawans.me', 'name': 'Spenc...",frc3647,2017-03-17 18:05:37.831,0,
42,,,2017cada,1.0,,0.0,1.0,2.0,1.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,1490375000000.0,"{'email': 'seanlinden7934@gmail.com', 'name': ...",frc1056,2017-03-24 17:02:32.525,0,
372,,,2017cada,3.0,,0.0,2.0,3.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1490461000000.0,"{'email': 'delaney.r.stewart@gmail.com', 'name...",frc1351,2017-03-25 16:59:54.964,0,
450,,,2017cada,,4.0,1.0,3.0,5.0,0.0,1.0,0.0,1.0,0.0,10.0,0.0,5.0,1491768000000.0,"{'email': 'wgilliam@ohmeow.com', 'name': 'ohme...",frc3013,2017-04-09 20:00:19.291,0,


In [11]:
# scouting_reports_df

In [12]:
# matches_df[matches_df.match_key == '2017cada_qm1']

In [13]:
# Write the reports, now carrying 'match_found' and 'match_key', back to disk.
# 'scored_at_dt' was derived from 'scored_at' when the file was loaded, so it is
# dropped before saving. Rebinding with errors='ignore' (instead of inplace=True)
# keeps this cell re-runnable: a second run used to raise KeyError because the
# column had already been removed.
# NOTE(review): this overwrites the same '-raw' file the reports were read from.
scouting_reports_df = scouting_reports_df.drop(columns=['scored_at_dt'], errors='ignore')
scouting_reports_df.to_csv(scouting_reports_path, index=False)