## 2023 TBA Predictions

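Match data is fetched from The Blue Alliance (TBA) by `fetchMatches.py`. Either run it before running this notebook, or leave `fetch_matches = True` in the first code cell to fetch from inside the notebook.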

`python fetchMatches.py`


In [29]:
from __future__ import print_function
import pickle
#import time
import sys
from collections import Counter
from tqdm import tqdm

sys.path.append('..')
import swagger_client as v3client
from swagger_client.rest import ApiException

# Run configuration for this notebook.
year = 2023
# Original author's note: when fetch_matches is on it is best to set
# reset=True, otherwise some events may be missed.
fetch_matches = True
reset = False

if fetch_matches:
    # Only imported when a fetch is actually requested.
    from fetchMatches import fetch_all_matches
    # This will save to matches_{year}.pkl, which the next cell loads.
    fetch_all_matches(year, reset=reset)


Fetching event 2023alhu
Fetching event 2023arli
Fetching event 2023ausc
Fetching event 2023azgl
Fetching event 2023azva
Fetching event 2023bcvi
Fetching event 2023brbr
Fetching event 2023caav
Fetching event 2023cada
Fetching event 2023cafr
Fetching event 2023cala
Fetching event 2023camb
Fetching event 2023caoc
Fetching event 2023caph
Fetching event 2023casd
Fetching event 2023casf
Fetching event 2023casj
Fetching event 2023cave
Fetching event 2023cc
Fetching event 2023chcmp
Fetching event 2023cmptx
Fetching event 2023code
Fetching event 2023cops
Fetching event 2023cthar
Fetching event 2023ctwat
Fetching event 2023flor
Fetching event 2023flta
Fetching event 2023flwp
Fetching event 2023gaalb
Fetching event 2023gacar
Fetching event 2023gacmp
Fetching event 2023gadal
Fetching event 2023gagwi
Fetching event 2023gamac
Fetching event 2023hiho
Fetching event 2023iacf
Fetching event 2023idbo
Fetching event 2023ilch
Fetching event 2023ilpe
Fetching event 2023ilwz
Fetching event 2023incmp
Fetchin

In [30]:
# Read back the pickled match data for the configured season.

filename = f'matches_{year}.pkl'
matches = []  # placeholder; replaced by the unpickled object below
with open(filename, mode='rb') as pickled:
    # NOTE(review): pickle.load can execute arbitrary code -- only load files you produced.
    matches = pickle.load(pickled)

In [31]:
# Keys of every event whose district abbreviation is 'pnw'.
# Earlier exploratory filters, kept for reference:
#   [m for m in matches['events'] if m.address and 'Spokane' in m.address]
#   [m for m in matches['events'] if 'pnc' in m.key]
pnw_district = [
    ev.key
    for ev in matches['events']
    if ev.district and ev.district.abbreviation == 'pnw'
]
pnw_district

['2023orore',
 '2023orsal',
 '2023orwil',
 '2023pncmp',
 '2023waahs',
 '2023wabon',
 '2023wasam',
 '2023wasno',
 '2023wayak']

Keep only completed matches, i.e. those with a recorded winning alliance and a score breakdown.

In [32]:
# Events that have at least one match on record.
non_empty = [key for key, event_matches in matches['matches'].items() if len(event_matches) > 0]

# Flatten the per-event match lists, keeping only matches that have both a
# winning alliance and a score breakdown.
data = [
    game
    for event_matches in matches['matches'].values()
    for game in event_matches
    if game.winning_alliance != '' and game.score_breakdown is not None
]
print(f'Found {len(data)} matches')

# Every team that played on either alliance at a PNW district event.
pnw_teams = set()
for game in data:
    if game.event_key in pnw_district:
        pnw_teams.update(game.alliances.red.team_keys)
        pnw_teams.update(game.alliances.blue.team_keys)

pnw_teams = sorted(pnw_teams)
print(f'PNW Teams: {pnw_teams}')

Found 9918 matches
PNW Teams: ['frc1294', 'frc1318', 'frc1359', 'frc1425', 'frc1432', 'frc1540', 'frc1595', 'frc1778', 'frc1899', 'frc1983', 'frc2046', 'frc2097', 'frc2147', 'frc2374', 'frc2412', 'frc2471', 'frc2521', 'frc2522', 'frc2550', 'frc2557', 'frc2635', 'frc2733', 'frc2811', 'frc2898', 'frc2903', 'frc2907', 'frc2910', 'frc2915', 'frc2926', 'frc2928', 'frc2929', 'frc2930', 'frc2976', 'frc2980', 'frc2990', 'frc3024', 'frc3049', 'frc3070', 'frc3218', 'frc3219', 'frc3268', 'frc3393', 'frc3588', 'frc360', 'frc3636', 'frc3663', 'frc3673', 'frc3674', 'frc3681', 'frc3711', 'frc3712', 'frc3786', 'frc3826', 'frc3876', 'frc4043', 'frc4060', 'frc4061', 'frc4089', 'frc4104', 'frc4125', 'frc4127', 'frc4131', 'frc4173', 'frc4180', 'frc4450', 'frc4469', 'frc4488', 'frc4512', 'frc4513', 'frc4579', 'frc4662', 'frc4681', 'frc4682', 'frc4692', 'frc488', 'frc4911', 'frc4915', 'frc4918', 'frc492', 'frc4980', 'frc5295', 'frc5468', 'frc5588', 'frc568', 'frc5683', 'frc5827', 'frc5920', 'frc5937', 'frc5

In [33]:
# Tally completed matches per competition level
# (e.g. 'qm' qualifiers, 'sf' semifinals, 'f' finals).
Counter(game.comp_level for game in data)

Counter({'f': 240, 'qm': 8314, 'sf': 1364})

In [34]:
# Training uses qualifier matches only (comp_level 'qm').

qualifiers = list(filter(lambda game: game.comp_level == 'qm', data))

Create aggregate team statistics over all qualifier matches, plus a separate set computed only from qualifier matches played at PNW district events.

In [35]:

from featurization import addMatch

# Per-team running totals: one table over every qualifier match, one
# restricted to qualifiers played at PNW district events.
teamAggregates, pnwAggregates = {}, {}


import re


for game in qualifiers:
    at_pnw_event = game.event_key in pnw_district
    # Red teams are processed before blue teams.
    for side in (game.alliances.red, game.alliances.blue):
        for team in side.team_keys:
            addMatch(team, game, teamAggregates)
            if at_pnw_event:
                addMatch(team, game, pnwAggregates)
        
# normalize the aggregates -- TODO: move all this code to featurization.py
def normalize(aggregates):
    """Turn per-team totals into per-match averages, in place.

    For every team entry, each statistic other than 'totalMatches' is
    divided by that team's 'totalMatches'; 'totalMatches' itself is then
    set to 1.0.

    Args:
        aggregates: dict mapping a team key to a dict of statistics that
            includes a 'totalMatches' entry.

    Returns:
        None. ``aggregates`` is modified in place.
    """
    for stats in aggregates.values():
        for key in stats:
            if key != 'totalMatches':
                stats[key] /= stats['totalMatches']
        stats['totalMatches'] = 1.0

# Convert both tables from running totals to per-match averages.
normalize(teamAggregates)
normalize(pnwAggregates)

# Persist the all-teams table. The file name follows the configured `year`
# (as matches_{year}.pkl does), so changing the season cannot overwrite
# another season's stats. Only teamAggregates is saved; pnwAggregates stays
# in memory.
with open(f'teamStats_{year}.pkl', 'wb') as f:
    pickle.dump(teamAggregates, f)

In [36]:
# Completed matches per PNW district event.
Counter(game.event_key for game in data if game.event_key in pnw_district)

Counter({'2023orore': 84,
         '2023orsal': 78,
         '2023orwil': 69,
         '2023wabon': 76,
         '2023wasam': 58,
         '2023wasno': 83,
         '2023wayak': 69})

Next steps: for a given event we want to decide how to prioritize alliance choices.  Suppose we have all the data to date, as well as all the qualifier data for the event. Who should we choose as partners?

In [37]:
# optimize alliances for {target_id} at {event} on second pick:
import random
from tqdm import tqdm
from featurization import featurizeAlliances
from itertools import combinations

event = '2023wasam'
target_id = 492
# Every team that appears on either alliance in any match at the event.
teams = {
    team
    for match in matches['matches'][event]
    for team in match.alliances.red.team_keys + match.alliances.blue.team_keys
}
print(f'There are {len(teams)} teams at this event')

model_fn = 'model_2023_forest.pkl'
# NOTE(review): pickle.load can execute arbitrary code -- only load model files you trust.
with open(model_fn, 'rb') as inF:
    vectorizer, model = pickle.load(inF)

# wasno
taken_wasno = [
    #2910, 2930, 7627, 488, 4911, # 4915, 1318, 2522 -- for 4911 on their first pick.
    #2910, 4131, 6443, 1595, 2811, 5920, 5468, 492, 2147, 1318, 955, 3711, 7461, 4061, 4980
    #2910, 360, 1983, 2412, 2976, 7461, 1899, 488, 492
    2910, 360, 1983, 1899, 7461, 488, 4911, 2412, 492
]


taken = [f'frc{x}' for x in taken_wasno]

# set to just ['frc492'] if you're ranking pairs
target = [f'frc{target_id}']
# target = ['frc492', 'frc1899']

# Teams that can still be picked: not already taken and not already on our
# side. `target` is a list, so membership must be tested with `in`; comparing
# a team string to the list with `!=` is always true and filters nothing.
unavailable = set(taken) | set(target)
available = sorted(t for t in teams if t not in unavailable)

if len(target)==1:
    # Ranking pairs: combinations() over the sorted pool yields each
    # unordered pair exactly once, as [x, y] with x < y.
    partners = [list(pair) for pair in combinations(available, 2)]
else:
    # Ranking a single remaining pick.
    partners = [[x] for x in available]
features = []
alliances = []
trials = 500

# Fixed ordering so the opponent pool does not depend on set iteration order.
team_pool = sorted(teams)

# iterate through all the potential pairs of partners.
# For each pair, we sample {trials} opponent alliances and assess whether we think they will win.
for p in tqdm(partners):
    red = target + p
    candidates = [x for x in team_pool if x not in red]

    # Run trials sampling blue alliances from the remaining teams.
    # Here we're sampling any possible alliance, except the members of red.
    # We just run {trials} samples because a typical event has about 34 teams, which would yield
    # more than 5000 alliances, requiring more than 2 million match predictions in total

    for _ in range(trials):
        blue = random.sample(candidates, 3)
        features.append(featurizeAlliances(teamAggregates, red, blue))
        alliances.append([red, blue])

# run all the simulated matches through the model
print(f"Running {len(features)} predictions")
# Skip the model call when there is nothing to score (no partners available).
scores = model.predict_proba(vectorizer.transform(features)) if features else []
results = {}
for (red_alliance, _), score in zip(alliances, scores):
    key = tuple(red_alliance)
    # score[1] is presumably P(red wins) -- confirm against model.classes_.
    results[key] = results.get(key, 0) + score[1]


# Top 100 red alliances by mean predicted win probability, as a percentage.
for r in sorted(results, key=results.get, reverse=True)[:100]:
    print(r, results[r]*100/trials)

There are 32 teams at this event


100%|████████████████████████████████████████████████████████████████████████████████| 253/253 [01:42<00:00,  2.48it/s]


Running 126500 predictions
('frc492', 'frc2976', 'frc948') 77.9200666666667
('frc492', 'frc2976', 'frc5827') 77.61066666666666
('frc492', 'frc2976', 'frc8051') 77.34456666666662
('frc492', 'frc2976', 'frc4682') 76.52530000000006
('frc492', 'frc2976', 'frc5937') 74.86433333333342
('frc492', 'frc2976', 'frc4173') 74.84570000000004
('frc492', 'frc2976', 'frc3070') 73.73480000000006
('frc492', 'frc2976', 'frc949') 72.29946666666669
('frc492', 'frc2976', 'frc5941') 72.09829999999992
('frc492', 'frc8051', 'frc948') 71.58316666666674
('frc492', 'frc2976', 'frc9023') 70.41349999999991
('frc492', 'frc4173', 'frc8051') 70.07789999999997
('frc492', 'frc2976', 'frc9036') 69.90786666666669
('frc492', 'frc2976', 'frc3681') 69.13596666666666
('frc492', 'frc2928', 'frc2976') 68.22556666666662
('frc492', 'frc4173', 'frc948') 67.93723333333335
('frc492', 'frc2976', 'frc8248') 67.92473333333339
('frc492', 'frc2976', 'frc3876') 67.41806666666663
('frc492', 'frc3070', 'frc4173') 67.41153333333342
('frc492'

In [24]:
# Explicit matches to score: six team numbers per row, the first three are
# the red alliance and the last three the blue alliance.
to_predict = [
    [1899,492,948,9023,7461,488],
    [5941,492,8248,7461,948,5937],
    [492,5937,2903,3876,1983,949]
]

features = []
alliances = []
for lineup in to_predict:
    # Use a local name here: `teams` holds the event's team set from the
    # partner-ranking cell and must not be overwritten.
    keys = [f'frc{n}' for n in lineup]
    red, blue = keys[:3], keys[3:]
    features.append(featurizeAlliances(teamAggregates, red, blue))
    alliances.append([red, blue])

print(f"Running {len(features)} predictions")
scores = model.predict_proba(vectorizer.transform(features))
print(alliances)
print(scores)

# One result per match, in input order. Keeping a list (not a dict keyed by
# the red alliance) means a red alliance that appears in several matches gets
# one line per match instead of having its probabilities summed.
# score[1] is presumably P(red wins) -- confirm against model.classes_.
results = [(tuple(red_keys), score[1]) for (red_keys, _), score in zip(alliances, scores)]

for red_keys, p_red in results:
    print(red_keys, p_red*100)

                           

Running 4 predictions
[[['frc492', 'frc492', 'frc492'], ['frc2910', 'frc2910', 'frc2910']], [['frc1899', 'frc492', 'frc948'], ['frc9023', 'frc7461', 'frc488']], [['frc5941', 'frc492', 'frc8248'], ['frc7461', 'frc948', 'frc5937']], [['frc492', 'frc5937', 'frc2903'], ['frc3876', 'frc1983', 'frc949']]]
[[0.74833333 0.25166667]
 [0.86       0.14      ]
 [0.58       0.42      ]
 [0.76       0.24      ]]
('frc492', 'frc492', 'frc492') 25.166666666666664
('frc1899', 'frc492', 'frc948') 14.000000000000002
('frc5941', 'frc492', 'frc8248') 42.0
('frc492', 'frc5937', 'frc2903') 24.0


In [51]:
## Brackets

# The eight alliances A1..A8, entered as team numbers and stored as lists of
# 'frcNNNN' team keys.
alliances = {
    name: [f'frc{number}' for number in numbers]
    for name, numbers in {
        'A1': [7461, 2910, 5827],
        'A2': [488, 360, 4450],
        'A3': [4911, 2412, 4512],
        'A4': [5937, 1983, 3070],
        'A5': [2976, 1899, 9023],
        'A6': [492, 4682, 3681],
        'A7': [1294, 948, 8248],
        'A8': [9036, 949, 2928],
    }.items()
}

# Match number -> [red slot, blue slot]. 'A<n>' is an alliance defined above;
# 'W<n>' / 'L<n>' are the winner / loser of match n, which runMatch stores in
# `alliances` as each match is played.
bracket = {
    1: ['A1', 'A8'],   2: ['A4', 'A5'],
    3: ['A2', 'A7'],   4: ['A3', 'A6'],
    5: ['L1', 'L2'],   6: ['L3', 'L4'],
    7: ['W1', 'W2'],   8: ['W3', 'W4'],
    9: ['L7', 'W6'],   10: ['W5', 'L8'],
    11: ['W7', 'W8'],  12: ['W10', 'W9'],
    13: ['L11', 'W12'],
    14: ['W11', 'W13'],
    15: ['W14', 'L14'],
    16: ['W15', 'L15'],
}
        
def runMatch(matchNumber):
    """Simulate one bracket match and record who won and who lost.

    The two slots for ``matchNumber`` are read from the global ``bracket``
    and resolved to team lists through the global ``alliances``. The model's
    predicted probability is compared against one uniform random draw to
    pick the winner. The outcome is written back into ``alliances`` under
    the keys ``W<matchNumber>`` and ``L<matchNumber>``.

    Args:
        matchNumber: key into ``bracket``.
    """
    red_slot, blue_slot = bracket[matchNumber]
    red, blue = alliances[red_slot], alliances[blue_slot]

    match_features = featurizeAlliances(teamAggregates, red, blue)
    probabilities = model.predict_proba(vectorizer.transform([match_features]))
    # Column 1 of the single prediction row is used as P(red wins).
    p_red = probabilities[0][1]

    winner, loser = (red, blue) if random.random() < p_red else (blue, red)
    alliances[f'W{matchNumber}'] = winner
    alliances[f'L{matchNumber}'] = loser
        
def runBracket(tracked='A6'):
    """Simulate the whole 16-match bracket once.

    Matches 1-16 are played in order with runMatch. Matches 14-16 are three
    games between the same two alliances (14 is W11 vs W13; 15 and 16 replay
    the winner and loser of the previous game), and the alliance that wins
    the most of those three is the bracket winner.

    Args:
        tracked: key in ``alliances`` of the alliance to report on.
            Defaults to 'A6', the alliance this was previously hard-coded to.

    Returns:
        A ``(winner, tracked_in_final)`` tuple. ``winner`` is ``str()`` of
        the winning alliance's team-key list. ``tracked_in_final`` is True
        when the tracked alliance is one of the two sides of match 14,
        i.e. the winner of match 11 or of match 13.
    """
    for i in range(1, 17):
        runMatch(i)
    # Lists are unhashable, so alliances are counted by their string form.
    wins = Counter(str(alliances[f'W{i}']) for i in range(14, 17))
    winner = wins.most_common(1)[0][0]
    finalists = [alliances['W11'], alliances['W13']]
    return winner, alliances[tracked] in finalists

# Play the bracket 1000 times; count how often each alliance wins it and how
# often runBracket reports its tracked alliance in the final.
overall = Counter()
inFinalCtr = 0
for _ in tqdm(range(1000)):
    champion, reached_final = runBracket()
    overall[champion] += 1
    if reached_final:
        inFinalCtr += 1

# Most frequent winner first.
for champion, titles in overall.most_common():
    print(champion, titles)

print(f'inFinal: {inFinalCtr}')

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [08:28<00:00,  1.96it/s]

['frc7461', 'frc2910', 'frc5827'] 701
['frc488', 'frc360', 'frc4450'] 191
['frc4911', 'frc2412', 'frc4512'] 49
['frc5937', 'frc1983', 'frc3070'] 37
['frc2976', 'frc1899', 'frc9023'] 14
['frc492', 'frc4682', 'frc3681'] 8
inFinal: 59





In [48]:
# swap alliance 1 and 6 third choice

alliances =  {
 'A1':   [7461, 2910,3681],
 'A2':  [488, 360, 4450],
 'A3':  [4911, 2412, 4512],
 'A4':  [5937, 1983, 3070],
 'A5':   [2976, 1899, 9023],
 'A6':   [492, 4682, 5827],
 'A7':   [1294, 948, 8248],
 'A8':   [9036, 949, 2928]
}

for k in alliances:
    alliances[k] = list(map(lambda x: f'frc{x}', alliances[k]))


overall = Counter()
inFinalCtr = 0
for b in tqdm(range(1000)):
    # runBracket returns (winner, inFinal). Unpack it so the tally is keyed
    # by the winner alone; using the whole tuple as the key would split each
    # alliance's count by the inFinal flag.
    w, inFinal = runBracket()
    overall[w] += 1
    inFinalCtr += 1 if inFinal else 0

for k in sorted(overall, key=lambda x: overall[x], reverse=True):
    print(k, overall[k])

# Reported the same way as the baseline bracket cell, so the two lineups
# can be compared directly.
print(f'inFinal: {inFinalCtr}')

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [03:45<00:00,  4.44it/s]

['frc7461', 'frc2910', 'frc3681'] 843
['frc488', 'frc360', 'frc4450'] 87
['frc4911', 'frc2412', 'frc4512'] 25
['frc5937', 'frc1983', 'frc3070'] 24
['frc492', 'frc4682', 'frc5827'] 13
['frc2976', 'frc1899', 'frc9023'] 8



