# Last time we looked at predicting District matches from the regional events. Let's add some more features to our model.

Load the training data: a pickled list of match objects.  

In [1]:
from __future__ import print_function
import pickle
#import time
import sys
sys.path.append('..')
import swagger_client as v3client
from swagger_client.rest import ApiException

# Location of the pickled match list, relative to this notebook.
filename = '../matches.pkl'
matches = []
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load a matches.pkl that you produced yourself or otherwise trust.
with open(filename, 'rb') as f:
    matches = pickle.load(f)


Preview a match object

In [2]:
# Show the first loaded match record to see which fields are available.
matches[0]

{'actual_time': 1554589856,
 'alliances': {'blue': {'dq_team_keys': [],
                        'score': 80,
                        'surrogate_team_keys': [],
                        'team_keys': ['frc2930', 'frc4488', 'frc5468']},
               'red': {'dq_team_keys': [],
                       'score': 91,
                       'surrogate_team_keys': [],
                       'team_keys': ['frc2910', 'frc2046', 'frc2907']}},
 'comp_level': 'f',
 'event_key': '2019pncmp',
 'key': '2019pncmp_f1m1',
 'match_number': 1,
 'post_result_time': 1554590074,
 'predicted_time': 1554589863,
 'score_breakdown': {'blue': {'adjustPoints': 0,
                              'autoPoints': 15,
                              'bay1': 'PanelAndCargo',
                              'bay2': 'PanelAndCargo',
                              'bay3': 'PanelAndCargo',
                              'bay4': 'PanelAndCargo',
                              'bay5': 'PanelAndCargo',
                              'bay6'

As you can see there is a lot of information here.  Suppose we want to collect all of this together so that at the end of the regionals we have score statistics for every team.  Let's aggregate stats for each of these fields: 'totalMatches', 
'autoPoints', 'cargoPoints', 'completeRocketRankingPoints', 'completedRocketCount', 'habLevel1Count', 'habLevel2Count', 'habLevel3Count', 'foulCount', 'foulPoints', 'rocketPanelCount', 'rocketCargoCount', 'bayPanelCount', 'bayCargoCount', 'habLineCount', 'habDockingRankingPoints', 'habClimbPoints', 'hatchPanelPoints', 'rankingPoints', 'sandStormBonusPoints', 'techFoulCount', 'teleopPoints', 'totalPoints', 'winCount'

In [3]:
# Running per-team totals, keyed by team key (e.g. 'frc492').
teamAggregates = {}

def addMatch(team, m):
    """Add one match's alliance score breakdown to `team`'s running totals.

    The breakdown is recorded per alliance, so every team on an alliance is
    credited with the whole alliance's numbers for that match.

    Args:
        team: team key, e.g. 'frc492'.
        m: match object exposing `alliances.blue.team_keys`,
            `score_breakdown` (mapping 'blue'/'red' to a dict of score
            fields) and `winning_alliance`.

    Side effects:
        Creates `teamAggregates[team]` on first use and updates it in place.
        A match without a score breakdown is ignored entirely, so a team is
        never left with a record whose 'totalMatches' is 0.
    """
    if not m.score_breakdown:
        # Nothing was scored for this match; skip it before creating a record.
        return

    if team not in teamAggregates:
        # initialize an empty record for the team.
        teamAggregates[team] = {
            'totalMatches': 0,
            'autoPoints': 0,
            'cargoPoints': 0,
            'completeRocketRankingPoints': 0,
            'completedRocketCount': 0,
            'habLevel1Count': 0,
            'habLevel2Count': 0,
            'habLevel3Count': 0,
            'foulCount': 0,
            'foulPoints': 0,
            'rocketPanelCount': 0,
            'rocketCargoCount': 0,
            'bayPanelCount': 0,
            'bayCargoCount': 0,
            'habLineCount': 0,
            'habDockingRankingPoints': 0,
            'habClimbPoints': 0,
            'hatchPanelPoints': 0,
            'rankingPoints': 0,
            'sandStormBonusPoints': 0,
            'techFoulCount': 0,
            'teleopPoints': 0,
            'totalPoints': 0,
            'winCount': 0
        }

    # A team that is not listed on blue is treated as red.
    alliance = 'blue' if team in m.alliances.blue.team_keys else 'red'
    points = m.score_breakdown[alliance]
    summary = teamAggregates[team]

    summary['totalMatches'] += 1
    summary['autoPoints'] += points['autoPoints']
    summary['cargoPoints'] += points['cargoPoints']
    summary['completeRocketRankingPoints'] += int(points['completeRocketRankingPoint'])
    summary['completedRocketCount'] += (int(points['completedRocketFar'])
                                        + int(points['completedRocketNear']))

    # Per-robot endgame level and sandstorm hab-line crossing.
    hab_level_fields = {
        'HabLevel1': 'habLevel1Count',
        'HabLevel2': 'habLevel2Count',
        'HabLevel3': 'habLevel3Count',
    }
    for robot in (1, 2, 3):
        endgame = points['endgameRobot' + str(robot)]
        if endgame in hab_level_fields:
            summary[hab_level_fields[endgame]] += 1
        if points['habLineRobot' + str(robot)] == 'CrossedHabLineInSandstorm':
            summary['habLineCount'] += 1

    summary['foulCount'] += points['foulCount']
    summary['foulPoints'] += points['foulPoints']

    # Rocket cargo and panel positions, e.g. lowLeftRocketNear.
    for level in ('low', 'mid', 'top'):
        for side in ('Left', 'Right'):
            for end in ('Near', 'Far'):
                state = points[level + side + 'Rocket' + end]
                if state in ('Panel', 'PanelAndCargo'):
                    summary['rocketPanelCount'] += 1
                if state == 'PanelAndCargo':
                    summary['rocketCargoCount'] += 1

    # Cargo ship bays 1..8.
    for bay_number in range(1, 9):
        state = points['bay' + str(bay_number)]
        if state in ('Panel', 'PanelAndCargo'):
            summary['bayPanelCount'] += 1
        if state == 'PanelAndCargo':
            summary['bayCargoCount'] += 1

    summary['habDockingRankingPoints'] += int(points['habDockingRankingPoint'])
    summary['habClimbPoints'] += points['habClimbPoints']
    summary['hatchPanelPoints'] += points['hatchPanelPoints']
    summary['rankingPoints'] += points['rp']
    summary['sandStormBonusPoints'] += points['sandStormBonusPoints']
    summary['techFoulCount'] += points['techFoulCount']
    summary['teleopPoints'] += points['teleopPoints']
    summary['totalPoints'] += points['totalPoints']
    summary['winCount'] += int(m.winning_alliance == alliance)


# Accumulate statistics from regional matches only. The district championship
# ('2019pncmp') is what we want to predict, so including it would be cheating.
for match in matches:
    if match.event_key != '2019pncmp':
        for side in (match.alliances.red, match.alliances.blue):
            for team in side.team_keys:
                addMatch(team, match)

# Turn every running total into a per-match average for the team.
AVERAGED_FIELDS = (
    'autoPoints', 'cargoPoints', 'completeRocketRankingPoints',
    'completedRocketCount', 'habLevel1Count', 'habLevel2Count',
    'habLevel3Count', 'foulCount', 'foulPoints', 'rocketPanelCount',
    'rocketCargoCount', 'bayPanelCount', 'bayCargoCount', 'habLineCount',
    'habDockingRankingPoints', 'habClimbPoints', 'hatchPanelPoints',
    'rankingPoints', 'sandStormBonusPoints', 'techFoulCount', 'teleopPoints',
    'totalPoints', 'winCount',
)
for team_stats in teamAggregates.values():
    for field in AVERAGED_FIELDS:
        team_stats[field] /= team_stats['totalMatches']

# Persist the normalized team statistics next to the match data.
with open('../teamStats.pkl', 'wb') as stats_file:
    pickle.dump(teamAggregates, stats_file)

In [4]:
# Inspect the per-match averages computed for one team.
teamAggregates['frc492']

{'totalMatches': 28,
 'autoPoints': 11.25,
 'cargoPoints': 18.214285714285715,
 'completeRocketRankingPoints': 0.03571428571428571,
 'completedRocketCount': 0.03571428571428571,
 'habLevel1Count': 1.5,
 'habLevel2Count': 0.2857142857142857,
 'habLevel3Count': 0.75,
 'foulCount': 0.25,
 'foulPoints': 2.5,
 'rocketPanelCount': 4.25,
 'rocketCargoCount': 2.3214285714285716,
 'bayPanelCount': 6.571428571428571,
 'bayCargoCount': 3.75,
 'habLineCount': 2.857142857142857,
 'habDockingRankingPoints': 0.6428571428571429,
 'habClimbPoints': 15.214285714285714,
 'hatchPanelPoints': 11.571428571428571,
 'rankingPoints': 1.6785714285714286,
 'sandStormBonusPoints': 11.25,
 'techFoulCount': 0.03571428571428571,
 'teleopPoints': 45.0,
 'totalPoints': 58.75,
 'winCount': 0.5}

Now we have statistics for every team.  We can use this to generate features for every match.

In [5]:

def featurizeMatch(m):
    """Build the feature dict for one match from the per-team aggregates.

    For each alliance colour, every stat stored in `teamAggregates` is
    averaged over the alliance's teams that have a record and saved under
    '<colour>_<stat>'. Teams without a record are skipped and tallied in
    '<colour>_missingCount'. When no team on an alliance has a record, that
    alliance contributes only its missing count.

    Args:
        m: match object exposing `alliances.red.team_keys`,
            `alliances.blue.team_keys`, `event_key` and `winning_alliance`.

    Returns:
        dict of averaged features plus 'event' (the match's event key) and
        'label' (1 when red is the winning alliance, otherwise 0).
    """
    match_features = {'red_missingCount': 0, 'blue_missingCount': 0}

    for colour in ('red', 'blue'):
        prefix = colour + '_'
        known_teams = 0
        summed_features = set()

        for team in getattr(m.alliances, colour).team_keys:
            if team not in teamAggregates:
                match_features[prefix + 'missingCount'] += 1
                continue
            for stat, value in teamAggregates[team].items():
                feature = prefix + stat
                match_features[feature] = match_features.get(feature, 0) + value
                summed_features.add(feature)
            known_teams += 1

        # Convert the sums into averages over the teams we had data for.
        for feature in summed_features:
            match_features[feature] /= known_teams

    match_features['event'] = m.event_key
    match_features['label'] = int(m.winning_alliance == 'red')
    return match_features

# One feature dict per match, in the same order as `matches`.
features = [featurizeMatch(match) for match in matches]

features[0]

{'red_missingCount': 0,
 'blue_missingCount': 2,
 'red_totalMatches': 17.333333333333332,
 'red_autoPoints': 13.287134502923976,
 'red_cargoPoints': 26.199415204678363,
 'red_completeRocketRankingPoints': 0.07309941520467836,
 'red_completedRocketCount': 0.036062378167641324,
 'red_habLevel1Count': 1.2693957115009746,
 'red_habLevel2Count': 0.3005847953216374,
 'red_habLevel3Count': 0.8120857699805067,
 'red_foulCount': 0.5339181286549707,
 'red_foulPoints': 2.820272904483431,
 'red_rocketPanelCount': 4.2103313840155945,
 'red_rocketCargoCount': 2.9317738791423,
 'red_bayPanelCount': 7.276608187134504,
 'red_bayCargoCount': 5.8013645224171535,
 'red_habLineCount': 2.86588693957115,
 'red_habDockingRankingPoints': 0.5152046783625731,
 'red_habClimbPoints': 15.356725146198832,
 'red_hatchPanelPoints': 13.923976608187134,
 'red_rankingPoints': 1.6651072124756334,
 'red_sandStormBonusPoints': 13.287134502923976,
 'red_techFoulCount': 0.0,
 'red_teleopPoints': 55.48011695906433,
 'red_total

In [6]:
from sklearn.feature_extraction import DictVectorizer

# Create the train and test sets: the district championship ('2019pncmp') is
# held out for testing, every other event is used for training.
train, trainY = [], []
test, testY = [], []

for match_features in features:
    # Copy the numeric features instead of deleting 'event'/'label' in place,
    # so `features` stays intact and this cell can be re-run safely.
    row = {name: value for name, value in match_features.items()
           if name not in ('event', 'label')}
    if match_features['event'] == '2019pncmp':
        test.append(row)
        testY.append(match_features['label'])
    else:
        train.append(row)
        trainY.append(match_features['label'])

# Fit the feature-name -> column mapping on the training rows only, then
# reuse that mapping for the test rows.
vectorizer = DictVectorizer()
trainX = vectorizer.fit_transform(train)
testX = vectorizer.transform(test)



In [7]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Fit a random forest on the regional matches and score it on the held-out
# district matches. The fixed random_state keeps the result repeatable.
classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
    min_samples_split=3,
)
classifier.fit(trainX, trainY)
forest_predictions = classifier.predict(testX)
accuracy_score(testY, forest_predictions)

0.7062937062937062

Let's look at the feature importances. They tell us how useful a specific feature is.

In [26]:
importances = classifier.feature_importances_
std = np.std([tree.feature_importances_ for tree in classifier.estimators_],
             axis=0)
indices = np.argsort(importances)[::-1]
names = vectorizer.feature_names_

# Print the feature ranking
print("Feature ranking:")

#for f in range(trainX.shape[1]):
#    print("%d. %s (%f)" % (f + 1, names[indices[f]], importances[indices[f]]))
colors = [names[indices[f]].split('_')[0] for f in range(trainX.shape[1])]
labels = [names[indices[f]].replace('Count','').replace('Points','').replace('red','r').replace('blue','b') for f in range(trainX.shape[1])]
# Plot the feature importances of the forest
%matplotlib notebook
import matplotlib.pyplot as plt
plt.figure()
plt.title("Feature importances")
plt.bar(range(trainX.shape[1]), importances[indices],
       color=colors, 
        #yerr=std[indices], 
        align="center")
plt.xticks(range(trainX.shape[1]), labels, rotation='vertical', fontsize='x-small')
    
plt.xlim([-1, trainX.shape[1]])
plt.subplots_adjust(bottom=0.5)
#plt.legend()
plt.show()

Feature ranking:


<IPython.core.display.Javascript object>

## Let's also try logistic regression.

In [122]:
from sklearn.linear_model import LogisticRegression

# Use a dedicated name for this model: rebinding `classifier` here would
# replace the fitted random forest that the feature-importance cell reads.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# the label is binary, so it can likely be dropped — confirm the accuracy is
# unchanged before removing it.
lr_classifier = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial', max_iter=10000)
lr_classifier.fit(trainX, trainY)
lr_predictions = lr_classifier.predict(testX)
accuracy_score(testY, lr_predictions)

0.6643356643356644

## Ridge Classifier (regularized linear model)

In [123]:
from sklearn.linear_model import RidgeClassifier

# Use dedicated names for this model and its predictions: rebinding
# `classifier` here would replace the fitted random forest that the
# feature-importance cell reads.
ridge_classifier = RidgeClassifier()
ridge_classifier.fit(trainX, trainY)
ridge_predictions = ridge_classifier.predict(testX)
accuracy_score(testY, ridge_predictions)

0.6223776223776224

So far our best score at predicting the districts is 0.706 using a RandomForest; LogisticRegression reached 0.664 and the RidgeClassifier 0.622.  Next time we can explore hyperparameter tuning and also predicting which alliances would have been the best ones for us to join.