# We'll start from the models we trained last time and try to optimize their parameters.  First load and train the model as before.

Load the training data into a pandas data frame.  

In [2]:
from __future__ import print_function
#import time
import sys
import pickle

sys.path.append('..')
import swagger_client as v3client
from swagger_client.rest import ApiException

filename = '../matches.pkl'
matches = []
with open(filename, 'rb') as f:
    matches = pickle.load(f)


In [3]:
teamAggregates = {}

def addMatch(team, m):    
    if team not in teamAggregates:       
        # initialize an empty record for the team.
        teamAggregates[team]= {
            'totalMatches':0, 
            'autoPoints':0, 
            'cargoPoints':0, 
            'completeRocketRankingPoints':0, 
            'completedRocketCount':0, 
            'habLevel1Count':0, 
            'habLevel2Count':0, 
            'habLevel3Count':0,             
            'foulCount':0, 
            'foulPoints':0, 
            'rocketPanelCount':0, 
            'rocketCargoCount':0, 
            'bayPanelCount':0, 
            'bayCargoCount':0, 
            'habLineCount':0, 
            'habDockingRankingPoints':0, 
            'habClimbPoints':0, 
            'hatchPanelPoints':0, 
            'rankingPoints':0, 
            'sandStormBonusPoints':0, 
            'techFoulCount':0, 
            'teleopPoints':0, 
            'totalPoints':0, 
            'winCount':0
        }

    alliance = 'blue' if team in m.alliances.blue.team_keys else 'red'
    points = m.score_breakdown[alliance]
    summary = teamAggregates[team]
    # update all of the fields.
    summary['totalMatches']+=1    
    summary['autoPoints']+=points['autoPoints']
    summary['cargoPoints']+=points['cargoPoints'] 
    summary['completeRocketRankingPoints']+=int(points['completeRocketRankingPoint']) 
    summary['completedRocketCount']+=int(points['completedRocketFar'])+int(points['completedRocketNear'])
    for r in [1,2,3]:
        l = points['endgameRobot'+str(r)]
        if l=='HabLevel1':
            summary['habLevel1Count']+=1
        elif l=='HabLevel2':
            summary['habLevel2Count']+=1
        elif l=='HabLevel3':
            summary['habLevel3Count']+=1
        h = points['habLineRobot'+str(r)]
        if h=='CrossedHabLineInSandstorm':
            summary['habLineCount']+=1

    summary['foulCount']+=points['foulCount']
    summary['foulPoints']+=points['foulPoints']
    
    # Rocket cargo and panel positions
    for l in ['low','mid','top']:
            for s in ['Left','Right']:
                for n in ['Near','Far']:
                    r = l+s+'Rocket'+n  #e.g. lowLeftRocketNear
                    if points[r]=='Panel':
                        summary['rocketPanelCount']+=1
                    if points[r]=='PanelAndCargo':
                        summary['rocketPanelCount']+=1
                        summary['rocketCargoCount']+=1
    # bays 1:8                 
    for b in range(1,9): 
        bay = 'bay'+str(b)
        if points[bay]=='Panel':
            summary['bayPanelCount']+=1
        if points[bay]=='PanelAndCargo':
            summary['bayPanelCount']+=1
            summary['bayCargoCount']+=1
            
    summary['habDockingRankingPoints']+=int(points['habDockingRankingPoint'])
    summary['habClimbPoints']+=points['habClimbPoints'] 
    summary['hatchPanelPoints']+=points['hatchPanelPoints']
    summary['rankingPoints']+=points['rp']
    summary['sandStormBonusPoints']+=points['sandStormBonusPoints']
    summary['techFoulCount']+=points['techFoulCount']
    summary['teleopPoints']+=points['teleopPoints']
    summary['totalPoints']+=points['totalPoints']
    summary['winCount']+=int(m.winning_alliance==alliance)


for m in matches:
    # only aggregate statistics for regional matches- skip the districts or we're cheating.
    if m.event_key=='2019pncmp':
        continue
    for t in m.alliances.red.team_keys:
        addMatch(t,m)
    for t in m.alliances.blue.team_keys:
        addMatch(t,m)
        
# normalize the aggregates
for t in teamAggregates:
    for k in ['autoPoints', 'cargoPoints', 'completeRocketRankingPoints', 'completedRocketCount', 'habLevel1Count', 'habLevel2Count', 'habLevel3Count', 'foulCount', 'foulPoints', 'rocketPanelCount', 'rocketCargoCount', 'bayPanelCount', 'bayCargoCount', 'habLineCount', 'habDockingRankingPoints', 'habClimbPoints', 'hatchPanelPoints', 'rankingPoints', 'sandStormBonusPoints', 'techFoulCount', 'teleopPoints', 'totalPoints', 'winCount']:
        teamAggregates[t][k]/=teamAggregates[t]['totalMatches']

with open('../teamStats.pkl','wb') as f:
    pickle.dump(teamAggregates,f)

Now we have statistics for every team.  We can use this to generate features for every match.

In [4]:

def featurizeMatch(m):
    match_features = { 'red_missingCount':0, 'blue_missingCount': 0 }
    count=0    
    allKeys = set()
    for t in m.alliances.red.team_keys:
        if t not in teamAggregates:
            match_features['red_missingCount']+=1
            continue
        for k in teamAggregates[t]:
            key = 'red_'+k;
            if key not in match_features:
                match_features[key]=0
            match_features[key]+=teamAggregates[t][k]
            allKeys.add(key)
        count+=1
    # compute the average
    for k in allKeys:
        match_features[k]/=count
    count=0
    allKeys=set()
    for t in m.alliances.blue.team_keys:
        if t not in teamAggregates:
            match_features['blue_missingCount']+=1
            continue
        for k in teamAggregates[t]:
            key = 'blue_'+k;
            if key not in match_features:
                match_features[key]=0
            match_features[key]+=teamAggregates[t][k]
            allKeys.add(key)
        count+=1
    # compute the average
    for k in allKeys:
        match_features[k]/=count
    match_features['event']=m.event_key
    match_features['label']=int(m.winning_alliance=='red')    
    return match_features

features = []

for m in matches:
    features.append(featurizeMatch(m))
    


In [5]:
from sklearn.feature_extraction import DictVectorizer
# create train and test sets
train = []
trainY = []
test = []
testY = []

for m in features:
    if 'event' not in m:
        print(m)
    event = m['event']
    label = m['label']
    del m['event']
    del m['label']
    if event == '2019pncmp':
        test.append(m)
        testY.append(label)
    else:
        train.append(m)
        trainY.append(label)
        
vectorizer = DictVectorizer()
trainX = vectorizer.fit_transform(train)
testX = vectorizer.transform(test)



In [6]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, random_state=0, min_samples_split=3)  
classifier.fit(trainX,trainY)
forest_predictions= classifier.predict(testX)
import numpy as np
from sklearn.metrics import accuracy_score
#np.sum(np.abs(scores-Ytest))
accuracy_score(testY, forest_predictions)

0.7062937062937062

Let's look at the possible parameters we could tune. 

In [7]:
classifier.get_params()

{'bootstrap': True,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 3,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 0,
 'verbose': 0,
 'warm_start': False}

We can run a basic grid search over some of these parameters: let's vary min_samples_leaf, min_samples_split, max_depth, and n_estimators.

In [8]:

grid_params = {'max_depth': [None, 3, 10], 'min_samples_leaf':[1,3,5], 'min_samples_split':[3,5,10], 'n_estimators':[1,8,32,100,200]}

In [9]:
from sklearn.model_selection import ParameterGrid
classifier=RandomForestClassifier()
best_score = 0
best_g=None

for g in ParameterGrid(grid_params):
    classifier.set_params(**g)
    classifier.fit(trainX,trainY)
    forest_predictions= classifier.predict(testX)
    score = accuracy_score(testY, forest_predictions)
    if score>best_score:
        best_score=score
        best_g = g

print("Accuracy: {}".format(best_score))
print("Grid:", best_g)

Accuracy: 0.7202797202797203
Grid: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
