# Rocket League 8: The Ocho

## Imports

In [59]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import featuretools as ft
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

## Read in

In [22]:
matches =  pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/train.csv')

## Converters and functions

In [23]:
converter = { 'bronze': 1, 'silver': 2, 'gold': 3, 'platinum': 4, 'diamond': 5, 'champion': 6 }
order = ['bronze', 'silver', 'gold', 'platinum', 'diamond', 'champion']
order_dict = {'rank':order}
catvars = ['rank', 'color', 'map_code', 'car_name']
skewvars = ['avg_powerslide_duration',
            'demos_inflicted',                       
            'demos_taken',                    
            'percent_most_back',                     
            'percent_most_forward',                  
            'percent_closest_to_ball',               
            'percent_farthest_from_ball']
useless = ['assists', 'mvp', 'map_code','color','time_defensive_third',
            'time_neutral_third', 'time_offensive_third', 'time_defensive_half',
            'time_offensive_half', 'time_behind_ball', 'time_infront_ball',
            'time_most_back', 'time_most_forward',
            'goals_against_while_last_defender', 'time_closest_to_ball',
            'time_farthest_from_ball', 'percent_defensive_third',
            'percent_offensive_third', 'percent_neutral_third',
            'percent_defensive_half', 'percent_offensive_half',
            'percent_behind_ball', 'percent_infront_ball', 'percent_most_back',
            'percent_most_forward', 'percent_closest_to_ball',
            'percent_farthest_from_ball', 'demos_inflicted', 'demos_taken']

In [24]:
def find_outliers(col):

    try:
        Q1 = col.quantile(0.25)
        Q3 = col.quantile(0.75)
        IQR = Q3 - Q1
        lowbound = Q1-(1.5*IQR)
        highbound = Q3+(1.5*IQR)
        df_outliers = (col >= lowbound) & (col <= highbound)
    except:
        df_outliers = (col == col)

    return df_outliers

def filter_outliers(df):

    filtered_df = df[df.apply(find_outliers).all(axis = 'columns')]

    return filtered_df

In [25]:
def use_filter_outliers(df, filtout):

    if filtout == True:
        return filter_outliers(df)
    else:
        return df

def plotly_feature(df, col, filtout = True):

    fig = px.box(
            use_filter_outliers(df, filtout),
            x = 'rank',
            y = col,
            category_orders=order_dict,
            color = 'rank'
            )
    return fig.show()

## Reshape matches

In [26]:
matches.shape

(60242, 91)

In [27]:
matches.columns[:30]

Index(['match_id', 'color', 'rank', 'map_code', 'duration', 'car_name',
       'possession_time', 'time_in_side', 'shots', 'shots_against', 'goals',
       'goals_against', 'saves', 'assists', 'score', 'mvp',
       'shooting_percentage', 'bpm', 'bcpm', 'avg_amount', 'amount_collected',
       'amount_stolen', 'amount_collected_big', 'amount_stolen_big',
       'amount_collected_small', 'amount_stolen_small', 'count_collected_big',
       'count_stolen_big', 'count_collected_small', 'count_stolen_small'],
      dtype='object')

In [28]:
matches.columns[30:60]

Index(['amount_overfill', 'amount_overfill_stolen',
       'amount_used_while_supersonic', 'time_zero_boost', 'percent_zero_boost',
       'time_full_boost', 'percent_full_boost', 'time_boost_0_25',
       'time_boost_25_50', 'time_boost_50_75', 'time_boost_75_100',
       'percent_boost_0_25', 'percent_boost_25_50', 'percent_boost_50_75',
       'percent_boost_75_100', 'avg_speed', 'total_distance',
       'time_supersonic_speed', 'time_boost_speed', 'time_slow_speed',
       'time_ground', 'time_low_air', 'time_high_air', 'time_powerslide',
       'count_powerslide', 'avg_powerslide_duration', 'avg_speed_percentage',
       'percent_slow_speed', 'percent_boost_speed',
       'percent_supersonic_speed'],
      dtype='object')

In [29]:
matches.columns[60:]

Index(['percent_ground', 'percent_low_air', 'percent_high_air',
       'avg_distance_to_ball', 'avg_distance_to_ball_possession',
       'avg_distance_to_ball_no_possession', 'time_defensive_third',
       'time_neutral_third', 'time_offensive_third', 'time_defensive_half',
       'time_offensive_half', 'time_behind_ball', 'time_infront_ball',
       'time_most_back', 'time_most_forward',
       'goals_against_while_last_defender', 'time_closest_to_ball',
       'time_farthest_from_ball', 'percent_defensive_third',
       'percent_offensive_third', 'percent_neutral_third',
       'percent_defensive_half', 'percent_offensive_half',
       'percent_behind_ball', 'percent_infront_ball', 'percent_most_back',
       'percent_most_forward', 'percent_closest_to_ball',
       'percent_farthest_from_ball', 'demos_inflicted', 'demos_taken'],
      dtype='object')

In [30]:
matches_win = matches.sort_values(['match_id', 'goals', 'score'], ascending=[True, False, False]).drop_duplicates(subset = ['match_id'], keep = 'first')
matches_lose = matches.sort_values(['match_id', 'goals', 'score'], ascending=[True, False, False]).drop_duplicates(subset = ['match_id'], keep = 'first')
matches_wide = matches_win.merge(matches_lose, on = ['match_id', 'rank', 'map_code'], suffixes=('_win','_lose'))

In [31]:
matches_wide.shape

(30121, 179)

## Engineer features

In [53]:
matches_wider = matches_wide.assign(
    percent_closest_diff = lambda x: x['percent_closest_to_ball_win']/x['percent_closest_to_ball_lose'],
    ground_low_air = lambda x: x['percent_ground_win']+x['percent_low_air_win']+x['percent_ground_lose']+x['percent_low_air_lose'],
    boosts_per_collected = lambda x: (x['bpm_win']+x['bpm_lose'])/(x['bcpm_win']+x['bcpm_lose']),
    save_prop = lambda x: (x['saves_win']+x['saves_lose'])/(x['shots_against_win']+x['shots_against_lose']),
    mean_percent_supersonic_speed = lambda x: (x['percent_supersonic_speed_win']+x['percent_supersonic_speed_lose'])/2,
    mean_percent_supersonic_speed_squared = lambda x: ((x['percent_supersonic_speed_win']+x['percent_supersonic_speed_lose'])/2)**2,
    avg_powerslide_durations = lambda x: np.log(((x['avg_powerslide_duration_win']+x['avg_powerslide_duration_lose'])/2)+0.01)
)

In [63]:
matches_eng = matches_wider.drop(columns = matches_wide.drop(columns = ['match_id','rank']).columns).groupby('rank').apply(filter_outliers)
matches_eng.columns

Index(['match_id', 'rank', 'percent_closest_diff', 'ground_low_air',
       'boosts_per_collected', 'save_prop', 'mean_percent_supersonic_speed',
       'mean_percent_supersonic_speed_squared', 'avg_powerslide_durations'],
      dtype='object')

## Model with features and check importance

Unimpressive

In [58]:
X = matches_eng.drop(columns = ['rank'])
y = matches_eng['rank']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify = y)

pipe = Pipeline(steps = [
        ('vt', VarianceThreshold()),
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier())
    ])

pipe.fit(X_train, y_train)
accuracy_score(y_test, pipe.predict(X_test))

0.3846273533530419

## Old model with only winners/losers data

Neither dataset seems more predictive than the other, so best to keep them together.

- With outliers, both perform around 0.45
- Without outliers, both perform around 0.48

In [39]:
matches_lose_fo = matches_lose.fillna(0)#.groupby('rank').apply(filter_outliers)
X = matches_lose_fo.drop(columns = ['rank','car_name', 'map_code', 'color'])
y = matches_lose_fo['rank']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify = y)

pipe = Pipeline(steps = [
        ('vt', VarianceThreshold()),
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier())
    ])

pipe.fit(X_train, y_train)
accuracy_score(y_test, pipe.predict(X_test))

0.4485460098260523

In [41]:
matches_win_fo = matches_win.fillna(0)#.groupby('rank').apply(filter_outliers)
X = matches_win_fo.drop(columns = ['rank','car_name', 'map_code', 'color'])
y = matches_win_fo['rank']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify = y)

pipe = Pipeline(steps = [
        ('vt', VarianceThreshold()),
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier())
    ])

pipe.fit(X_train, y_train)
accuracy_score(y_test, pipe.predict(X_test))

0.44615588899216574

## Experimenting with FeatureTools

In [71]:
es = ft.EntitySet("rocket_league")

es.entity_from_dataframe(
                entity_id='matches_engineered', 
                dataframe = matches_eng,
                index = 'match_id'  
    )

feature_matrix, feature_defs = ft.dfs(entityset=es,
                    target_entity="matches_engineered",
                    agg_primitives=["mean", "sum", "mode"],
                    max_depth=1
    )

feature_matrix

  agg_primitives: ['mean', 'mode', 'sum']
This may be caused by a using a value of max_depth that is too small, not setting interesting values, or it may indicate no compatible variable types for the primitive were found in the data.


Unnamed: 0_level_0,rank,percent_closest_diff,ground_low_air,boosts_per_collected,save_prop,mean_percent_supersonic_speed,mean_percent_supersonic_speed_squared,avg_powerslide_durations
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
11,bronze,1.0,194.225868,0.892074,0.363636,8.659224,74.982160,-1.660731
13,bronze,1.0,196.572888,0.972169,0.750000,5.111286,26.125245,-1.427116
232,bronze,1.0,198.380108,1.033721,0.250000,3.652231,13.338793,-0.941609
293,bronze,1.0,191.722042,1.066379,0.500000,11.239795,126.332992,-1.108663
560,bronze,1.0,194.449112,1.062760,0.400000,7.217287,52.089232,-0.994252
...,...,...,...,...,...,...,...,...
30108,silver,1.0,198.765044,0.852691,0.142857,4.115631,16.938415,-1.897120
30111,silver,1.0,199.488894,0.959123,0.000000,5.637619,31.782742,-2.120264
30112,silver,1.0,197.242386,1.084417,0.428571,8.171837,66.778920,-1.560648
30118,silver,1.0,190.563732,1.030523,0.500000,9.171167,84.110304,-0.916291
