## Copy of randomforest using average_precision as metric in GridSearchCV.

* 1hr aproximate runtime

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from collections import Counter
from itertools import combinations_with_replacement as combos
from itertools import permutations as perms
from tensorflow.keras import layers, Model
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import coverage_error, f1_score, label_ranking_average_precision_score, average_precision_score
from sklearn.model_selection import train_test_split, GridSearchCV
from category_encoders import OneHotEncoder
from tensorflow.data import Dataset


Establish game rules.

In [46]:
scoring_rules = [[100, 200, 1000, 2000, 4000, 5000],
                 [0, 0, 200, 400, 800, 5000],
                 [0, 0, 300, 600, 1200, 5000],
                 [0, 0, 400, 800, 1600, 5000],
                 [50, 100, 500, 1000, 2000, 5000],
                 [0, 0, 600, 1200, 2400, 5000]
                 ]


def is_three_pair(choice):
    choice = sorted(choice)
    return (len(choice) == 6 and choice[0] == choice[1] and
            choice[2] == choice[3] and choice[4] == choice[5])


def is_straight(choice):
    return sorted(choice) == list(range(1, 7))


def score_all():
    return [1.] * 6


def make_labels(roll):
    """Returns a label for each roll."""
    counts = Counter(roll)
    if is_three_pair(roll) and (sum(scoring_rules[die - 1][count - 1] for die, count in counts.items()) < 1500):
        choice = score_all()
    elif is_straight(roll):
        choice = score_all()
    else:
        picks = set()
        for die, count in counts.items():
            if scoring_rules[die - 1][count - 1] > 0:
                picks.add(die)
        choice = [0.] * 6
        for i, x in enumerate(roll):
            if x in picks:
                choice[i] = 1.
    return np.array(choice)


Make combinations of 6 dice throws.

In [3]:
def make_some_features(numbers, clip):
    features = set()
    combinations = (combo for combo in combos(numbers, 6))
    for i, comb in enumerate(combinations):
        if i % clip == 0:  # Keeping size reasonable
            for perm in perms(comb):
                features.add(perm)
    return features


Make arrays of throws and coresponding labels.

In [4]:
features = make_some_features(list(range(1, 7)), 2)

all_features = np.array([np.array(feature) for feature in features])

all_labels = np.array([make_labels(feature) for feature in all_features])

len(all_features), len(all_labels)

(23114, 23114)

In [5]:
def create_dataset(features, labels):
    data = {str(i): features[:,i] for i in range(6)}
    dataset = pd.DataFrame(data)
    label = {'{}_l'.format(i): labels[:,i] for i in range(6)}
    label_df = pd.DataFrame(label)
    df = pd.concat([dataset, label_df], axis=1, sort=False)
    return df

Create a DataFrame.

In [6]:
df = create_dataset(all_features, all_labels)

In [7]:
df.sample(10)

Unnamed: 0,0,1,2,3,4,5,0_l,1_l,2_l,3_l,4_l,5_l
21365,2,3,5,5,1,1,0.0,0.0,1.0,1.0,1.0,1.0
20612,6,5,2,6,5,5,0.0,1.0,0.0,0.0,1.0,1.0
14333,3,4,3,6,3,1,1.0,0.0,1.0,0.0,1.0,1.0
5416,4,2,2,4,3,5,0.0,0.0,0.0,0.0,0.0,1.0
429,6,4,2,4,3,4,0.0,1.0,0.0,1.0,0.0,1.0
1490,5,6,6,1,4,6,1.0,1.0,1.0,1.0,0.0,1.0
11539,3,1,6,5,3,6,0.0,1.0,0.0,1.0,0.0,0.0
9165,1,1,4,2,5,4,1.0,1.0,0.0,0.0,1.0,0.0
12548,4,5,3,1,4,6,0.0,1.0,0.0,1.0,0.0,0.0
13071,5,3,2,4,6,4,1.0,0.0,0.0,0.0,0.0,0.0


Separate X and y sets and split into training and test sets.

In [8]:
X = df[['0', '1', '2', '3', '4', '5']]
y = df[['0_l', '1_l', '2_l', '3_l', '4_l', '5_l']]

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, shuffle=True)

In [15]:
X_train.shape, y_train.shape

((17335, 6), (17335, 6))

In [16]:
X_test.shape, y_test.shape

((5779, 6), (5779, 6))

Extra Trees with hyperparameters chosen from earler cross validations.

In [18]:
extra = ExtraTreesClassifier(bootstrap=True,
                             oob_score=True,
                             n_jobs=-1,
                             n_estimators=2250)

Cross validation with grid search on min_sample_split and max_depth.

In [19]:
params = {'min_samples_split': [4, 5, 6],
          'max_depth': [27, 30, 33]}
grid = GridSearchCV(extra,
                    param_grid=params,
                    scoring='average_precision',
                    n_jobs=-1,
                    cv=5,
                    verbose=1)
grid.fit(X_train, y_train)
grid.best_params_, grid.best_score_

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed: 44.4min finished


({'max_depth': 30, 'min_samples_split': 6}, 0.9759599000677779)

Refine n_estimators with grid search.

In [20]:
params = {'n_estimators': [1250, 1500, 1750, 2000, 2250, 2500]}
grid = GridSearchCV(grid.best_estimator_,
                    param_grid=params,
                    scoring='average_precision',
                    n_jobs=-1,
                    cv=5,
                    verbose=1)
grid.fit(X_train, y_train)
grid.best_params_, grid.best_score_

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed: 29.8min finished


({'n_estimators': 2000}, 0.9759340084764407)

In [21]:
best = grid.best_estimator_

In [22]:
y_pred = np.array([best.predict([test])[0] for test in X_test.values])

In [23]:
y_test.sum().sum() / len(y_test)

3.033742862086866

In [24]:
coverage_error(y_pred, y_test)

3.0531233777470153

In [25]:
f1_score(y_test, y_pred, average='samples')

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


0.9027160962703444

In [26]:
label_ranking_average_precision_score(y_test, y_pred)

0.9735207456115055

In [27]:
average_precision_score(y_test, y_pred)

0.909635794804136

Examine individual predictions at the standard 0.5 probability threshold, and at different thresholds.

In [58]:
def test_model_pred(model, threshold=0.475, samples=25):
    """Get random sample of rolls from X_test and make predictions.
    
    Compare prediction precision with probability > 0.5 positive label with
    positive label at other thresholds by adjusting threshold.
    
    Print number of samples.
    
    :var threshold: float
    :var samples: int
    """
    for test in X_test.sample(samples).values:
        print(test)
        # Create ground truth label.
        true = make_labels(test).astype(int)
        print(true)
        
        # Raw probability predictions.
        pred_proba = np.array([round(y[0][1], 3) for y in model.predict_proba([list(test)])])
        print(pred_proba)
        
        # Predict 1 if probability > 0.5.
        pred = (pred_proba > 0.5).astype(int)
        print(pred)
        
        # Predict 1 if probability > threshold.
        pred_thresh = (pred_proba > threshold).astype(int)
        print(pred_thresh)
        
        result = 'Nailed it' if list(true) == list(pred) else 'Nuts'
        print(result)
        result = 'Nailed it' if list(true) == list(pred_thresh) else 'Nuts'
        print(result)
        print

In [60]:
test_model_pred(best, threshold=.475, samples=40)

[5 1 4 4 1 3]
[1 1 0 0 1 0]
[0.861 0.882 0.257 0.3   0.881 0.204]
[1 1 0 0 1 0]
[1 1 0 0 1 0]
Nailed it
Nailed it

[5 6 4 4 1 5]
[1 0 0 0 1 1]
[0.811 0.188 0.278 0.272 0.854 0.808]
[1 0 0 0 1 1]
[1 0 0 0 1 1]
Nailed it
Nailed it

[1 5 4 4 1 4]
[1 1 1 1 1 1]
[0.893 0.914 0.419 0.41  0.901 0.407]
[1 1 0 0 1 0]
[1 1 0 0 1 0]
Nuts
Nuts

[3 4 3 5 2 6]
[0 0 0 1 0 0]
[0.319 0.271 0.302 0.804 0.217 0.102]
[0 0 0 1 0 0]
[0 0 0 1 0 0]
Nailed it
Nailed it

[1 3 1 5 5 6]
[1 0 1 1 1 0]
[0.827 0.195 0.818 0.833 0.814 0.207]
[1 0 1 1 1 0]
[1 0 1 1 1 0]
Nailed it
Nailed it

[1 6 3 3 5 4]
[1 0 0 0 1 0]
[0.892 0.103 0.23  0.241 0.783 0.197]
[1 0 0 0 1 0]
[1 0 0 0 1 0]
Nailed it
Nailed it

[2 4 5 2 5 2]
[1 0 1 1 1 1]
[0.592 0.268 0.823 0.577 0.835 0.574]
[1 0 1 1 1 1]
[1 0 1 1 1 1]
Nailed it
Nailed it

[4 3 4 1 3 3]
[0 1 0 1 1 1]
[0.492 0.524 0.503 0.93  0.459 0.508]
[0 1 1 1 0 1]
[1 1 1 1 0 1]
Nuts
Nuts

[5 4 2 5 5 6]
[1 0 0 1 1 0]
[0.779 0.16  0.171 0.776 0.778 0.146]
[1 0 0 1 1 0]
[1 0 0 1 1 0]
Nailed

In [65]:
def test_threshold_precision(model, thresholds):
    """Test array of threshold values and caluculate precition metrics for each.
    
    Calculate each threshold on a random sample of test data.
    Store and return in a dict.
    """
    results = dict()
    # This is going to take a while...
    size = len(X_test.values) / 10
    for threshold in thresholds:
        # Get sample of dice throws.
        throws = X_test.sample(size).values
        
        # Make predictions.
        y_pred = np.array([best.predict([dice])[0] for dice in throws])
        
        # Ground truth labels.
        true = np.arra([make_labels(dice) for dice in throws])
        
        # Caluculate metrics.
        f_one = f1_score(true, y_pred, average='samples')
        label_ranking = label_ranking_average_precision_score(true, y_pred)
        average_precision = average_precision_score(true, y_pred)
        
        # Save result.
        results[threshold] = {'f1_score': f_one,
                              'Label ranking average precision': label_ranking,
                              'Average precision': average_precision}
        
    return results

In [66]:
thresholds = np.linspace(.47, .5, 10)

In [None]:
threshold_test = test_threshold_precision(best, thresholds)