In [1]:
import csv
from collections import defaultdict
import tensorflow as tf
import tflearn
from random import sample
import numpy as np
from itertools import chain, repeat, islice

def pad_infinite(iterable, padding=None):
    """Return an iterator over ``iterable`` that never runs out.

    Every item of ``iterable`` is produced first, followed by ``padding``
    repeated indefinitely. The result is endless, so the caller has to bound
    it (for example with ``islice``).
    """
    endless_filler = repeat(padding)
    return chain(iterable, endless_filler)

def pad(iterable, size, padding=None):
    """Return a list holding the first ``size`` items of the padded input.

    Inputs shorter than ``size`` are extended with ``padding``; inputs longer
    than ``size`` are cut off after ``size`` items.
    """
    endless = pad_infinite(iterable, padding)
    leading_items = islice(endless, size)
    return list(leading_items)

In [2]:
# CSV file opened by the data-loading cell.
DATA_PATH = "no_tiebreak_feature_data.csv"

# Target column names; of these, only "match_winner" is read by the
# data-loading cell.
LABELS_KEYS = [
    "match_winner",
    "set_winner",
]

# Columns converted to floats to build one feature vector per CSV row.
# The order of this list is the order of the values inside each vector.
FEATURE_KEYS = [
    "pt1", "pt2", "pt3", "pt4", "pt5", "pt6",
    "deuce", "ad_in", "ad_out",
    "server", "returner",
    "game_winner",
]

In [20]:
# Winner code, spelled exactly as the text found in the CSV, mapped to a
# one-hot pair.
labels = {"1.0": [1, 0], "2.0": [0, 1]}

# Group the CSV rows by match id. Each match collects one float feature vector
# per row (in file order) and a single one-hot winner label.
matches = defaultdict(lambda: {"features": [], "labels": []})
with open(DATA_PATH) as csv_file:
    for row in csv.DictReader(csv_file):
        match_id = row["match_id"]
        record = matches[match_id]
        feature_vector = [float(row[key]) for key in FEATURE_KEYS]
        record["features"].append(feature_vector)
        # Reassigned on every row, so the value from the match's last row is
        # the one that remains.
        record["labels"] = labels[row["match_winner"]]

In [21]:
# Turn the per-match sequences into fixed-size arrays.
# A match with n feature rows contributes n samples: sample k holds the first
# k rows (k = 0 .. n-1), zero-padded or truncated to MAX_ROWS rows, and carries
# the match's one-hot winner label. Matches are assigned to train or test as a
# whole, so all samples of one match land on the same side of the split.
# NOTE(review): the [:k] prefix excludes row k, so the first sample of every
# match is pure padding and the complete match never appears - confirm intent.
xtrain, ytrain = [], []
xtest, ytest = [], []

# Rows per sample; must equal the sequence length given to the network's
# input_data shape.
MAX_ROWS = 100
blank_row = np.zeros(len(FEATURE_KEYS))

# random.sample requires a sequence: a dict key view is rejected on
# Python 3.11+, so draw from a sorted list of the ids instead.
# NOTE(review): the draw is unseeded, so the split differs between runs.
train_ids = sample(sorted(matches), int(0.8 * len(matches)))
# Set copy for O(1) membership tests; train_ids itself stays a list.
train_id_set = set(train_ids)

for match_id in matches:
    features = matches[match_id]["features"]
    # Named match_label so the `labels` lookup table defined by the loading
    # cell is not overwritten.
    match_label = matches[match_id]["labels"]
    if match_id in train_id_set:
        x_bucket, y_bucket = xtrain, ytrain
    else:
        x_bucket, y_bucket = xtest, ytest
    for k in range(len(features)):
        x_bucket.append(pad(features[:k], MAX_ROWS, blank_row))
        y_bucket.append(match_label)

xtrain, ytrain = np.array(xtrain), np.array(ytrain)
xtest, ytest = np.array(xtest), np.array(ytest)

In [None]:
# Network definition: sequences of 100 rows with len(FEATURE_KEYS) values each
# feed a 256-unit LSTM, followed by a 2-unit fully connected layer; the
# regression layer is configured with the 'adam' optimizer and a
# 'mean_square' loss.
n_features = len(FEATURE_KEYS)
sequence_input = tflearn.input_data(shape=[None, 100, n_features])
lstm_output = tflearn.lstm(sequence_input, 256)
class_scores = tflearn.fully_connected(lstm_output, 2)
net = tflearn.regression(class_scores, optimizer='adam', loss='mean_square')

In [None]:
# Wrap the network in a DNN model and fit it on the training arrays:
# 10 epochs, batches of 256 samples, metric display enabled.
fit_options = {"n_epoch": 10, "batch_size": 256, "show_metric": True}
model = tflearn.DNN(net)
model.fit(xtrain, ytrain, **fit_options)

In [None]:
# Evaluate the trained model on the held-out arrays; xtest/ytest hold the
# samples of every match whose id was not drawn into train_ids.
model.evaluate(xtest, ytest)

In [None]:
# Display the model's predictions for the held-out samples.
model.predict(xtest)

In [None]:
# Scratch check: the all-zero row used for padding (the same expression that
# defines blank_row in the matrix-building cell).
np.zeros(len(FEATURE_KEYS))

In [None]:
# Scratch check: display the training input array.
xtrain

In [23]:
# Training inputs: (number of samples, rows per sample, values per row).
xtrain.shape

(67556, 100, 12)

In [22]:
# Training targets: one 2-element one-hot label per sample.
ytrain.shape

(67556, 2)

In [16]:
# Label of the first training sample.
# NOTE(review): the saved output of this cell has execution count 16, older
# than the data cells (20, 21), and shows a list of pairs rather than the
# single pair implied by ytrain.shape - re-run to refresh it.
ytrain[0]

[[1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0]]

In [17]:
# Label stored for whichever match the last executed loop left in match_id.
# NOTE(review): the saved output of this cell has execution count 17, older
# than the loading cell (20), which now stores a single one-hot pair per
# match - re-run to refresh it.
matches[match_id]["labels"]

[[0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1]]