In [1]:
import csv
from collections import defaultdict
import tensorflow as tf
import tflearn
from random import sample
import numpy as np

In [2]:
DATA_PATH = "no_tiebreak_feature_data.csv"
LABELS_KEYS = ["match_winner","set_winner","game_winner"]
FEATURE_KEYS = ["pt1","pt2","pt3","pt4","pt5","pt6",
                "deuce","ad_in","ad_out", "server", "returner"]

In [3]:
# read in data
matches = defaultdict(lambda: {"features": [], "labels": []})
with open(DATA_PATH) as infile:
    reader = csv.DictReader(infile)
    for line in reader:
        match_id = line["match_id"]
        matches[match_id]["features"].append(
            [float(line[key]) for key in FEATURE_KEYS]
        )
        matches[match_id]["labels"].append(
            [float(line[key]) for key in LABELS_KEYS]
        )

In [4]:
# now turn it into a matrix
xtrain, ytrain = [], []
xtest, ytest = [], []

train_ids = sample(matches.keys(), int(0.8 * len(matches)))
for match_id in matches:
    if match_id in train_ids:
        xtrain += matches[match_id]["features"]
        ytrain += matches[match_id]["labels"]
    else:
        xtest += matches[match_id]["features"]
        ytest += matches[match_id]["labels"]
        
xtrain, ytrain = np.array(xtrain), np.array(ytrain)
xtest, ytest = np.array(xtest), np.array(ytest)

In [5]:
# Build neural network
net = tflearn.input_data(shape=[None, len(FEATURE_KEYS)])
net = tflearn.fully_connected(net, 256)
net = tflearn.fully_connected(net, 256)
net = tflearn.fully_connected(net, 256)
net = tflearn.fully_connected(net, 3)
net = tflearn.regression(net, optimizer='adam', loss='mean_square')

In [6]:
# train the model
model = tflearn.DNN(net)
model.fit(xtrain, ytrain, n_epoch=10, batch_size=256, show_metric=True)

Training Step: 2630  | total loss: [1m[32m0.23245[0m[0m
| Adam | epoch: 010 | loss: 0.23245 - acc: 0.3933 -- iter: 67221/67221
Training Step: 2630  | total loss: [1m[32m0.23245[0m[0m
| Adam | epoch: 010 | loss: 0.23245 - acc: 0.3933 -- iter: 67221/67221
--


In [13]:
# evaluate model on test data
model.evaluate(xtest, ytest)

[0.57435141509433962]

In [12]:
model.predict(xtest)

[[1.5363528728485107, 1.5257630348205566, 1.270444393157959],
 [1.5316331386566162, 1.5418180227279663, 1.7537128925323486],
 [1.5393195152282715, 1.5225259065628052, 1.2593188285827637],
 [1.508020281791687, 1.5200977325439453, 1.7610485553741455],
 [1.563252329826355, 1.547565221786499, 1.2658370733261108],
 [1.5316331386566162, 1.5418180227279663, 1.7537128925323486],
 [1.5035866498947144, 1.4936517477035522, 1.2474184036254883],
 [1.4978814125061035, 1.5056118965148926, 1.7272802591323853],
 [1.5698072910308838, 1.561073899269104, 1.326265573501587],
 [1.4603885412216187, 1.4687875509262085, 1.6957629919052124],
 [1.5287891626358032, 1.5194926261901855, 1.2281360626220703],
 [1.5095598697662354, 1.521889567375183, 1.7273515462875366],
 [1.4705499410629272, 1.4605499505996704, 1.2322888374328613],
 [1.4843215942382812, 1.4938271045684814, 1.7022814750671387],
 [1.5171465873718262, 1.5054365396499634, 1.2724173069000244],
 [1.5170880556106567, 1.5259383916854858, 1.7253074645996094],