In [1]:
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
np.set_printoptions(suppress=True)
import scipy.spatial
import scipy.optimize
import json
import cassiopeia as cass
from cassiopeia import Position, Match
from sklearn import svm
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score

def pairwise(iterable):
    "s -> (s0, s1), (s2, s3), (s4, s5), ..."
    a = iter(iterable)
    return zip(a, a)

In [2]:
VERIFICATION_SET_FILENAME = "./verification_set.csv"
FEATURES_FILENAME = "./role_features.json"

In [3]:
def load_verificaiton_set(verification_set_filename):
    with open(verification_set_filename) as f:
        training = f.readlines()
        training = [line.strip().split(',') for line in training]
        header = training[0]
        training = training[1:]
        
    header = header[1:-1]  # The last field is the game Id, the first is the row index
    header = [int(pid) for pid in header]
    resort = np.argsort(header)
    
    training = np.array(training)
    match_ids = training[:, -1]
    training = training[:, 1:-1]
    training = training[:, resort]
    training = training.tolist()
    
    assert len(match_ids) == len(training)
    
    # Convert to Cass Position enums
    positions_converter = {
        'carry': Position.bottom,
        'toplaner': Position.top,
        'midlaner': Position.middle,
        'support': Position.utility,
        'jungler': Position.jungle
    }
    good_data, bad_indices = [], []
    for i, line in enumerate(training):
        try:
            good_data.append(
                [positions_converter[p] for p in line]
            )
        except KeyError:
            bad_indices.append(i)
    training = np.array(good_data)
    match_ids = np.array([match_id for i, match_id in enumerate(match_ids) if i not in bad_indices])
    
    match_ids = np.array(match_ids)
    match_ids = match_ids.astype(np.float)
    match_ids = match_ids.astype(int)
    
    assert len(match_ids) == len(training)
    
    return training, match_ids
#load_verificaiton_set(VERIFICATION_SET_FILENAME);

In [41]:
# We need a ton of helper functions in order to calculate the feature list

# These convert x,y positions to distances to top, mid, and bot lanes, which provides a better metric of position

(XMIN, YMIN), (XMAX, YMAX) = (358, 461), (14589, 14673) # From another notebook. They were calculated by pulling a bunch of matches and finding the min/max positions of all events that occured in those matches. The goal was to get a bounding bot for highest/lowest position on the map for but x and y directions. This process yielded the numbers on this line.
top_lane_position = np.array((XMIN, YMAX))
mid_lane_position = np.array((XMAX/2, YMAX/2))
bot_lane_position = np.array((XMAX, YMIN))

def get_top_mid_bot_dists(positions):
    to_top = scipy.spatial.distance.cdist([top_lane_position], positions)[0]
    to_mid = scipy.spatial.distance.cdist([mid_lane_position], positions)[0]
    to_bot = scipy.spatial.distance.cdist([bot_lane_position], positions)[0]
    return np.stack((to_top, to_mid, to_bot)).flatten().tolist()

In [5]:
# These constants and functions are used in the feature generation

STARTING_ITEMS = sorted((
    "Boots of Speed",
    "Faerie Charm",
    "Rejuvenation Bead",
    "Sapphire Crystal",
    "Ruby Crystal",
    "Cloth Armor",
    "Null-Magic Mantle",
    "Long Sword",
    "Hunter's Talisman",
    "Hunter's Machete",
    "Dagger",
    "Brawler's Gloves",
    "Amplifying Tome",
    "Doran's Shield",
    "Doran's Blade",
    "Doran's Ring",
    "The Dark Seal",
    "Cull",
    "Ancient Coin",
    "Relic Shield",
    "Spellthief's Edge",
    "Corrupting Potion"
))

SUMMONER_SPELLS = cass.get_summoner_spells("NA")
SUMMONER_SPELLS = sorted([spell.id for spell in SUMMONER_SPELLS if cass.GameMode.classic in spell.modes])
CHAMPIONS = cass.get_champions("NA")
CHAMPIONS = sorted([champion.id for champion in CHAMPIONS])

print(len(STARTING_ITEMS), len(SUMMONER_SPELLS), len(CHAMPIONS))

22 9 143


In [43]:
# Now we can create the get_features function, which relies on the previous functionality

def _to_one_hot(features, nfeatures):
    new = np.zeros((nfeatures,), dtype=bool)
    for i in features:
        new[i] = True
    return new

def _feature_CSat12(participant):
    return [participant.cumulative_timeline["12:00"].creep_score]

def _feature_summoner_spells(participant):
    spells = [SUMMONER_SPELLS.index(participant.summoner_spell_d.id),
              SUMMONER_SPELLS.index(participant.summoner_spell_f.id)]
    spells = _to_one_hot(spells, len(SUMMONER_SPELLS))
    return spells

def _feature_starting_item(participant):
    # Sometimes the participant won't have bought an item at 2:00 yet. In that case, set to -1.
    item_index = next(iter([STARTING_ITEMS.index(item.name) for item in
        sorted([item for item in participant.cumulative_timeline["2:00"].items
                if item.name in STARTING_ITEMS],
               key=lambda item: item.gold.total)
    ]), -1)
    
    # Manually do the one-hot encoding because of the special case for 0.
    starting_item_feature = np.zeros((len(STARTING_ITEMS),), dtype=bool)
    if item_index >= 0:
        starting_item_feature[item_index] = True
    assert sum(starting_item_feature) in (0, 1)
    
    return starting_item_feature

def _feature_positions(participant):
    nframes = 11
    positions = [(frame.position.x, frame.position.y) for frame in participant.timeline.frames[1:-1]][:nframes]
    if len(positions) != nframes:
        raise ValueError("Not enough frames in match.")
    positions = get_top_mid_bot_dists(positions)
    positions = np.array(positions) / 1000
    positions = positions.tolist()
    return positions

def _feature_champion(participant):
    champion = CHAMPIONS.index(participant.champion.id)
    return [champion]

def _feature_XP(participant):
    nframes = 11
    xp = [frame.experience / 1000 for frame in participant.timeline.frames[1:-1]][:nframes]
    if len(xp) != nframes:
        raise ValueError("Not enough frames in match.")
    return xp

def get_features(participant):
    features = {
        "positions": _feature_positions(participant),
        "starting_item": _feature_starting_item(participant),
        "summoner_spells": _feature_summoner_spells(participant),
        "cs@12": _feature_CSat12(participant),
        "xp": _feature_XP(participant)
    }
    features = np.array((
        *features['cs@12'],
        *features['starting_item'],
        *features['summoner_spells'],
        *features['positions'],
        *features['xp']
    ))
    
    return features

In [52]:
# This is my gather data script. It isn't pretty but it works. It uses some of the functionality below.
# It also uses the training data here: https://github.com/Canisback/roleML/blob/master/verification_set.csv

def collect_team_data(team, roles):
    data = []
    for p, role in zip(team.participants, roles):
        features = get_features(p)
        data.append({
            "championId": p.champion.id,
            "role": role.value,
            "features": features.tolist()
        })
    return data


def gather_data(resources_filename, output_filename):
    match_roles, match_ids = load_verificaiton_set(resources_filename)

    save = []
    all_features = []
    for match_id, roles in zip(match_ids, match_roles):
        match_id = int(match_id)
        match = Match(id=match_id, region='EUW')
        blue_team, red_team = match.blue_team, match.red_team
        blue_roles = [roles[p.id - 1] for p in blue_team.participants]
        red_roles = [roles[p.id - 1] for p in red_team.participants]
        try:
            participant_data = collect_team_data(blue_team, blue_roles)
            blue_data = {
                "matchId": match_id,   
                "team": "BLUE",
                "participantData": participant_data
            }
            save.append(blue_data)
        except:
            pass
        try:
            participant_data = collect_team_data(red_team, red_roles)
            red_data = {
                "matchId": match_id,   
                "team": "RED",
                "participantData": participant_data
            }
            save.append(red_data)
        except:
            pass
        
        print(len(save)/2, len(match_ids))

    with open(output_filename, 'w') as f:
        json.dump(save, f)

#gather_data(VERIFICATION_SET_FILENAME, FEATURES_FILENAME)

In [45]:
# We'll split the features up by team in order to predict by-team rather than by-individual

# WARNING: This requires the training data to be set up in a specific way.
#  Namely, the participants in the data need to be in groups of 5 participants on the same team.
#  That array of features is flattened, but still needs to be 5 participants on the same team in a row

def split_features_by_team(features, teamsize=5):
    teams = []
    assert len(features) % teamsize == 0
    for i in range(0, len(features), teamsize):
        teams.append(features[i:i+teamsize])
    teams = np.array(teams)
    return teams

In [46]:
# Use the above functions to load the training data

LABELS = ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']

def load_training_data(filename):
    with open(filename) as f:
        data = json.load(f)
    
    features = []
    labels = []
    champion_ids = []
    for team in data:
        for p in team['participantData']:
            features.append(p['features'])
            labels.append(p['role'])
            champion_ids.append(p['championId'])
    features = np.array(features)
        
    subtract = np.zeros((features.shape[1]))
    divide = np.ones_like(subtract)
    print(subtract.shape)
    for j, col in enumerate(features.T):
        subtract[j] = col.min()
        divide[j] = col.max() - subtract[j]
        if divide[j] == 0:
            divide[j] = 1
    
    return features, labels, champion_ids

In [47]:
# Train the classifier

def train(features, labels):
    clf = svm.SVC(gamma='scale', probability=True)
    clf.fit(features, labels)
    return clf

In [53]:
# Functions for predicting the roles from a match's team

def predict_individuals(team, clf):
    result = clf.predict(team)
    return [LABELS[x] for x in result]

def predict_team(team, clf, verbose=False):
    assert len(team) == len(LABELS)
    probs = 1 - clf.predict_proba(team)
    print(probs)
    row_ind, result = scipy.optimize.linear_sum_assignment(probs)
    cost = probs[row_ind, result].sum() / len(team)
    if verbose:
        print(probs)
        print(result)
    return [LABELS[i] for i in result], cost

def get_roles(team):
    features = np.array([
        get_features(p) for p in team.participants
    ])
    
    roles, cost = predict_team(features, clf)
    result = {p: cass.Position(role) for p, role in zip(team.participants, roles)}
    return result, cost

In [49]:
features, labels, champion_ids = load_training_data(FEATURES_FILENAME)
print(np.mean(features, axis=0))
clf = train(features, labels)

(76,)
[49.55200683  0.00170794  0.03005978  0.00375747  0.          0.00102477
  0.07702818  0.00153715  0.00017079  0.21417592  0.13373185  0.10128096
  0.00102477  0.10623399  0.0852263   0.03569599  0.          0.00836892
  0.05311699  0.00153715  0.00563621  0.11426132  0.02134927  0.00614859
  0.04730999  0.99180188  0.00939368  0.20102477  0.20051238  0.17028181
  0.33287788  0.04064902 11.47515711 11.75073756 11.42649922 11.61379847
 11.82552619 11.88268817 11.94985986 11.87669553 11.71388671 11.82156623
 11.86736357  4.06160447  5.24514038  5.29536894  5.5024527   5.59441048
  5.55835968  5.49222493  5.51600642  5.50969593  5.57962089  5.46971392
  9.37937922  8.62578348  9.16702144  9.31413508  9.30015351  9.20684739
  9.10424935  9.22270453  9.36484923  9.33441796  9.24799074  0.00053681
  0.1849585   0.60669291  1.02009923  1.44191734  1.84028335  2.24632161
  2.69241452  3.11253271  3.54152229  3.99905807]


In [50]:
scores = cross_val_score(clf, features, labels, cv=5)
print(scores)
scores *= 100
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

[0.99744681 0.9965812  0.9991453  0.9991453  0.9991453 ]
Accuracy: 99.83 (+/- 0.22)


In [54]:
# Finally, let's grab some data and predict!

cass.print_calls(False)
me = cass.Summoner(name="Kalturi", region="NA")
mh = me.match_history(queues={cass.Queue.ranked_solo_fives})
m = mh[0]
t = m.red_team
roles, cost = get_roles(t)

print(cost)
{p.champion.name: role for p, role in roles.items()}

[[0.00155725 0.99986402 0.99970421 0.99961111 0.9992634 ]
 [0.99870535 0.99911211 0.99867543 0.00410433 0.99940278]
 [0.99999935 0.99999929 0.99999975 0.99999972 0.00000189]
 [0.99808185 0.99947598 0.00371133 0.99921536 0.99951547]
 [0.99999944 0.00071293 0.99983701 0.99982135 0.99962927]]
0.0020175455450013534


{'Vayne': <Position.bottom: 'BOTTOM'>,
 'Sylas': <Position.top: 'TOP'>,
 'Braum': <Position.utility: 'UTILITY'>,
 'Annie': <Position.middle: 'MIDDLE'>,
 'Nocturne': <Position.jungle: 'JUNGLE'>}