In [1]:
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
np.set_printoptions(suppress=True)
import scipy.spatial
import scipy.optimize
import json
import cassiopeia as cass
from cassiopeia import Position
from sklearn import svm
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score

def pairwise(iterable):
    """Group an iterable into non-overlapping pairs.

    s -> (s0, s1), (s2, s3), (s4, s5), ...

    Returns a lazy ``zip`` object. A trailing element without a partner is
    dropped, because ``zip`` stops as soon as one of its inputs is exhausted.
    """
    iterator = iter(iterable)
    # The same iterator is handed to zip twice, so each tuple consumes two items.
    return zip(*[iterator] * 2)

In [2]:
# JSON file of per-participant feature records: written by gather_data() and read by load_training_data().
FILENAME = "../resources/role_features.json"
# CSV of labelled matches; presumably the file to pass to gather_data() -- it is not referenced elsewhere in this notebook.
RESOURCES_FILENAME = "../resources/verification_set.csv"

In [3]:
# This is my gather data script. It isn't pretty but it works. It uses some of the functionality below.
# It also uses the training data here: https://github.com/Canisback/roleML/blob/master/verification_set.csv
def gather_data(resources_filename):
    """Download features for every labelled match and save them to ``FILENAME``.

    Reads the labelled CSV at ``resources_filename`` (header row first, match
    id in the last column), fetches each match from the EUW region, computes
    ``get_features`` for every participant and dumps a list of
    ``(match_id, participant_index, champion_id, role_name, features)``
    records as JSON to the module-level ``FILENAME``.

    Participants whose features cannot be computed are skipped (best effort),
    but the failure is now reported instead of being silently discarded.

    Raises:
        ValueError: if the CSV contains no rows at all.

    NOTE(review): ``get_features`` as defined in this notebook already returns
    the one-hot encoded vector, whereas the ``features[1] == 0`` check below
    and ``load_training_data`` both look like they expect the raw
    ``(cs, starting_item, spell, spell, x, y, ...)`` layout. Confirm which
    layout the saved JSON is supposed to contain before regenerating it.
    """
    positions_converter = {
        'carry': Position.bottom,
        'toplaner': Position.top,
        'midlaner': Position.middle,
        'support': Position.utility,
        'jungler': Position.jungle
    }

    def parse_line(line):
        # Drop the first and last columns, map role names to Position members
        # (unknown names become None), then move the second entry to the end.
        # Presumably this lines the CSV's column order up with participant
        # order -- TODO confirm against the CSV header.
        line = line[1:-1]
        line = [positions_converter.get(p) for p in line]
        line.append(line.pop(1))
        return line

    with open(resources_filename) as f:
        # Skip blank lines so a trailing newline does not break the id parsing below.
        rows = [line.strip().split(',') for line in f if line.strip()]
    if not rows:
        raise ValueError(f"{resources_filename} contains no rows")
    header, training = rows[0], rows[1:]
    print(header)
    if training:
        print(training[0])
    match_ids = [int(float(line[-1])) for line in training]
    if match_ids:
        print(match_ids[0])

    save = []
    all_features = []
    for i, (matchid, row) in enumerate(zip(match_ids, training)):
        roles = parse_line(row)
        # `Match` is not imported by name in this notebook; go through the package.
        match = cass.Match(id=matchid, region='EUW')
        for p in match.participants:
            pid = p.id - 1
            try:
                role = roles[pid]
                features = get_features(p)
                record = (matchid, pid, p.champion.id, role.value, features.tolist())
            except Exception as exc:
                # Best effort: skip this participant, but say why. Unlike a bare
                # `except:`, this lets KeyboardInterrupt stop a long download.
                print(f"Skipping participant {pid} of match {matchid}: {exc!r}")
                continue
            all_features.append(features)
            save.append(record)

            # Check for errors
            if features[1] == 0:
                # The summoner didn't buy any items???
                print(match.id, p.summoner.name, p.champion.name, f"http://canisback.com:5050/game/EUW1/{match.id}")
        print(i, len(match_ids))

    all_features = np.array(all_features)
    print(all_features)

    with open(FILENAME, 'w') as f:
        json.dump(save, f)

In [4]:
# We need a ton of helper functions in order to calculate the feature list

# These convert x,y positions to distances to top, mid, and bot lanes, which provides a better metric of position

# Bounding box of the map, taken from another notebook. The numbers were calculated by pulling a
# bunch of matches and finding the min/max positions of all events that occurred in those matches,
# giving the lowest/highest position on the map in both the x and y directions.
(XMIN, YMIN), (XMAX, YMAX) = (358, 461), (14589, 14673)
# Reference points that get_top_mid_bot_dists measures distances to.
top_lane_position = np.array((XMIN, YMAX))
# NOTE(review): uses XMAX/2, YMAX/2 rather than the midpoint of the bounding box -- confirm this is intended.
mid_lane_position = np.array((XMAX/2, YMAX/2))
bot_lane_position = np.array((XMAX, YMIN))

def get_positions(vec):
    """Return the (x, y) tuples stored from index 4 onwards of a raw feature vector.

    The first four entries of ``vec`` are skipped; the remainder is read as a
    flat ``x0, y0, x1, y1, ...`` sequence. A dangling final value without a
    partner is dropped.
    """
    flat_coordinates = vec[4:]
    return list(pairwise(flat_coordinates))

def get_top_mid_bot_dists(vec):
    """Convert the positions in a raw feature vector into lane distances.

    For every (x, y) position found by ``get_positions`` the Euclidean distance
    to ``top_lane_position``, ``mid_lane_position`` and ``bot_lane_position``
    is computed and divided by 1000.

    Returns a flat Python list: all distances to top first, then all distances
    to mid, then all distances to bot.
    """
    pos = get_positions(vec)
    anchors = (top_lane_position, mid_lane_position, bot_lane_position)
    per_lane = []
    for anchor in anchors:
        # cdist returns a 1 x len(pos) matrix; keep its single row.
        per_lane.append(scipy.spatial.distance.cdist([anchor], pos)[0] / 1000)
    return np.stack(per_lane).flatten().tolist()

In [5]:
# These constants and functions are used in the feature generation and to convert the feature list to
#  a one-hot vector

# Item names that count as a starting item. The list is sorted so every name has a stable index:
# get_features stores `index + 1` (0 when none was found) and feature_to_one_hot expands that
# value into a one-hot block of len(STARTING_ITEMS) entries.
STARTING_ITEMS = sorted((
    "Boots of Speed",
    "Faerie Charm",
    "Rejuvenation Bead",
    "Sapphire Crystal",
    "Ruby Crystal",
    "Cloth Armor",
    "Null-Magic Mantle",
    "Long Sword",
    "Hunter's Talisman",
    "Hunter's Machete",
    "Dagger",
    "Brawler's Gloves",
    "Amplifying Tome",
    "Doran's Shield",
    "Doran's Blade",
    "Doran's Ring",
    "The Dark Seal",
    "Cull",
    "Ancient Coin",
    "Relic Shield",
    "Spellthief's Edge",
    "Corrupting Potion"
))

# Sorted ids of the summoner spells whose modes include classic; get_features stores each
# participant's two spells as indices into this list.
SUMMONER_SPELLS = cass.get_summoner_spells("NA")
SUMMONER_SPELLS = sorted([spell.id for spell in SUMMONER_SPELLS if cass.GameMode.classic in spell.modes])
# Sorted champion ids. Only referenced by the commented-out "champion" feature in get_features.
CHAMPIONS = cass.get_champions("NA")
CHAMPIONS = sorted([champion.id for champion in CHAMPIONS])

# Sizes of the three lookup lists (the first two determine the one-hot block widths).
print(len(STARTING_ITEMS), len(SUMMONER_SPELLS), len(CHAMPIONS))

#        (
#            features['cs@12'],
#            features['starting_item'],
#            *features['summoner_spells'],
#            *features['positions']
#        )

def feature_to_one_hot(feature):
    """Expand the categorical entries of a feature vector into one-hot blocks.

    Expected layout of ``feature`` (a numpy array):
        [0]   kept as is
        [1]   starting item as ``STARTING_ITEMS`` index + 1, or 0 for "none"
        [2:4] two indices into ``SUMMONER_SPELLS``
        [4:]  kept as is

    Returns the concatenation of ``feature[0:1]``, a ``len(STARTING_ITEMS)``
    boolean block, a ``len(SUMMONER_SPELLS)`` boolean block and ``feature[4:]``.

    Raises AssertionError when the two summoner spell indices are identical,
    since exactly two spell slots must end up set.
    """
    item_slots = np.zeros(len(STARTING_ITEMS), dtype=bool)
    item_code = int(feature[1])
    if item_code >= 1:
        # Codes are shifted by one so that 0 can mean "no starting item".
        item_slots[item_code - 1] = True
    assert sum(item_slots) in (0, 1)

    spell_slots = np.zeros(len(SUMMONER_SPELLS), dtype=bool)
    for spell_index in (feature[2], feature[3]):
        spell_slots[int(spell_index)] = True
    assert sum(spell_slots) == 2

    return np.concatenate((feature[0:1], item_slots, spell_slots, feature[4:]))

22 9 143


In [6]:
# Now we can create the get_features function, which relies on the previous functionality

def get_features(participant):
    """Build the one-hot encoded feature vector for a single participant.

    Steps:
      1. Positions: (x, y) of every timeline frame except the first and last,
         of which the first 14 are kept and flattened.
      2. Starting item: among the items in ``cumulative_timeline["2:00"]`` whose
         name is in ``STARTING_ITEMS``, the one with the lowest ``gold.total``,
         stored as its ``STARTING_ITEMS`` index + 1 (0 if there is none).
      3. Summoner spells: ``SUMMONER_SPELLS`` indices of the D and F spells.
      4. ``creep_score`` from ``cumulative_timeline["12:00"]``.

    The positions are then replaced by lane distances
    (``get_top_mid_bot_dists``) and the categorical entries are expanded with
    ``feature_to_one_hot``.

    NOTE(review): the number of position entries follows the number of
    available frames, so a timeline with fewer than 16 frames presumably yields
    a shorter vector -- confirm whether that can happen.
    """
    # The (disabled) champion feature would be CHAMPIONS.index(participant.champion.id).

    # Every inner frame is converted before slicing, exactly as many as the timeline holds.
    frame_coordinates = [(frame.position.x, frame.position.y) for frame in participant.timeline.frames[1:-1]]
    positions = []
    for x, y in frame_coordinates[:14]:
        positions.extend((x, y))

    candidates = [item for item in participant.cumulative_timeline["2:00"].items if item.name in STARTING_ITEMS]
    candidates.sort(key=lambda item: item.gold.total)
    if candidates:
        starting_item = STARTING_ITEMS.index(candidates[0].name) + 1
    else:
        starting_item = 0

    summoner_spells = [
        SUMMONER_SPELLS.index(participant.summoner_spell_d.id),
        SUMMONER_SPELLS.index(participant.summoner_spell_f.id),
    ]
    cs_at_12 = participant.cumulative_timeline["12:00"].creep_score

    raw = np.array((cs_at_12, starting_item, *summoner_spells, *positions))

    # Swap the raw x,y tail for distances to the three lane reference points.
    lane_distances = get_top_mid_bot_dists(raw)
    with_distances = np.array(raw[:4].tolist() + lane_distances)

    return feature_to_one_hot(with_distances)

In [7]:
# We'll split the features up by team in order to predict by-team rather than by-individual

# WARNING: This requires the training data to be set up in a specific way.
#  Namely, the participants in the data need to be in groups of 5 participants on the same team.
#  That array of features is flattened, but still needs to be 5 participants on the same team in a row

def split_features_by_team(features, teamsize=5):
    """Chunk a sequence of per-participant features into consecutive teams.

    ``features`` is cut into slices of ``teamsize`` items, in order, and the
    slices are stacked into a numpy array. No check is made that the members
    of a slice really belong to the same match or team; only the total length
    is asserted to be a multiple of ``teamsize``.
    """
    total = len(features)
    assert total % teamsize == 0
    chunks = [features[start:start + teamsize] for start in range(0, total, teamsize)]
    return np.array(chunks)

In [8]:
# Use the above functions to load the training data

# Role names. A training label is the index of the role in this list (see load_training_data),
# and predictions are mapped back to names through the same list.
LABELS = ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']

def load_training_data(filename):
    """Load saved participant records and turn them into classifier inputs.

    ``filename`` is a JSON file holding a list of
    ``[match_id, participant_index, champion_id, role_name, features]``
    records. Matches that do not have exactly 10 records are dropped. For the
    remaining records the x,y positions in ``features`` are converted to lane
    distances and the categorical entries are one-hot encoded.

    Returns:
        (features, labels): ``features`` is an array with one encoded row per
        participant; ``labels`` holds each participant's index into ``LABELS``.

    Raises:
        ValueError: if a record's role name is not in ``LABELS``.
    """
    with open(filename) as f:
        full_data = json.load(f)

    # Drop matches where not all 10 participants were collected
    match_counter = Counter(datum[0] for datum in full_data)
    incomplete = {match_id for match_id, count in match_counter.items() if count != 10}

    features = []
    labels = []
    # Single pass; the loaded records are left untouched rather than rewritten in place.
    for matchid, pid, cid, role, feat in full_data:
        if matchid in incomplete:
            continue
        # Convert x,y positions to distance to top, mid, bot
        feat = feat[:4] + get_top_mid_bot_dists(feat)
        features.append(feature_to_one_hot(np.array(feat)))
        labels.append(LABELS.index(role))

    return np.array(features), np.array(labels)

In [9]:
# Train the classifier
def train(features, labels, random_state=None):
    """Fit and return an SVM classifier with probability estimates enabled.

    Args:
        features: training rows, one per participant.
        labels: class label for each row.
        random_state: optional seed forwarded to ``SVC``. Probability
            calibration shuffles the data internally, so pass an int here to
            make ``predict_proba`` reproducible between runs. The default
            (``None``) keeps the previous unseeded behaviour.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    clf = svm.SVC(gamma='scale', probability=True, random_state=random_state)
    clf.fit(features, labels)
    return clf

In [10]:
# Functions for predicting the roles from a match's team

def _predict_team(team, clf, verbose=False):
    """Assign one distinct role to every member of a team.

    The classifier's class probabilities are turned into costs
    (``1 - probability``) and the assignment with the lowest total cost is
    found with ``scipy.optimize.linear_sum_assignment``, so that no role is
    used twice.

    Args:
        team: feature rows, one per team member; must contain ``len(LABELS)`` rows.
        clf: fitted classifier exposing ``predict_proba``.
        verbose: when true, print the cost matrix and the chosen columns.

    Returns:
        (roles, cost): the role name for each team member in input order, and
        the mean cost of the chosen assignment.

    Raises:
        ValueError: if the classifier knows fewer classes than there are team
            members, so that not every member can receive a role.
    """
    assert len(team) == len(LABELS)
    probs = 1 - clf.predict_proba(team)
    row_ind, result = scipy.optimize.linear_sum_assignment(probs)
    if len(result) != len(team):
        # A rectangular cost matrix only assigns min(rows, columns) entries.
        raise ValueError(
            f"classifier only distinguishes {probs.shape[1]} roles; cannot assign {len(team)} team members"
        )
    cost = probs[row_ind, result].sum() / len(team)
    if verbose:
        print(probs)
        print(result)
    # predict_proba columns follow clf.classes_, which need not be 0..len(LABELS)-1
    # (e.g. when a role is missing from the training data), so translate the column
    # index into the actual class label before the LABELS lookup.
    classes = getattr(clf, "classes_", None)
    if classes is None:
        label_indices = result
    else:
        label_indices = [classes[column] for column in result]
    return [LABELS[int(i)] for i in label_indices], cost

def _predict_individuals(team, clf):
    """Predict a role name for each row independently.

    Unlike ``_predict_team`` nothing prevents the same role from being returned
    for several rows.
    """
    return [LABELS[label] for label in clf.predict(team)]

def predict_team(team, verbose=False):
    """Run ``_predict_team`` with the module-level ``clf``.

    ``clf`` is looked up when the function is called, so it must have been
    created (see the training cell) before the first call.
    """
    return _predict_team(team=team, clf=clf, verbose=verbose)  # abstract out the clf

def predict_individuals(team, verbose=False):
    """Run ``_predict_individuals`` with the module-level ``clf``.

    ``verbose`` is accepted but not used.
    """
    return _predict_individuals(team=team, clf=clf)  # abstract out the clf

def get_roles(team):
    """Predict the role of every participant on a team.

    Features are computed for each entry of ``team.participants`` and passed to
    ``predict_team``.

    Returns:
        (roles, cost): a dict mapping each participant to a ``cass.Position``,
        and the assignment cost reported by ``predict_team``.
    """
    feature_rows = [get_features(p) for p in team.participants]
    roles, cost = predict_team(np.array(feature_rows))

    result = {}
    for p, role in zip(team.participants, roles):
        # Role names are turned into Position members by value.
        result[p] = cass.Position(role)
    return result, cost

In [11]:
# Load the saved records and fit the classifier. `clf` becomes the module-level classifier
# that predict_team / predict_individuals use.
training_features, training_labels = load_training_data(FILENAME)
clf = train(training_features, training_labels)

In [12]:
# Finally, let's grab some data and predict!

cass.print_calls(False)
me = cass.Summoner(name="Kalturi", region="NA")
mh = me.match_history(queues={cass.Queue.ranked_solo_fives})
# Take the first match of the ranked solo history and predict roles for its red team.
m = mh[0]
t = m.red_team
roles, cost = get_roles(t)

# Mean assignment cost (1 - probability, averaged over the team); lower is better.
print(cost)
{p.champion.name: role for p, role in roles.items()}

0.002050240253452973


{'Vayne': <Position.bottom: 'BOTTOM'>,
 'Sylas': <Position.top: 'TOP'>,
 'Braum': <Position.utility: 'UTILITY'>,
 'Annie': <Position.middle: 'MIDDLE'>,
 'Nocturne': <Position.jungle: 'JUNGLE'>}