# Environment Setup & Data Preparation

In [None]:
# Google Drive Mounting
# Colab-only step: mounts Google Drive under /content/drive so that the match
# file read later from /content/drive/MyDrive/... is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# PySpark Installation
!pip install pyspark

Collecting pyspark
  Downloading pyspark-3.2.0.tar.gz (281.3 MB)
[K     |████████████████████████████████| 281.3 MB 37 kB/s 
[?25hCollecting py4j==0.10.9.2
  Downloading py4j-0.10.9.2-py2.py3-none-any.whl (198 kB)
[K     |████████████████████████████████| 198 kB 42.6 MB/s 
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.2.0-py2.py3-none-any.whl size=281805912 sha256=51e67c1c96a86e0e9f0dd491c7a78c847c2f6f02711880425179c284cdc7711a
  Stored in directory: /root/.cache/pip/wheels/0b/de/d2/9be5d59d7331c6c2a7c1b6d1a4f463ce107332b1ecd4e80718
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9.2 pyspark-3.2.0


In [None]:
# Libraries to be used
from pyspark import SparkContext
import json
from itertools import combinations

In [None]:
# sc.stop()

In [None]:
# Spark Context Initialization
# getOrCreate() returns the context already running in this kernel if there is
# one, so re-executing this cell does not fail with
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate()

In [None]:
# Reading Data into an RDD
# Every text line is one record; lines that are exactly the string 'None'
# are discarded before any parsing happens.
filtered_match_RDD = (
    sc.textFile("/content/drive/MyDrive/FilteredMatches/patch23matches.txt")
      .filter(lambda line: line != 'None')
)

In [None]:
# Number of lines that remain after the 'None' entries were filtered out
filtered_match_RDD.count()

72455

# Requirements

## Champion Win Rate

In [None]:
# Champion Name Function
# It returns a list of all the matches the champion has played and whether it won or lost
def champion(line):
  """Extract one (championName, win_flag) pair per participant of a match.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championName, is_win)`` tuples for at most the first 10
    participants, where ``is_win`` is ``int(participant["win"])``.
    If the line is not valid JSON or a field is missing, the pairs collected
    up to that point are returned (possibly an empty list).
  """
  champs = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      champs.append((player["championName"], int(player["win"])))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only: json.JSONDecodeError is a
    # ValueError; a missing key or wrongly shaped value raises KeyError or
    # TypeError. Anything else (e.g. KeyboardInterrupt) is not swallowed.
    pass
  return champs

In [None]:
# One (championName, win_flag) record per participant
champ_RDD = filtered_match_RDD.flatMap(champion)

# Only the records whose win flag is 1
win_champ_RDD = champ_RDD.filter(lambda record: record[1] == 1)

# Group all records, and the winning records, by champion name
grouped_champ_RDD = champ_RDD.groupByKey()
grouped_win_champ_RDD = win_champ_RDD.groupByKey()

# Collapse each group to (champion, number of records in the group)
simple_grouped_champ_RDD = grouped_champ_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))
simple_grouped_win_champ_RDD = grouped_win_champ_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))

# Inner join -> (champion, (wins, games))
joined = simple_grouped_win_champ_RDD.join(simple_grouped_champ_RDD)

# Champion win rate in percent: wins / games * 100
champ_win_rate = joined.map(
    lambda kv: (kv[0], (kv[1][0] / kv[1][1]) * 100))

In [None]:
# Preview five (champion, win rate in %) records
champ_win_rate.take(5)

[('Lulu', 50.54357512029941),
 ('Mordekaiser', 49.712753734201456),
 ('Tryndamere', 49.715909090909086),
 ('Yasuo', 50.457553146557785),
 ('Yorick', 54.438860971524285)]

## Champion Pick rate

In [None]:
# Champion pick function
def champion_pick(line):
  """Emit one (championName, 1) pair per participant of a match.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championName, 1)`` tuples for at most the first 10
    participants. If the line is not valid JSON or a field is missing, the
    pairs collected up to that point are returned (possibly an empty list).
  """
  champs = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      champs.append((player["championName"], 1))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return champs

In [None]:
# One (championName, 1) record per participant
picked_champs = filtered_match_RDD.flatMap(champion_pick)

# Gather the records of each champion
grouped_picked_champs = picked_champs.groupByKey()

# Collapse each group to (champion, number of picks)
simple_grouped_picked_champs = grouped_picked_champs.map(
    lambda kv: (kv[0], len(list(kv[1]))))

# Total number of records in the input RDD
num_of_matches = filtered_match_RDD.count()

# Pick rate in percent: picks / matches * 100.
# The match count is the denominator because a champion can only be picked
# once in a match.
champs_pick_rate = simple_grouped_picked_champs.map(
    lambda kv: (kv[0], (kv[1] / num_of_matches) * 100))

In [None]:
# Preview five (champion, pick rate in %) records
champs_pick_rate.take(5)

[('Ezreal', 14.655993375198397),
 ('Lulu', 7.744117038161617),
 ('Mordekaiser', 7.207232075081086),
 ('Tryndamere', 8.258919329238838),
 ('Yasuo', 19.606652404941)]

## Champion Ban Rate

In [None]:
# Banned champion function
def champion_ban(line):
  """Emit one (championId, 1) pair per ban slot of the two teams of a match.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championId, 1)`` tuples read from
    ``info.teams[0|1].bans[0..4]``, interleaved per slot (team 0 first, then
    team 1). Note that the key is the numeric ``championId`` and not the
    champion name. If the line is not valid JSON or a field/slot is missing,
    the pairs collected up to that point are returned (possibly an empty
    list).
  """
  bans = []
  try:
    match = json.loads(line)
    for slot in range(5):
      for team_index in (0, 1):
        champ_id = match["info"]["teams"][team_index]['bans'][slot]['championId']
        bans.append((champ_id, 1))
  except (ValueError, KeyError, IndexError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError, a missing team or ban slot is an IndexError); unrelated
    # exceptions are no longer swallowed.
    pass
  return bans

In [None]:
# One (championId, 1) record per ban slot
banned_champs = filtered_match_RDD.flatMap(champion_ban)

# Gather the ban records of each champion id
grouped_banned_champs = banned_champs.groupByKey()

# Collapse each group to (championId, number of bans)
simple_grouped_banned_champs = grouped_banned_champs.map(
    lambda kv: (kv[0], len(list(kv[1]))))

# Twice the number of records in the input RDD
double_num_of_matches = 2 * filtered_match_RDD.count()

# Ban rate in percent: bans / (2 * matches) * 100.
# The denominator is doubled because the same champion can be banned at most
# twice in one match.
champs_ban_rate = simple_grouped_banned_champs.map(
    lambda kv: (kv[0], (kv[1] / double_num_of_matches) * 100))

In [None]:
# Preview five (championId, ban rate in %) records
champs_ban_rate.take(5)

[(240, 0.4575253605686288),
 (360, 5.1521634117728246),
 (120, 2.4856807673728523),
 (121, 4.431716237664758),
 (1, 0.9046994686357048)]

## Champions Synergy

In [None]:
# Champion pairs function
def Pairs(line):
  """Emit every same-team champion pair of a match with the team's result.

  Champion names of each side are sorted, so a pair always gets the same key
  ``"<first>_<second>"`` regardless of participant order.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list with ``(pair_key, 1)`` for each pair of winning champions followed
    by ``(pair_key, 0)`` for each pair of losing champions, built from at
    most the first 10 participants. Pairs are generated for whatever team
    sizes are present (the previous fixed 10-iteration loop raised an
    IndexError, and therefore dropped the match, unless each side had exactly
    5 players). If the line is not valid JSON or a participant lacks a
    required field, an empty list is returned.
  """
  champions = []
  try:
    participants = json.loads(line)["info"]["participants"][:10]
    winners = sorted(p["championName"] for p in participants if p["win"])
    losers = sorted(p["championName"] for p in participants if not p["win"])
    winning_pairs = [(first + '_' + second, 1)
                     for first, second in combinations(winners, 2)]
    losing_pairs = [(first + '_' + second, 0)
                    for first, second in combinations(losers, 2)]
  except (ValueError, KeyError, TypeError):
    # Malformed record: contribute nothing (json.JSONDecodeError is a
    # ValueError). Unrelated exceptions are no longer swallowed.
    return champions
  champions.extend(winning_pairs)
  champions.extend(losing_pairs)
  return champions

In [None]:
# Getting champion pairs RDD
# (reads from filtered_match_RDD, the only input RDD this notebook defines;
# the previous name `data` is never assigned and failed on a fresh kernel)
champs_pairs_0 = filtered_match_RDD.flatMap(Pairs)

# Getting winning champion pairs RDD
winners_pairs_0 = champs_pairs_0.filter(lambda x : x[1] == 1)

# Group champion pairs and winning champion pairs by key
champs_pairs_1 = champs_pairs_0.groupByKey()
winners_pairs_1 = winners_pairs_0.groupByKey()

# Simplifying the RDDs to (pair, number of records)
champs_pairs_2 = champs_pairs_1.map(lambda x: (x[0], len(list(x[1]))))
winners_pairs_2 = winners_pairs_1.map(lambda x: (x[0], len(list(x[1]))))

# Joining the RDDs -> (pair, (wins, games))
pairs_0 = winners_pairs_2.join(champs_pairs_2)

# Calculating the pairs win rate in percent
pairs_win_rate = pairs_0.map(lambda x: (x[0],(x[1][0] / x[1][1]) * 100 ))

# Calculating the pairs pick rate in percent
# (num_of_matches is computed in the champion pick rate cell above)
pairs_pick_rate = champs_pairs_2.map(lambda x : (x[0], (x[1] / num_of_matches) * 100))

# Champions synergy: weighted score 0.3 * win rate + 0.7 * pick rate
pairs_synergy_0 = pairs_win_rate.join(pairs_pick_rate)
pairs_synergy_1 = pairs_synergy_0.map(lambda x: (x[0],((x[1][0] * 0.3) + (x[1][1] * 0.7))))

In [None]:
# Preview five (champion pair, win rate in %) records.
# The pair win rates live in pairs_win_rate; no champions_win_rate is defined
# anywhere in this notebook.
pairs_win_rate.take(5)

[('FiddleSticks_Pyke', 41.935483870967744),
 ('Jax_Jhin', 51.63316582914573),
 ('LeeSin_Velkoz', 44.73684210526316),
 ('Chogath_Katarina', 53.84615384615385),
 ('Ashe_Xerath', 49.473684210526315)]

## Item win rate

In [None]:
# Item win function
def item(line):
  """Emit one (item_id, win_flag) pair per item slot of every participant.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(item_id, is_win)`` tuples read from the fields ``item0`` to
    ``item6`` of at most the first 10 participants, where ``is_win`` is
    ``int(participant["win"])``. If the line is not valid JSON or a field is
    missing, the pairs collected up to that point are returned (possibly an
    empty list).
  """
  items = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      for slot in range(7):
        # named item_id so the local does not shadow this function's own name
        item_id = player["item" + str(slot)]
        items.append((item_id, int(player["win"])))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return items

In [None]:
# One (item_id, win_flag) record per item slot of every participant
item_RDD = filtered_match_RDD.flatMap(item)

# Only the records whose win flag is 1
win_item_RDD = item_RDD.filter(lambda record: record[1] == 1)

# Group all records, and the winning records, by item id
grouped_item_RDD = item_RDD.groupByKey()
grouped_win_item_RDD = win_item_RDD.groupByKey()

# Collapse each group to (item_id, number of records in the group)
simple_grouped_item_RDD = grouped_item_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))
simple_grouped_win_item_RDD = grouped_win_item_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))

# Inner join -> (item_id, (winning records, all records))
joined = simple_grouped_win_item_RDD.join(simple_grouped_item_RDD)

# Item win rate in percent
item_win_rate = joined.map(
    lambda kv: (kv[0], (kv[1][0] / kv[1][1]) * 100))

In [None]:
# Preview five (item id, win rate in %) records
item_win_rate.take(5)

[(0, 43.013918320280816),
 (3121, 54.59053556248229),
 (3363, 52.60964912280702),
 (3123, 41.106128550074736),
 (2403, 34.68208092485549)]

## Item pick rate

In [None]:
# One (item_id, win_flag) record per item slot of every participant
item_RDD = filtered_match_RDD.flatMap(item)

# Gather the records of each item id
grouped_item_RDD = item_RDD.groupByKey()

# Collapse each group to (item_id, number of records)
simple_grouped_item_RDD = grouped_item_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))

# Total number of item slots: 70 per match, since each of the 10 players
# has seven item slots
num_of_items = 70 * filtered_match_RDD.count()

# Item pick rate in percent: records of the item / all item slots * 100
item_pick_rate = simple_grouped_item_RDD.map(
    lambda kv: (kv[0], (kv[1] / num_of_items) * 100))

In [None]:
# Preview five (item id, pick rate in %) records
item_pick_rate.take(5)

[(0, 8.532172678608397),
 (3001, 0.3389690152508454),
 (3121, 0.13916026696373118),
 (1082, 0.5079606060904798),
 (3363, 1.8880684562832102)]

## Item and Champion Synergy

In [None]:
# Item Champion function
# It returns a list of keys of the champion and items used in the match
def item_champ(line):
  """Emit one ("champion|item_id", win_flag) pair per item slot.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championName + "|" + str(item_id), is_win)`` tuples built
    from the fields ``item0`` to ``item6`` of at most the first 10
    participants, where ``is_win`` is ``int(participant["win"])``. If the
    line is not valid JSON or a field is missing, the pairs collected up to
    that point are returned (possibly an empty list).
  """
  champs_items = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      champ = player['championName']
      is_win = int(player["win"])
      for slot in range(7):
        item_id = player["item" + str(slot)]
        champs_items.append((champ + "|" + str(item_id), is_win))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return champs_items

In [None]:
# Champion/item synergy pipeline.
# NOTE: item_champ emits one record per item slot (7 per participant), so the
# "counts" below are slot counts; an item held in several slots is counted
# several times.

# ("champion|item", 1) records of winning participants
win_champ_item_RDD = filtered_match_RDD.flatMap(item_champ).filter(lambda x : x[1] == 1)

# ("champion|item", 0) records of losing participants
lose_champ_item_RDD = filtered_match_RDD.flatMap(item_champ).filter(lambda x : x[1] == 0)

# (champion, 1) records: every win of the champion regardless of the item
win_champ_RDD = filtered_match_RDD.flatMap(champion).filter(lambda x : x[1] == 1)

# (champion, 0) records: every loss of the champion regardless of the item
lose_champ_RDD = filtered_match_RDD.flatMap(champion).filter(lambda x : x[1] == 0)

# Grouping the RDDs by keys
grouped_win_champ_item_RDD = win_champ_item_RDD.groupByKey()
grouped_win_champ_RDD = win_champ_RDD.groupByKey()
grouped_lose_champ_item_RDD = lose_champ_item_RDD.groupByKey()
grouped_lose_champ_RDD = lose_champ_RDD.groupByKey()


# Simplifying the RDDs to (key, number of records)
simple_grouped_win_champ_item_RDD = grouped_win_champ_item_RDD.map(lambda x : (x[0], len(list(x[1]))))
simple_grouped_win_champ_RDD = grouped_win_champ_RDD.map(lambda x : (x[0], len(list(x[1]))))
simple_grouped_lose_champ_item_RDD = grouped_lose_champ_item_RDD.map(lambda x : (x[0], len(list(x[1]))))
simple_grouped_lose_champ_RDD = grouped_lose_champ_RDD.map(lambda x : (x[0], len(list(x[1]))))

# Separate the champion name from the item id: ([champion, item], count)
simple_grouped_win_champ_item_RDD = simple_grouped_win_champ_item_RDD.map(lambda x : (x[0].split('|'), x[1]))
simple_grouped_lose_champ_item_RDD = simple_grouped_lose_champ_item_RDD.map(lambda x : (x[0].split('|'), x[1]))

# Make the champion name only the key: (champion, (item, count))
simple_grouped_win_champ_item_RDD = simple_grouped_win_champ_item_RDD.map(lambda x : (x[0][0], (x[0][1], x[1])))
simple_grouped_lose_champ_item_RDD = simple_grouped_lose_champ_item_RDD.map(lambda x : (x[0][0], (x[0][1], x[1])))

# Joining the RDDs -> (champion, ((item, count with item), champion total))
joined_win = simple_grouped_win_champ_item_RDD.join(simple_grouped_win_champ_RDD)
joined_lose = simple_grouped_lose_champ_item_RDD.join(simple_grouped_lose_champ_RDD)

# Per "champion|item": count with the item divided by the champion's total
# wins (resp. losses), in percent. These two shares are what the synergy
# below is built from.
champ_item_win_rate = joined_win.map(lambda x : (x[0] + '|' + x[1][0][0], (x[1][0][1] / x[1][1]) * 100))
champ_item_lose_rate = joined_lose.map(lambda x : (x[0] + '|' + x[1][0][0], (x[1][0][1] / x[1][1]) * 100))

# Calculate synergy = win share - lose share.
# The join is an inner join, so only champion/item keys present in both
# RDDs get a synergy value.
champ_item_synergy = champ_item_win_rate.join(champ_item_lose_rate)
champ_item_synergy = champ_item_synergy.map(lambda x : (x[0], (x[1][0] - x[1][1])))

# Separate the key to be the champion only: (champion, (item, synergy))
champ_item_synergy = champ_item_synergy.map(lambda x : (x[0].split('|'), x[1]))
champ_item_synergy_keyed = champ_item_synergy.map(lambda x : (x[0][0], (x[0][1], x[1])))

# Sorting by synergy, highest first
synergy_sorted = champ_item_synergy_keyed.sortBy(lambda x : x[1][1], ascending = False)


## Item and Class synergy

In [None]:
def item_lane(line):
  """Emit one ("lane|item_id", win_flag) pair per item slot.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(lane + "|" + str(item_id), is_win)`` tuples built from the
    ``lane`` field and the fields ``item0`` to ``item6`` of at most the first
    10 participants, where ``is_win`` is ``int(participant["win"])``. If the
    line is not valid JSON or a field is missing, the pairs collected up to
    that point are returned (possibly an empty list).
  """
  items_lane = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      lane = player['lane']
      is_win = int(player["win"])
      for slot in range(7):
        item_id = player["item" + str(slot)]
        items_lane.append((lane + "|" + str(item_id), is_win))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return items_lane

In [None]:
# One ("lane|item_id", win_flag) record per item slot of every participant
class_item_RDD = filtered_match_RDD.flatMap(item_lane)

# Only the records whose win flag is 1
win_class_item_RDD = class_item_RDD.filter(lambda record: record[1] == 1)

# Group all records, and the winning records, by "lane|item_id"
grouped_class_item_RDD = class_item_RDD.groupByKey()
grouped_win_class_item_RDD = win_class_item_RDD.groupByKey()

# Collapse each group to (key, number of records in the group)
simple_grouped_class_item_RDD = grouped_class_item_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))
simple_grouped_win_class_item_RDD = grouped_win_class_item_RDD.map(
    lambda kv: (kv[0], len(list(kv[1]))))

# Inner join -> (key, (winning records, all records))
joined_class_item = simple_grouped_win_class_item_RDD.join(simple_grouped_class_item_RDD)

# Win rate of the item within the class, in percent; used as the indicator of
# the class and item synergy
class_item_win_rate = joined_class_item.map(
    lambda kv: (kv[0], (kv[1][0] / kv[1][1]) * 100))

In [None]:
# Preview five ("lane|item id", win rate in %) records
class_item_win_rate.take(5)

[('NONE|2033', 48.06041881222108),
 ('MIDDLE|4633', 51.859196826970745),
 ('NONE|3066', 67.98418972332016),
 ('MIDDLE|7023', 58.333333333333336),
 ('BOTTOM|3026', 58.98979968047192)]

## Item Suggestion
Suggestions are based on the champion–item synergy: the item with the highest win rate when used with a champion is suggested for that champion.

In [None]:
#### Getting the RDD of synergies
# NOTE(review): this cell repeats, statement for statement, the pipeline of
# the "Item and Champion Synergy" cell above and rebinds the same names.
# item_champ emits one record per item slot (7 per participant), so the
# "counts" below are slot counts.

# ("champion|item", 1) records of winning participants
win_champ_item_RDD = filtered_match_RDD.flatMap(item_champ).filter(lambda x : x[1] == 1)

# ("champion|item", 0) records of losing participants
lose_champ_item_RDD = filtered_match_RDD.flatMap(item_champ).filter(lambda x : x[1] == 0)

# (champion, 1) records: every win of the champion regardless of the item
win_champ_RDD = filtered_match_RDD.flatMap(champion).filter(lambda x : x[1] == 1)

# (champion, 0) records: every loss of the champion regardless of the item
lose_champ_RDD = filtered_match_RDD.flatMap(champion).filter(lambda x : x[1] == 0)

# Grouping the RDDs by keys
grouped_win_champ_item_RDD = win_champ_item_RDD.groupByKey()
grouped_win_champ_RDD = win_champ_RDD.groupByKey()
grouped_lose_champ_item_RDD = lose_champ_item_RDD.groupByKey()
grouped_lose_champ_RDD = lose_champ_RDD.groupByKey()


# Simplifying the RDDs to (key, number of records)
simple_grouped_win_champ_item_RDD = grouped_win_champ_item_RDD.map(lambda x : (x[0], len(list(x[1]))))
simple_grouped_win_champ_RDD = grouped_win_champ_RDD.map(lambda x : (x[0], len(list(x[1]))))
simple_grouped_lose_champ_item_RDD = grouped_lose_champ_item_RDD.map(lambda x : (x[0], len(list(x[1]))))
simple_grouped_lose_champ_RDD = grouped_lose_champ_RDD.map(lambda x : (x[0], len(list(x[1]))))

# Separate the champion name from the item id: ([champion, item], count)
simple_grouped_win_champ_item_RDD = simple_grouped_win_champ_item_RDD.map(lambda x : (x[0].split('|'), x[1]))
simple_grouped_lose_champ_item_RDD = simple_grouped_lose_champ_item_RDD.map(lambda x : (x[0].split('|'), x[1]))

# Make the champion name only the key: (champion, (item, count))
simple_grouped_win_champ_item_RDD = simple_grouped_win_champ_item_RDD.map(lambda x : (x[0][0], (x[0][1], x[1])))
simple_grouped_lose_champ_item_RDD = simple_grouped_lose_champ_item_RDD.map(lambda x : (x[0][0], (x[0][1], x[1])))

# Joining the RDDs -> (champion, ((item, count with item), champion total))
joined_win = simple_grouped_win_champ_item_RDD.join(simple_grouped_win_champ_RDD)
joined_lose = simple_grouped_lose_champ_item_RDD.join(simple_grouped_lose_champ_RDD)

# Per "champion|item": count with the item divided by the champion's total
# wins (resp. losses), in percent
champ_item_win_rate = joined_win.map(lambda x : (x[0] + '|' + x[1][0][0], (x[1][0][1] / x[1][1]) * 100))
champ_item_lose_rate = joined_lose.map(lambda x : (x[0] + '|' + x[1][0][0], (x[1][0][1] / x[1][1]) * 100))

# Calculate synergy = win share - lose share (inner join: only keys present
# in both RDDs get a value)
champ_item_synergy = champ_item_win_rate.join(champ_item_lose_rate)
champ_item_synergy = champ_item_synergy.map(lambda x : (x[0], (x[1][0] - x[1][1])))

# Separate the key to be the champion only: (champion, (item, synergy))
champ_item_synergy = champ_item_synergy.map(lambda x : (x[0].split('|'), x[1]))
champ_item_synergy_keyed = champ_item_synergy.map(lambda x : (x[0][0], (x[0][1], x[1])))

# Sorting by synergy, highest first
synergy_sorted = champ_item_synergy_keyed.sortBy(lambda x : x[1][1], ascending = False)



#### Suggestion
# suggest the item with highest synergy for the champion

## Longest Time Spent Living

In [None]:
# Champ Time function
# It returns the longest time spent living by each champion in the game
def champ_time(line):
  """Emit one (championName, longestTimeSpentLiving) pair per participant.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championName, longestTimeSpentLiving)`` tuples for at most
    the first 10 participants. If the line is not valid JSON or a field is
    missing, the pairs collected up to that point are returned (possibly an
    empty list).
  """
  champ_times = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      longest_time = player["longestTimeSpentLiving"]
      champ_times.append((player['championName'], longest_time))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return champ_times

In [None]:
# One (championName, longestTimeSpentLiving) record per participant
champ_longest_time_RDD = filtered_match_RDD.flatMap(champ_time)

# Gather the recorded times of each champion
grouped_champ_longest_time_RDD = champ_longest_time_RDD.groupByKey()

# Mean of the recorded times per champion: sum of values / number of values
champ_average_longest_time_RDD = grouped_champ_longest_time_RDD.map(
    lambda kv: (kv[0], sum(list(kv[1])) / len(list(kv[1]))))

# Order the champions by their average (sortBy default order)
champ_average_longest_time_sorted_RDD = champ_average_longest_time_RDD.sortBy(
    lambda kv: kv[1])

In [None]:
# First 50 (champion, average longest time alive) records of the sorted RDD
champ_average_longest_time_sorted_RDD.take(50)

## Physical VS Magical Damage

In [None]:
# Champion Physical Damage function
# It returns the pair of the champion name and its physical damage in the match
def champ_physical_damage(line):
  """Emit one (championName, physicalDamageDealt) pair per participant.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championName, physicalDamageDealt)`` tuples for at most the
    first 10 participants. If the line is not valid JSON or a field is
    missing, the pairs collected up to that point are returned (possibly an
    empty list).
  """
  champs_physical_damage = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      champ_name = player['championName']
      champs_physical_damage.append((champ_name, player['physicalDamageDealt']))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return champs_physical_damage




# Champion Magical Damage function
# It returns the pair of the champion name and its magical damage in the match
def champ_magical_damage(line):
  """Emit one (championName, magicDamageDealt) pair per participant.

  Args:
    line: one match record as a JSON string.

  Returns:
    A list of ``(championName, magicDamageDealt)`` tuples for at most the
    first 10 participants. If the line is not valid JSON or a field is
    missing, the pairs collected up to that point are returned (possibly an
    empty list).
  """
  champs_magical_damage = []
  try:
    participants = json.loads(line)["info"]["participants"]
    for player in participants[:10]:
      champ_name = player['championName']
      champs_magical_damage.append((champ_name, player['magicDamageDealt']))
  except (ValueError, KeyError, TypeError):
    # Best effort on malformed records only (json.JSONDecodeError is a
    # ValueError); unrelated exceptions are no longer swallowed.
    pass
  return champs_magical_damage

In [None]:
# Getting the champions and their damage RDDs
champs_physical_damage_RDD = filtered_match_RDD.flatMap(champ_physical_damage)
champs_magical_damage_RDD = filtered_match_RDD.flatMap(champ_magical_damage)

# Grouping the RDDs by keys
grouped_champs_physical_damage_RDD = champs_physical_damage_RDD.groupByKey()
grouped_champs_magical_damage_RDD = champs_magical_damage_RDD.groupByKey()

# Averaging the RDDs: (champion, sum of values / number of values)
champs_average_physical_damage_RDD = grouped_champs_physical_damage_RDD.map(lambda x : (x[0], sum(list(x[1])) / len(list(x[1]))))
champs_average_magical_damage_RDD = grouped_champs_magical_damage_RDD.map(lambda x : (x[0], sum(list(x[1])) / len(list(x[1]))))

# Joining the two RDDs -> (champion, (avg physical, avg magical))
# When comparing the average physical and magical damage of the champion we can find out its strength
champs_damage_RDD = champs_average_physical_damage_RDD.join(champs_average_magical_damage_RDD)

# Subtracting the average magical damage from the average physical damage.
# For each champion the sign is the indicator: positive means more physical
# damage on average, negative means more magical damage. (The operator was
# missing here; the negative values in the recorded output show it is a
# difference, not a ratio.)
champs_damage_RDD = champs_damage_RDD.map(lambda x : (x[0], (x[1][0] - x[1][1])))

In [None]:
# Preview five (champion, physical-vs-magical damage indicator) records
champs_damage_RDD.take(5)

[('Lulu', -9380.487791837462),
 ('Mordekaiser', -90139.65549597856),
 ('Tryndamere', 165718.5915775401),
 ('Yasuo', 141286.7749542447),
 ('Yorick', 90489.63484087103)]