<a href="https://colab.research.google.com/github/moustafa-7/League-of-legends_Bigdata/blob/main/Bigdata_miniprojects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://www-us.apache.org/dist/spark/spark-2.4.7/spark-2.4.7-bin-hadoop2.7.tgz
!tar xf spark-2.4.7-bin-hadoop2.7.tgz
!pip install -q findspark

In [2]:
import os
# Tell the Spark tooling where the JDK and the unpacked Spark distribution live.
os.environ.update({
    "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
    "SPARK_HOME": "/content/spark-2.4.7-bin-hadoop2.7",
})

In [3]:
import findspark
# Called ahead of `import pyspark`: findspark is expected to make the Spark
# install referenced by SPARK_HOME importable (see the findspark docs).
findspark.init()
import pyspark
# NOTE(review): `random` is not used in the cells visible here — confirm before removing.
import random

In [7]:
from pyspark.sql import SparkSession
# Reuse the active session if there is one, otherwise start one named "LOL".
spark = SparkSession.builder.appName("LOL").getOrCreate()
sc = spark.sparkContext
# multiLine=True lets Spark parse JSON records that span several lines.
# NOTE(review): the relative path assumes Google Drive is already mounted under
# the working directory; no mount step appears in the cells shown here.
d = spark.read.json("drive/MyDrive/small_matches.json", multiLine=True)

In [8]:
# Total number of match records; used below as the denominator for ban and pick rates.
no_matches = d.count()

## 1. Champion win, pick, and ban rates

In [9]:
def champions(row):
  """Flatten one match record into (champion, outcome) pairs.

  Args:
    row: a match record exposing "participants" (each with "teamId" and
      "championId") and "teams" (each with "win" and "bans").

  Returns:
    A list holding one (championId, win) tuple per participant, where win is
    the "win" field of that participant's team, followed by one
    (championId, "ban") tuple for every ban whose championId is not None.
  """
  outcomes = []
  for p in row["participants"]:
    # "Blue" participants are matched to teams[0], everyone else to teams[1].
    # NOTE(review): this relies on the ordering of row["teams"] — confirm.
    team_number = 0 if p["teamId"] == "Blue" else 1
    outcomes.append((p["championId"], row["teams"][team_number]["win"]))

  for t in row["teams"]:
    for b in t["bans"]:
      # Ban entries that carry no champion are skipped.
      if b["championId"] is not None:
        outcomes.append((b["championId"], "ban"))

  return outcomes

In [10]:
# NOTE(review): this rebinds `champions` from the function to the resulting RDD,
# so re-running this cell would hand the RDD itself to flatMap; re-run the
# defining cell first.
champions = d.rdd.flatMap(champions)

In [11]:
import time
def lookup(champions):
  """Print every collected element of `champions`, pausing 0.1 s after each.

  Args:
    champions: any object whose collect() returns an iterable
      (presumably a Spark RDD).
  """
  collected = champions.collect()
  for entry in collected:
    print(entry)
    time.sleep(0.1)

In [12]:
# Gather every outcome label per champion and materialize each group as a list,
# so list.count() can be applied to it downstream.
by_champion = champions.groupByKey().mapValues(list)

In [13]:
# for c, val in by_champion.collect():
#     print(c,val)
#     time.sleep(0.1)

In [14]:
def compute_champion_rates(val):
    """Turn one champion's outcome labels into [ban rate, win rate, loss rate].

    Args:
        val: list of labels for a single champion; the labels counted are
            "ban", "Win" and "Fail".

    Returns:
        [bans / no_matches, wins / played, losses / played], where played is
        wins + losses. When played is zero the last two entries are the
        string "never_played".

    NOTE(review): no pick rate is produced here even though the section
    heading mentions one.
    """
    bans, wins, losses = (val.count(label) for label in ("ban", "Win", "Fail"))
    ban_rate = bans / no_matches
    played = wins + losses
    if not played:
        # Banned-only champions have no games to compute a win rate from.
        return [ban_rate, "never_played", "never_played"]
    return [ban_rate, wins / played, losses / played]

In [15]:
# Per champion: the rate list produced by compute_champion_rates, key left untouched.
win_pick_ban_rates = by_champion.mapValues(compute_champion_rates)

In [None]:
# Same collect / print / 0.1 s pause loop that the lookup() helper implements.
lookup(win_pick_ban_rates)

## 2. Champion Synergies or Duos

In [17]:
import itertools
def champion_duos(row):
  """Emit a ((champion_a, champion_b), 1) record for every same-team duo.

  Args:
    row: a match record whose "participants" each carry "teamId" and
      "championId".

  Returns:
    A list of (pair, 1) tuples: first every 2-combination of the "Blue"
    team's champions, then every 2-combination of the remaining
    participants' champions. Each roster is sorted before pairing, so a
    given duo always yields the same key regardless of participant order.
    Win information is not needed for counting duos and is not read.
  """
  blue_team = []
  other_team = []
  for p in row["participants"]:
    # Anything that is not "Blue" is treated as the second team.
    roster = blue_team if p["teamId"] == "Blue" else other_team
    roster.append(p["championId"])

  duos = []
  for roster in (blue_team, other_team):
    # Sorting first makes (a, b) and (b, a) collapse into a single key.
    for pair in itertools.combinations(sorted(roster), 2):
      duos.append((pair, 1))
  return duos

In [18]:
# One ((champion_a, champion_b), 1) record per same-team duo across all matches.
duos = d.rdd.flatMap(champion_duos)

In [None]:
from operator import add
# Sum the 1s per duo key: how many times each pair appeared on the same team.
duos_count = duos.reduceByKey(add)

In [None]:
# for c in duos_count.collect():
#     print(c)
#     time.sleep(0.01)

# Take the 20 duos with the largest counts (x[1] is the count) and print them.
top_duos = duos_count.top(20, key=lambda x: x[1])
for c in top_duos:
    print(c)
    time.sleep(0.01)

(('Ezreal', 'Yuumi'), 40)
(('Ezreal', 'Sett'), 34)
(('Aphelios', 'Thresh'), 33)
(('Ezreal', 'Graves'), 32)
(('Aphelios', 'Sylas'), 29)
(('Bard', 'Ezreal'), 29)
(('Graves', 'Thresh'), 28)
(('Graves', 'Sylas'), 28)
(('Aphelios', 'Lulu'), 27)
(('Aphelios', 'Lee Sin'), 27)
(('Lee Sin', 'Thresh'), 26)
(('Thresh', 'Varus'), 25)
(('Elise', 'Ezreal'), 25)
(('Graves', 'Syndra'), 24)
(('Ezreal', 'Syndra'), 24)
(('Sylas', 'Thresh'), 24)
(('Jayce', 'Varus'), 24)
(('Ezreal', 'Renekton'), 24)
(('Ezreal', 'Lee Sin'), 23)
(('Lee Sin', 'Sylas'), 23)


## 3. Item win and pick rates

In [None]:
def get_items(row):
  """Flatten one match record into (item name, win flag) pairs.

  Args:
    row: a match record whose "participants" each carry a "stats" mapping
      with "win" and the slots "item0" .. "item6", each slot exposing "name".

  Returns:
    A list with seven (item name, win) tuples per participant, in slot order.

  NOTE(review): a slot whose value is None would raise here — confirm how
  empty item slots are represented in the data.
  """
  pairs = []
  for p in row["participants"]:
    stats = p["stats"]
    won = stats["win"]
    for slot in range(7):
      pairs.append((stats["item" + str(slot)]["name"], won))
  return pairs

In [None]:
# Group the win flags by item name; each group is turned into a list so that
# list.count() and len() can be applied to it downstream.
items = d.rdd.flatMap(get_items).groupByKey().mapValues(list)

In [None]:

# Number of distinct item names; count() avoids shipping every grouped list to the driver.
print(items.count())

190


In [None]:
def compute_item_rates(val):
  """Compute [win rate, pick rate] for a single item.

  Args:
    val: list of win flags, one entry per time the item showed up in a
      participant's item slot.

  Returns:
    [wins / (wins + losses), len(val) / no_matches]. When the list holds no
    True/False entries the win rate is reported as "never_played" (the same
    sentinel compute_champion_rates uses) instead of raising
    ZeroDivisionError.

  NOTE(review): len(val) counts slot occurrences rather than matches, so the
  second value can exceed 1 — confirm that this is the intended pick rate.
  """
  win_count = val.count(True)
  lose_count = val.count(False)
  total_played_games = win_count + lose_count
  pick_rate = len(val) / no_matches
  if not total_played_games:
    # No decided games recorded for this item: nothing to divide by.
    return ["never_played", pick_rate]
  return [win_count / total_played_games, pick_rate]

In [None]:
# Per item: the [win rate, pick rate] list from compute_item_rates, key left untouched.
item_rates = items.mapValues(compute_item_rates)

In [None]:
# Each printed element is (item name, [win_rate, pick_rate]).
print("item       win_rate        pick_rate")
for item in item_rates.collect():
    print(item)
    time.sleep(0.01)

## 4. Item synergies (with champion, with class)