In [62]:
# import libraries

import numpy as np
import pandas as pd
from pyspark.sql import SparkSession

In [63]:
# Initialize Spark session
session = SparkSession.builder.appName("LoLAnalyzer").getOrCreate()

In [64]:
# read in the LoL Champions csv file

df = session.read.csv("LoL_champions.csv",header=True,inferSchema=True)
df.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Tags: string (nullable = true)
 |-- Role: string (nullable = true)
 |-- Range type: string (nullable = true)
 |-- Resourse type: string (nullable = true)
 |-- Base HP: integer (nullable = true)
 |-- HP per lvl: integer (nullable = true)
 |-- Base mana: integer (nullable = true)
 |-- Mana per lvl: double (nullable = true)
 |-- Movement speed: integer (nullable = true)
 |-- Base armor: integer (nullable = true)
 |-- Armor per lvl: double (nullable = true)
 |-- Base magic resistance: integer (nullable = true)
 |-- Magic resistance per lvl: double (nullable = true)
 |-- Attack range: integer (nullable = true)
 |-- HP regeneration: double (nullable = true)
 |-- HP regeneration per lvl: double (nullable = true)
 |-- Mana regeneration: double (nullable = true)
 |-- Mana regeneration per lvl: double (nullable = true)
 |-- Attack damage: integer (nullable = true)
 |-- Attack damage per lvl: double (nullable = true)
 |-- Attack speed per lvl: double 

In [65]:
# SQL Transformer: Make DataFrame to analyze mages
from pyspark.ml.feature import SQLTransformer


viewHealthMana = SQLTransformer(
    statement="SELECT Name, Tags,`Base HP`,`Base Mana` FROM __THIS__ WHERE Tags='Mage' OR Tags LIKE '%Mage%'")


viewHealthMana_frame = viewHealthMana.transform(df)
viewHealthMana_frame.show()

+------------+-------------+-------+---------+
|        Name|         Tags|Base HP|Base Mana|
+------------+-------------+-------+---------+
|        Ahri|Mage,Assassin|    590|      418|
|      Anivia|         Mage|    550|      495|
|       Annie| Mage,Support|    560|      418|
|Aurelion Sol|         Mage|    620|      530|
|        Azir|Mage,Marksman|    550|      320|
|        Bard| Support,Mage|    630|      350|
|       Brand| Mage,Support|    570|      469|
|  Cassiopeia|         Mage|    630|      350|
|    Cho'Gath|    Tank,Mage|    644|      270|
|       Corki|Marksman,Mage|    640|      350|
|        Ekko|Assassin,Mage|    655|      280|
|       Elise|Assassin,Mage|    650|      324|
|     Evelynn|Assassin,Mage|    642|      315|
|      Ezreal|Marksman,Mage|    600|      375|
|Fiddlesticks| Mage,Support|    650|      500|
|       Galio|    Tank,Mage|    632|      500|
|      Gragas| Fighter,Mage|    640|      400|
|Heimerdinger| Mage,Support|    558|      385|
|        Hwei

In [66]:
# regression on viewHealthMana:
#   analyze whether more BaseHP correlates with less Base Mana
#   (one would argue more of one resource means the character has less of another)

from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler


# explanatory/feauture var: BaseHP
#      convert to a vector for regression purposes, assemple
vec = VectorAssembler(inputCols=["Base HP"], outputCol="Explanatory")
assembled_data = vec.transform(viewHealthMana_frame)

# data is small, so use a regparam of 0.01 (too large risks overfitting)
# solver="normal" , no need for any iterative optimization with this smaller example
# Feauture = Base HP (can make it into more values for multi linear)
# response = Base Mana

linear_model = LinearRegression(
    featuresCol="Explanatory", 
    labelCol="Base HP", 
    predictionCol="Base Mana Predict", 
    regParam=0.01, 
    solver="normal"
)

ManaHPfit = linear_model.fit(assembled_data)
ManaHPfit


LinearRegressionModel: uid=LinearRegression_b3d8f15d5b0d, numFeatures=1

In [67]:
# SLR: using BaseHpPredict above, let's observe coefficients

print("coefficient:")
print(ManaHPfit.coefficients)
print("intercept:")
print(ManaHPfit.intercept)
print("p-value")
print(ManaHPfit.summary.pValues)



# shows the linear fit is Base Mana = 0.9997 * BaseHP + 0.1705
# positive correlation


coefficient:
[0.9997184227949548]
intercept:
0.17050078348792644
p-value
[0.0, 2.8679281172117044e-12]


In [68]:
# making predictions

# Make predictions with the assembled data
BaseHpPredict = ManaHPfit.transform(assembled_data)

# shows fitted results and predictions for given Mana values
BaseHpPredict.show()

+------------+-------------+-------+---------+-----------+-----------------+
|        Name|         Tags|Base HP|Base Mana|Explanatory|Base Mana Predict|
+------------+-------------+-------+---------+-----------+-----------------+
|        Ahri|Mage,Assassin|    590|      418|    [590.0]|590.0043702325113|
|      Anivia|         Mage|    550|      495|    [550.0]| 550.015633320713|
|       Annie| Mage,Support|    560|      418|    [560.0]|560.0128175486626|
|Aurelion Sol|         Mage|    620|      530|    [620.0]|619.9959229163599|
|        Azir|Mage,Marksman|    550|      320|    [550.0]| 550.015633320713|
|        Bard| Support,Mage|    630|      350|    [630.0]|629.9931071443094|
|       Brand| Mage,Support|    570|      469|    [570.0]|570.0100017766122|
|  Cassiopeia|         Mage|    630|      350|    [630.0]|629.9931071443094|
|    Cho'Gath|    Tank,Mage|    644|      270|    [644.0]|643.9891650634388|
|       Corki|Marksman,Mage|    640|      350|    [640.0]| 639.990291372259|