In [None]:
import $ivy.`org.vegas-viz::vegas:0.3.11`
import $ivy.`org.apache.spark::spark-core:2.4.0`
import $ivy.`org.apache.spark::spark-sql:2.4.0`
import $ivy.`org.vegas-viz::vegas-spark:0.3.11`


In [None]:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import vegas.sparkExt._
import vegas._
import org.apache.spark.sql.functions._

# Parameters

In [None]:
// Directory of the cached result parquet data that the notebook reads.
val CACHED_DATA_DIR: String = "/goalimpacct/spark_data_cache/result_parquet"

// Tournament names to keep; turned into a one-column frame and joined on ":tournament".
val TARGET_TOURNAMENTS: List[String] = List("1.Bundesliga")

// Season identifiers to keep; turned into a one-column frame and joined on ":saison".
val TARGET_SEASONS: List[String] = List("2018")

// The X in the "...Last<X>Matches" column names; also scales the playtime threshold.
val TAKE_LAST_X_DATA: Int = 1

# Helper Vars

In [None]:
// Names of the raw-data columns for the configured window, i.e. "<prefix>Last<X>Matches"
// with X = TAKE_LAST_X_DATA.
val playtimeColumn: String = s":playtimeLast${TAKE_LAST_X_DATA}Matches"

// Unranked totals and averages.
val totalOffPointsColumn: String = s":totalOffPointsLast${TAKE_LAST_X_DATA}Matches"
val totalDefPointsColumn: String = s":totalDefPointsLast${TAKE_LAST_X_DATA}Matches"
val avgOffPointsColumn: String = s":avgOffPointsLast${TAKE_LAST_X_DATA}Matches"
val avgDefPointsColumn: String = s":avgDefPointsLast${TAKE_LAST_X_DATA}Matches"

// Ranked totals and averages.
val totalRankedOffPointsColumn: String = s":totalRankedOffPointsLast${TAKE_LAST_X_DATA}Matches"
val totalRankedDefPointsColumn: String = s":totalRankedDefPointsLast${TAKE_LAST_X_DATA}Matches"
val avgRankedOffPointsColumn: String = s":avgRankedOffPointsLast${TAKE_LAST_X_DATA}Matches"
val avgRankedDefPointsColumn: String = s":avgRankedDefPointsLast${TAKE_LAST_X_DATA}Matches"

# Create Spark Context

In [None]:
// Local Spark configuration: use all local cores and bind the driver to localhost.
val conf = new SparkConf()
    .setMaster("local[*]")
    .setAppName("ShowHistory")
    .set("spark.driver.host", "localhost")

// Create the context explicitly so the log level can be lowered to ERROR.
val sc = new SparkContext(conf)
sc.setLogLevel("ERROR")

// getOrCreate() picks up the context created above instead of starting a second one.
val spark = SparkSession.builder().appName("ShowHistory").getOrCreate()


# Helper Functions

In [None]:
/**
  * Builds a calendar of month-start dates covering every year found in the given data.
  *
  * The year is extracted from the ":target-match-timestamp" column. For each year from the
  * earliest to the latest one found (inclusive), the first day of all twelve months is emitted.
  *
  * @param lastMatchDF data frame that contains a ":target-match-timestamp" column
  * @param spark       session whose implicits provide the `$"..."` syntax and `toDF`
  * @return a data frame with a single date column "time"; it is empty when no year can be
  *         determined (no rows, or only null timestamps)
  */
def getYearDF(lastMatchDF : DataFrame, spark: SparkSession) : DataFrame = {

    import spark.implicits._

    // A single aggregation yields both bounds; min/max skip nulls and return null on empty input.
    val yearBounds = lastMatchDF
        .select(year($":target-match-timestamp").as("year"))
        .agg(min($"year"), max($"year"))
        .head()

    val monthList: List[String] =
        if (yearBounds.isNullAt(0) || yearBounds.isNullAt(1)) {
            List.empty[String]
        } else {
            val earliestYear = yearBounds.getInt(0)
            val latestYear = yearBounds.getInt(1)
            // Zero-pad the month so the string literally matches the "yyyy-MM-dd" pattern below
            // instead of depending on lenient date parsing.
            for {
                y <- (earliestYear to latestYear).toList
                m <- 1 to 12
            } yield f"$y%d-$m%02d-01"
        }

    monthList.toDF("timeString")
        .withColumn("time", to_date($"timeString", "yyyy-MM-dd"))
        .select("time")
}

# Create/load Spark DataFrames

In [None]:
import spark.implicits._

// Cached results, read from the parquet directory configured in CACHED_DATA_DIR.
val rawDataDF = spark.read.parquet(CACHED_DATA_DIR)

// One-column frames of the configured tournaments and seasons.
val tournamentMapper = TARGET_TOURNAMENTS.toDF(":tournament")
val saisonMapper = TARGET_SEASONS.toDF(":saison")

In [None]:
// Raw rows reduced to the columns used below and restricted, via inner joins, to the
// configured seasons and tournaments.
val allPlayers = {
    val keyColumns = Seq(
        ":player", ":saison", ":match", ":team", ":tournament", ":target-match-timestamp")
    val metricColumns = Seq(
        playtimeColumn,
        totalOffPointsColumn,
        totalDefPointsColumn,
        avgOffPointsColumn,
        avgDefPointsColumn,
        totalRankedOffPointsColumn,
        totalRankedDefPointsColumn,
        avgRankedOffPointsColumn,
        avgRankedDefPointsColumn)
    val selectedColumns = keyColumns ++ metricColumns

    rawDataDF
        .select(selectedColumns.head, selectedColumns.tail: _*)
        .join(saisonMapper, ":saison")
        .join(tournamentMapper, ":tournament")
}


In [None]:
// Calendar of month-start dates for all years present in the selected player data.
val yearDF: DataFrame = getYearDF(allPlayers, spark)


In [None]:
// Per player and calendar month: sums of the "total" metrics and playtime, means of the "avg"
// metrics. "time" is the first day of the month of ":target-match-timestamp".
// trunc(..., "month") produces that date directly; the earlier string round trip built values
// such as "2018-1-01" and parsed them with the non-matching pattern "yyyy-MM", which only
// worked because of lenient date parsing.
val allPlayersByTimeFrame = allPlayers
    .withColumn("time", trunc($":target-match-timestamp", "month"))
    .groupBy(":player", "time")
    .agg(sum(totalOffPointsColumn).as("totalOffNeutralPoints"),
        avg(avgOffPointsColumn).as("avgOffNeutralPoints"),
        sum(totalRankedOffPointsColumn).as("totalOffRankedPoints"),
        avg(avgRankedOffPointsColumn).as("avgOffRankedPoints"),
        sum(totalDefPointsColumn).as("totalDefNeutralPoints"),
        avg(avgDefPointsColumn).as("avgDefNeutralPoints"),
        sum(totalRankedDefPointsColumn).as("totalDefRankedPoints"),
        avg(avgRankedDefPointsColumn).as("avgDefRankedPoints"),
        sum(playtimeColumn).as("playtime"))


# Building avg values of all players

In [None]:

// Pseudo player "avg": for every month, the mean of each metric over all players.
// The right outer join against the month calendar adds the months without data, whose
// numeric values are then filled with 0.
val avgPlayer = {
    // Each metric is averaged and keeps its original column name.
    val averagedMetrics = Seq(
        "totalOffNeutralPoints",
        "avgOffNeutralPoints",
        "totalOffRankedPoints",
        "avgOffRankedPoints",
        "totalDefNeutralPoints",
        "avgDefNeutralPoints",
        "totalDefRankedPoints",
        "avgDefRankedPoints"
    ).map(metric => avg(metric).as(metric))

    allPlayersByTimeFrame
        .groupBy("time")
        .agg(averagedMetrics.head, averagedMetrics.tail: _*)
        .join(yearDF, Seq("time"), "right_outer")
        .na.fill(0)
        .withColumn(":name", lit("avg"))
}

avgPlayer.count()



# Aggregate each player's off and def values over all time frames and pick the top 5

In [None]:
// Per-player summary over all monthly buckets: mean of the ranked metrics and total playtime.
// Players whose total playtime is below 800 * TAKE_LAST_X_DATA are dropped.
val avgPlayerSeasonPerformance = {
    val minimumPlaytime = 800 * TAKE_LAST_X_DATA

    allPlayersByTimeFrame
        .groupBy(":player")
        .agg(
            avg("totalOffRankedPoints").as("totalOffRankedPoints"),
            avg("avgOffRankedPoints").as("avgOffRankedPoints"),
            avg("totalDefRankedPoints").as("totalDefRankedPoints"),
            avg("avgDefRankedPoints").as("avgDefRankedPoints"),
            sum("playtime").as("playtime"))
        .where($"playtime" >= minimumPlaytime)
}

// The five highest-valued players for each ranked metric.
val best5TotalOff = avgPlayerSeasonPerformance.orderBy(desc("totalOffRankedPoints")).limit(5)
val best5AvgOff = avgPlayerSeasonPerformance.orderBy(desc("avgOffRankedPoints")).limit(5)
val best5TotalDef = avgPlayerSeasonPerformance.orderBy(desc("totalDefRankedPoints")).limit(5)
val best5AvgDef = avgPlayerSeasonPerformance.orderBy(desc("avgDefRankedPoints")).limit(5)


# Top 5 total offensive points

In [None]:
// Monthly "totalOffRankedPoints" series of the top-5 players plus the "avg" pseudo player.
val best5TotalOffPlotable = {
    // Every (month, top-5 player) combination, so months without data still get a row.
    val monthPlayerGrid = yearDF.crossJoin(best5TotalOff.select(":player"))

    val playerSeries = allPlayersByTimeFrame
        .select("time", ":player", "totalOffRankedPoints")
        .join(monthPlayerGrid, Seq("time", ":player"), "right_outer")
        .na.fill(0)

    // union is positional: the ":name" value of the average rows lands in the ":player" column.
    val averageSeries = avgPlayer.select("time", ":name", "totalOffRankedPoints")

    playerSeries.union(averageSeries)
}

best5TotalOff.show()

Vegas("Ranked player total off points")
    .withDataFrame(best5TotalOffPlotable)
    .mark(Line)
    .encodeX("time", Ordinal)
    .encodeY("totalOffRankedPoints", Quant)
    .encodeColor(
        field = ":player",
        dataType = Nominal,
        legend = Legend(orient = "left", title = "Ranked player total off points"))
    .show


# Top 5 average offensive points

In [None]:
// Monthly "avgOffRankedPoints" series of the top-5 players plus the "avg" pseudo player.
val best5AvgOffPlotable = {
    // Every (month, top-5 player) combination, so months without data still get a row.
    val monthPlayerGrid = yearDF.crossJoin(best5AvgOff.select(":player"))

    val playerSeries = allPlayersByTimeFrame
        .select("time", ":player", "avgOffRankedPoints")
        .join(monthPlayerGrid, Seq("time", ":player"), "right_outer")
        .na.fill(0)

    // union is positional: the ":name" value of the average rows lands in the ":player" column.
    val averageSeries = avgPlayer.select("time", ":name", "avgOffRankedPoints")

    playerSeries.union(averageSeries)
}

best5AvgOff.show()

Vegas("Ranked player avg off points")
    .withDataFrame(best5AvgOffPlotable)
    .mark(Line)
    .encodeX("time", Ordinal, axis = Axis(grid = true))
    .encodeY("avgOffRankedPoints", Quant)
    .encodeColor(
        field = ":player",
        dataType = Nominal,
        legend = Legend(orient = "left", title = "Ranked player avg off points"))
    .show


# Top 5 total defensive points

In [None]:
// Smallest per-player "totalDefRankedPoints" in the season summary. It is subtracted from
// every plotted value below, so the chart shows values relative to this baseline.
val lowestTotalDef = avgPlayerSeasonPerformance.agg(min("totalDefRankedPoints")).head().get(0)


// "avg" pseudo player: monthly mean over all players, shifted by the baseline. Months without
// data are added through the right outer join with the month calendar and filled with 0.
val totalDefAvgPlayer = allPlayersByTimeFrame
    .groupBy("time")
    .agg(avg("totalDefRankedPoints").as("totalDefRankedPoints"))
    .select($"time", ($"totalDefRankedPoints" - lit(lowestTotalDef)).as("totalDefRankedPoints"))
    .join(yearDF, Seq("time"), "right_outer")
    .na.fill(0)
    .withColumn(":name", lit("avg"))


// Baseline-shifted monthly series ("points") of the top-5 players plus the "avg" pseudo player.
// union is positional, so the average rows' ":name" and "totalDefRankedPoints" values end up
// in the ":player" and "points" columns.
val best5TotalDefPlotable = allPlayersByTimeFrame.select($"time", $":player", ($"totalDefRankedPoints" - lit(lowestTotalDef)).as("points"))
    .join(yearDF.crossJoin(best5TotalDef.select(":player")), Seq("time", ":player"), "right_outer")
    .na.fill(0)
    .union(totalDefAvgPlayer.select($"time", $":name", $"totalDefRankedPoints" ))

best5TotalDef.show

// The chart title now names the metric that is plotted; it previously carried the
// copy-pasted "avg off" title.
Vegas("Ranked player total def points")
  .withDataFrame(best5TotalDefPlotable)
  .mark(Line)
  .encodeX("time", Ordinal, axis=Axis(grid=true))
  .encodeY("points", Quant)
  .encodeColor(
    field=":player",
    dataType=Nominal,
    legend=Legend(orient="left", title="Ranked player total def points"))
.show

# Top 5 average defensive points

In [None]:
// Smallest per-player "avgDefRankedPoints" in the season summary. It is subtracted from
// every plotted value below, so the chart shows values relative to this baseline.
val lowestAvgDef = avgPlayerSeasonPerformance.agg(min("avgDefRankedPoints")).head().get(0)

// "avg" pseudo player: monthly mean over all players, shifted by the baseline. Months without
// data are added through the right outer join with the month calendar and filled with 0.
val avgDefAvgPlayer = allPlayersByTimeFrame
    .groupBy("time")
    .agg(avg("avgDefRankedPoints").as("avgDefRankedPoints"))
    .select($"time", ($"avgDefRankedPoints" - lit(lowestAvgDef)).as("avgDefRankedPoints"))
    .join(yearDF, Seq("time"), "right_outer")
    .na.fill(0)
    .withColumn(":name", lit("avg"))

// Baseline-shifted monthly series ("points") of the top-5 players plus the "avg" pseudo player.
// union is positional, so the average rows' ":name" and "avgDefRankedPoints" values end up
// in the ":player" and "points" columns.
val best5AvgDefPlotable = allPlayersByTimeFrame.select($"time", $":player", ($"avgDefRankedPoints" - lit(lowestAvgDef)).as("points"))
    .join(yearDF.crossJoin(best5AvgDef.select(":player")), Seq("time", ":player"), "right_outer")
    .na.fill(0)
    .union(avgDefAvgPlayer.select("time", ":name", "avgDefRankedPoints"))

best5AvgDef.show

// The chart title now names the metric that is plotted; it previously carried the
// copy-pasted "avg off" title.
Vegas("Ranked player avg def points")
  .withDataFrame(best5AvgDefPlotable)
  .mark(Line)
  .encodeX("time", Ordinal, axis=Axis(grid=true))
  .encodeY("points", Quant)
  .encodeColor(
    field=":player",
    dataType=Nominal,
    legend=Legend(orient="left", title="Ranked player avg def points"))
.show

In [None]:
// Print the five lowest-valued players for each ranked metric (ascending sort).
Seq(
    "totalOffRankedPoints",
    "avgOffRankedPoints",
    "totalDefRankedPoints",
    "avgDefRankedPoints"
).foreach { metric =>
    avgPlayerSeasonPerformance.orderBy(asc(metric)).limit(5).show()
}
