In [3]:
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

val king_data = Seq(
    Row(1, "Robb Stark", "House Stark"),
    Row(2, "Joffrey Baratheon", "House Lannister"),
    Row(3, "Stannis Baratheon", "House Baratheon"),
    Row(4, "Balon Greyjoy", "House Greyjoy"),
    Row(5, "Mace Tyrell", "House Tyrell"),
    Row(6, "Doran Martell", "House Martell")
)

val battle_data = Seq(
    Row(1, "Battle of Oxcross", 1, 2, 1, "The North"),
    Row(2, "Battle of Blackwater", 3, 4, 0, "The North"),
    Row(3, "Battle of the Fords", 1, 5, 1, "The Reach"),
    Row(4, "Battle of the Green Fork", 2, 6, 0, "The Reach"),
    Row(5, "Battle of the Ruby Ford", 1, 3, 1, "The Riverlands"),
    Row(6, "Battle of the Golden Tooth", 2, 1, 0, "The North"),
    Row(7, "Battle of Riverrun", 3, 4, 1, "The Riverlands"),
    Row(8, "Battle of Riverrun", 1, 3, 0, "The Riverlands")
)


val king_schema = StructType(Array(
            StructField("k_no", IntegerType)
           ,StructField("king", StringType)
           ,StructField("house", StringType)
        ))


val battle_schema = StructType(Array(
    StructField("battle_number", IntegerType),
    StructField("name", StringType),
    StructField("attacker_king", IntegerType),
    StructField("defender_king", IntegerType),
    StructField("attacker_outcome", IntegerType),
    StructField("region", StringType)
))

val king_rdd = spark.sparkContext.parallelize(king_data)
val king_df = spark.createDataFrame(king_rdd, king_schema)

val battle_rdd = spark.sparkContext.parallelize(battle_data)
val battle_df = spark.createDataFrame(battle_rdd, battle_schema)

king_df.show(false)
battle_df.show(false)


+----+-----------------+---------------+
|k_no|king             |house          |
+----+-----------------+---------------+
|1   |Robb Stark       |House Stark    |
|2   |Joffrey Baratheon|House Lannister|
|3   |Stannis Baratheon|House Baratheon|
|4   |Balon Greyjoy    |House Greyjoy  |
|5   |Mace Tyrell      |House Tyrell   |
|6   |Doran Martell    |House Martell  |
+----+-----------------+---------------+

+-------------+--------------------------+-------------+-------------+----------------+--------------+
|battle_number|name                      |attacker_king|defender_king|attacker_outcome|region        |
+-------------+--------------------------+-------------+-------------+----------------+--------------+
|1            |Battle of Oxcross         |1            |2            |1               |The North     |
|2            |Battle of Blackwater      |3            |4            |0               |The North     |
|3            |Battle of the Fords       |1            |5            |1   

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
king_data: Seq[org.apache.spark.sql.Row] = List([1,Robb Stark,House Stark], [2,Joffrey Baratheon,House Lannister], [3,Stannis Baratheon,House Baratheon], [4,Balon Greyjoy,House Greyjoy], [5,Mace Tyrell,House Tyrell], [6,Doran Martell,House Martell])
battle_data: Seq[org.apache.spark.sql.Row] = List([1,Battle of Oxcross,1,2,1,The North], [2,Battle of Blackwater,3,4,0,The North], [3,Battle of the Fords,1,5,1,The Reach], [4,Battle of the Green Fork,2,6,0,The Reach], [5,Battle of the Ruby Ford,1,3,1,The Riverlands], [6,Battle of the Golden Tooth,2,1,0,The North], [7,Battle of Riverrun,3,4,1,The Riverlands], [8,Battle of Riverrun,1,3,0,The Riverlands])
king_schema: org.apache.spark.sql.types.StructType = StructType(StructFie...


In [30]:
import spark.implicits.StringToColumn

val battle_winners_df = battle_df.withColumn("winner", when($"attacker_outcome" === 1, $"attacker_king"
                                   ).otherwise($"defender_king") 
                        )
battle_winners_df.show(false)

+-------------+--------------------------+-------------+-------------+----------------+--------------+------+
|battle_number|name                      |attacker_king|defender_king|attacker_outcome|region        |winner|
+-------------+--------------------------+-------------+-------------+----------------+--------------+------+
|1            |Battle of Oxcross         |1            |2            |1               |The North     |1     |
|2            |Battle of Blackwater      |3            |4            |0               |The North     |4     |
|3            |Battle of the Fords       |1            |5            |1               |The Reach     |1     |
|4            |Battle of the Green Fork  |2            |6            |0               |The Reach     |6     |
|5            |Battle of the Ruby Ford   |1            |3            |1               |The Riverlands|1     |
|6            |Battle of the Golden Tooth|2            |1            |0               |The North     |1     |
|7        

import spark.implicits.StringToColumn
battle_winners_df: org.apache.spark.sql.DataFrame = [battle_number: int, name: string ... 5 more fields]


In [31]:
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.row_number


battle_winners_df.join(king_df, $"winner" === $"k_no", "inner"
                      ).select("name", "region"
                      ).groupBy($"region", $"name").agg(count(lit(1)).as("winner_count")
                      ).withColumn("rnk",row_number().over(Window.partitionBy($"region").orderBy($"winner_count".desc))
                      ).filter($"rnk" === 1).show(false)

+--------------+--------------------+------------+---+
|region        |name                |winner_count|rnk|
+--------------+--------------------+------------+---+
|The North     |Battle of Blackwater|1           |1  |
|The Reach     |Battle of the Fords |1           |1  |
|The Riverlands|Battle of Riverrun  |2           |1  |
+--------------+--------------------+------------+---+



import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.row_number
