In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, count
from pyspark.sql.window import Window
from pyspark.sql.functions import rank

# Obtain the notebook's Spark session under the application name
# "GameOfThronesBattles"; getOrCreate() hands back an already-running
# session when there is one instead of starting a second.
spark = (
    SparkSession.builder
    .appName("GameOfThronesBattles")
    .getOrCreate()
)

# Create the king DataFrame.
# Column order below defines how each tuple in king_data is read.
king_columns = ["k_no", "king", "house"]

# One tuple per king: (k_no, king, house).
king_data = [
    (1, "Robb Stark", "House Stark"),
    (2, "Joffrey Baratheon", "House Lannister"),
    (3, "Stannis Baratheon", "House Baratheon"),
    (4, "Balon Greyjoy", "House Greyjoy"),
    (5, "Mace Tyrell", "House Tyrell"),
    (6, "Doran Martell", "House Martell"),
]

king_df = spark.createDataFrame(king_data, schema=king_columns)

# Create the battle DataFrame.
# Column order below defines how each tuple in battle_data is read.
battle_columns = [
    "battle_number",
    "name",
    "attacker_king",
    "defender_king",
    "attacker_outcome",
    "region",
]

# One tuple per battle. attacker_king / defender_king hold king k_no values
# (they are joined against king.k_no further down in this notebook).
battle_data = [
    (1, "Battle of Oxcross", 1, 2, 1, "The North"),
    (2, "Battle of Blackwater", 3, 4, 0, "The North"),
    (3, "Battle of the Fords", 1, 5, 1, "The Reach"),
    (4, "Battle of the Green Fork", 2, 6, 0, "The Reach"),
    (5, "Battle of the Ruby Ford", 1, 3, 1, "The Riverlands"),
    (6, "Battle of the Golden Tooth", 2, 1, 0, "The North"),
    (7, "Battle of Riverrun", 3, 4, 1, "The Riverlands"),
    (8, "Battle of Riverrun", 1, 3, 0, "The Riverlands"),
]

battle_df = spark.createDataFrame(battle_data, schema=battle_columns)

# Show both input DataFrames, each preceded by a printed caption
# (king first, then battle).
for caption, frame in (
    ("King DataFrame:", king_df),
    ("Battle DataFrame:", battle_df),
):
    print(caption)
    frame.display()


King DataFrame:


k_no,king,house
1,Robb Stark,House Stark
2,Joffrey Baratheon,House Lannister
3,Stannis Baratheon,House Baratheon
4,Balon Greyjoy,House Greyjoy
5,Mace Tyrell,House Tyrell
6,Doran Martell,House Martell


Battle DataFrame:


battle_number,name,attacker_king,defender_king,attacker_outcome,region
1,Battle of Oxcross,1,2,1,The North
2,Battle of Blackwater,3,4,0,The North
3,Battle of the Fords,1,5,1,The Reach
4,Battle of the Green Fork,2,6,0,The Reach
5,Battle of the Ruby Ford,1,3,1,The Riverlands
6,Battle of the Golden Tooth,2,1,0,The North
7,Battle of Riverrun,3,4,1,The Riverlands
8,Battle of Riverrun,1,3,0,The Riverlands


# PySpark

In [0]:

# Join battle with king twice so every battle row carries both kings'
# names: alias "a" is the attacking king, alias "d" is the defending king.
battles_aliased = battle_df.alias("b")
attacker_kings = king_df.alias("a")
defender_kings = king_df.alias("d")

# The winning house is the attacker's house when attacker_outcome == 1,
# and the defender's house for any other outcome value.
winner_house_expr = (
    when(col("attacker_outcome") == 1, col("a.house"))
    .otherwise(col("d.house"))
)

joined_df = (
    battles_aliased
    .join(attacker_kings, col("b.attacker_king") == col("a.k_no"), "inner")
    .join(defender_kings, col("b.defender_king") == col("d.k_no"), "inner")
    .select(
        col("b.*"),
        col("a.king").alias("attacker_king_name"),
        col("d.king").alias("defender_king_name"),
        winner_house_expr.alias("winner"),
    )
)

# Count how many battles each house won inside each region.
grouped_df = (
    joined_df
    .groupBy("region", "winner")
    .agg(count("winner").alias("totalwins"))
)

# Order houses within a region from most wins to fewest.
window_spec = (
    Window
    .partitionBy("region")
    .orderBy(col("totalwins").desc())
)

# rank() gives houses with equal win counts the same rank, so a region
# with a tie for first place has more than one row with rn == 1.
ranked_df = grouped_df.withColumn(
    "rn",
    rank().over(window_spec),
)

# Keep only the first-ranked house(s) of every region; tied houses all
# carry rn == 1 and are therefore all retained.
top_ranked_df = ranked_df.where(col("rn") == 1)

# Rename to the reporting column names: region, house, no_of_wins.
result_df = top_ranked_df.select(
    "region",
    col("winner").alias("house"),
    col("totalwins").alias("no_of_wins"),
)

# Show the result
result_df.display()

region,house,no_of_wins
The North,House Stark,2
The Reach,House Stark,1
The Reach,House Martell,1
The Riverlands,House Baratheon,2


In [0]:
# Expose both DataFrames to Spark SQL as temporary views named
# "king" and "battle" (replacing any existing views with those names).
for view_name, source_df in (("king", king_df), ("battle", battle_df)):
    source_df.createOrReplaceTempView(view_name)

# Spark SQL

In [0]:
%sql
-- House(s) with the most battle wins in each region.
WITH battle_winners AS (
    -- Join king twice (a = attacking king, d = defending king) and take the
    -- attacker's house as winner when attacker_outcome = 1, else the defender's.
    SELECT
        b.*,
        a.king AS attacker_king_name,
        d.king AS defender_king_name,
        CASE
            WHEN attacker_outcome = 1 THEN a.house
            ELSE d.house
        END AS winner
    FROM battle AS b
    INNER JOIN king AS a
        ON b.attacker_king = a.k_no
    INNER JOIN king AS d
        ON b.defender_king = d.k_no
),
region_rankings AS (
    -- Wins per (region, house), ranked inside each region from most to fewest.
    -- RANK() gives tied houses the same rank.
    SELECT
        region,
        winner,
        COUNT(winner) AS totalwins,
        RANK() OVER (PARTITION BY region ORDER BY COUNT(winner) DESC) AS rn
    FROM battle_winners
    GROUP BY region, winner
)

SELECT
    region,
    winner AS house,
    totalwins AS no_of_wins
FROM region_rankings
WHERE rn = 1

region,house,no_of_wins
The North,House Stark,2
The Reach,House Stark,1
The Reach,House Martell,1
The Riverlands,House Baratheon,2
