In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,StringType,IntegerType,DateType
from pyspark.sql.functions import col,sum
from datetime import datetime

# Create (or reuse, if one already exists) a Spark session named "app"
# that runs locally with the master URL "local[3]".
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[3]")
    .getOrCreate()
)

In [0]:
# Users table: one row per account number with the account holder's name.
# Both columns are declared non-nullable.
schema = StructType([
    StructField("account", IntegerType(), nullable=False),
    StructField("name", StringType(), nullable=False),
])
data = [
    (900001, "Alice"),
    (900002, "Bob"),
    (900003, "Charlie"),
]
users = spark.createDataFrame(data, schema=schema)
users.show()

+-------+-------+
|account|   name|
+-------+-------+
| 900001|  Alice|
| 900002|    Bob|
| 900003|Charlie|
+-------+-------+



In [0]:
# Transactions table: one row per transaction against an account.
# `amount` is signed (the sample data contains both positive and negative
# values); `transacted_on` is a DateType column. All columns are non-nullable.
schema = StructType([
    StructField("trans_id", IntegerType(), nullable=False),
    StructField("account", IntegerType(), nullable=False),
    StructField("amount", IntegerType(), nullable=False),
    StructField("transacted_on", DateType(), nullable=False),
])
data = [
    (1, 900001, 7000, datetime(2020, 8, 1)),
    (2, 900001, 7000, datetime(2020, 9, 1)),
    (3, 900001, -3000, datetime(2020, 9, 2)),
    (4, 900002, 1000, datetime(2020, 9, 2)),
    (5, 900003, 6000, datetime(2020, 8, 7)),
    (6, 900003, 6000, datetime(2020, 9, 7)),
    (7, 900003, -4000, datetime(2020, 9, 11)),
]
trans = spark.createDataFrame(data, schema=schema)
trans.show()


+--------+-------+------+-------------+
|trans_id|account|amount|transacted_on|
+--------+-------+------+-------------+
|       1| 900001|  7000|   2020-08-01|
|       2| 900001|  7000|   2020-09-01|
|       3| 900001| -3000|   2020-09-02|
|       4| 900002|  1000|   2020-09-02|
|       5| 900003|  6000|   2020-08-07|
|       6| 900003|  6000|   2020-09-07|
|       7| 900003| -4000|   2020-09-11|
+--------+-------+------+-------------+



In [0]:
# Task: report the name and balance of every user whose balance is higher
# than 10000. An account's balance is the sum of the amounts of all
# transactions involving that account. The result may be in any order.

# Step 1: total the transaction amounts per account.
# NOTE: `sum` here is pyspark.sql.functions.sum (imported at the top of the
# notebook), not the Python builtin.
balances = trans.groupBy("account").agg(sum("amount").alias("balance"))

# Step 2: keep only the large balances, then attach the user name.
# An inner join on the shared "account" column is used so that accounts with
# no matching row in `users` are dropped instead of being reported with a
# NULL name, and so the join no longer relies on a column reference taken
# from the pre-aggregation `trans` DataFrame.
(
    balances
    .where(col("balance") > 10000)
    .join(users, on="account", how="inner")
    .select("name", "balance")
    .show()
)

+-----+-------+
| name|balance|
+-----+-------+
|Alice|  11000|
+-----+-------+



In [0]:
# Same report as the DataFrame version, expressed in Spark SQL.
# Register both DataFrames as temporary views so they can be queried by name.
trans.createOrReplaceTempView("t")
users.createOrReplaceTempView("u")

# An inner join is used on purpose: with a RIGHT JOIN, every transaction whose
# account is missing from `u` would have NULL for both u.account and u.name,
# so unrelated accounts would be summed together into one NULL-named group
# that could wrongly pass the HAVING filter.
# Grouping by u.account as well as u.name keeps two users who happen to share
# a name in separate groups.
spark.sql(
    """
    select u.name, sum(t.amount) as balance
    from u
    join t using (account)
    group by u.account, u.name
    having sum(t.amount) > 10000
    """
).show()

+-----+-------+
| name|balance|
+-----+-------+
|Alice|  11000|
+-----+-------+



In [0]:
# Stop the Spark session now that the notebook is finished with it.
# Any cell that uses `spark` after this point will need a new session.
spark.stop()