In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,IntegerType,StringType,DateType
from pyspark.sql.functions import col,sum,count,when,date_format
from datetime import datetime

spark = SparkSession.builder.appName("app").master("local[3]").getOrCreate()

In [0]:
schema = StructType([
    StructField("id",IntegerType(),False),
    StructField("country",StringType(),False),
    StructField("state",StringType(),False),
    StructField("amount",IntegerType(),False),
    StructField("trans_date",DateType(),False)
])
data = [
( 121  , 'US'      , 'approved' , 1000   , datetime(2018,12,18)) ,
( 122  , 'US'      , 'declined' , 2000   , datetime(2018,12,19)) ,
( 123  , 'US'      , 'approved' , 2000   , datetime(2019, 1, 1)) ,
( 124  , 'DE'      , 'approved' , 2000   , datetime(2019, 1, 7)) 
]
transactions = spark.createDataFrame(data,schema)
transactions.show()

+---+-------+--------+------+----------+
| id|country|   state|amount|trans_date|
+---+-------+--------+------+----------+
|121|     US|approved|  1000|2018-12-18|
|122|     US|declined|  2000|2018-12-19|
|123|     US|approved|  2000|2019-01-01|
|124|     DE|approved|  2000|2019-01-07|
+---+-------+--------+------+----------+



In [0]:
# Write an SQL query to find for each month and country, the number of transactions and their total amount, the number of approved transactions and their total amount.
# Return the result table in any order.

transactions.groupBy(date_format(col("trans_date"),'y-M'),'country')\
    .agg(\
        count("country").alias("trans_count"), \
        count( when(col("state")=="approved",1).otherwise(None)).alias("trans_total_amount"), \
        sum(col("amount")).alias("trans_total_amount"),\
        sum( when(col("state")=="approved",col("amount")).otherwise(0) ).alias("approved_total_amount")
        ).show()

+----------------------------+-------+-----------+------------------+------------------+---------------------+
|date_format(trans_date, y-M)|country|trans_count|trans_total_amount|trans_total_amount|approved_total_amount|
+----------------------------+-------+-----------+------------------+------------------+---------------------+
|                     2018-12|     US|          2|                 1|              3000|                 1000|
|                      2019-1|     US|          1|                 1|              2000|                 2000|
|                      2019-1|     DE|          1|                 1|              2000|                 2000|
+----------------------------+-------+-----------+------------------+------------------+---------------------+



In [0]:
transactions.createOrReplaceTempView("trans")
spark.sql("""select date_format(trans_date,'y-MM') as `month`,country,
                    count(*) as trans_count, 
                    count(case when state='approved' then amount else null end) approved_count,
                    sum(amount) trans_total_amount,
                    sum(case when state='approved' then amount else 0 end) approved_total_amount                 
            from trans group by 1,2""").show()

+-------+-------+-----------+--------------+------------------+---------------------+
|  month|country|trans_count|approved_count|trans_total_amount|approved_total_amount|
+-------+-------+-----------+--------------+------------------+---------------------+
|2018-12|     US|          2|             1|              3000|                 1000|
|2019-01|     US|          1|             1|              2000|                 2000|
|2019-01|     DE|          1|             1|              2000|                 2000|
+-------+-------+-----------+--------------+------------------+---------------------+



In [0]:
spark.stop()