<h2>
    Write PySpark code to print the cumulative balance of the
merchant account at the end of each day, with the total balance reset back to zero at the end of each month.
</h2>

In [3]:
from pyspark.sql import SparkSession
# Obtain the notebook's SparkSession: getOrCreate() hands back an already
# running session if there is one, otherwise it builds a new one with this
# application name.
spark = (
    SparkSession.builder
    .appName('Cumulative-Transactions')
    .getOrCreate()
)

In [19]:
from pyspark.sql.functions import to_timestamp
# Sample merchant transactions: (transaction_id, type, amount, transaction_date).
# The rows are deliberately not in chronological order.
data = [
    (19153, 'deposit', 65.90, '2022-07-10 10:00:00'),
    (53151, 'deposit', 178.55, '2022-07-08 10:00:00'),
    (29776, 'withdrawal', 25.90, '2022-07-08 10:00:00'),
    (16461, 'withdrawal', 45.99, '2022-07-08 13:00:00'),
    (77134, 'deposit', 32.60, '2022-07-10 10:00:00'),
]

# DDL-style schema string; transaction_date is loaded as text and cast below.
# NOTE(review): `float` is Spark's single-precision type, which is presumably
# why the daily totals show trailing digits before rounding -- consider
# DOUBLE or DECIMAL for monetary amounts.
schema="""
 transaction_id INT,
 type STRING,
 amount float,
 transaction_date string
"""

# Build the frame and convert the textual date into a real timestamp column
# in a single expression, so `df` is only ever bound to the final result.
df = (
    spark.createDataFrame(data, schema)
    .withColumn("transaction_date", to_timestamp("transaction_date"))
)
df.printSchema()

root
 |-- transaction_id: integer (nullable = true)
 |-- type: string (nullable = true)
 |-- amount: float (nullable = true)
 |-- transaction_date: timestamp (nullable = true)



In [30]:
from pyspark.sql.functions import sum,to_date,expr,col

# Net movement per calendar day: deposits count as positive, every other
# transaction type is subtracted.
# `sum` here is pyspark.sql.functions.sum (imported above), not the builtin.
signed_amount = expr("CASE WHEN type='deposit' then amount else -amount end")

# Truncate the timestamp to a date so all transactions of a day share one key.
txn_day = to_date(df.transaction_date).alias('transaction_date')

closing_balance_df = (
    df.groupBy(txn_day)
    .agg(sum(signed_amount).alias("closing_balance"))
    .orderBy(col('transaction_date'))
)
closing_balance_df.show()

+----------------+------------------+
|transaction_date|   closing_balance|
+----------------+------------------+
|      2022-07-08|106.66000175476074|
|      2022-07-10|              98.5|
+----------------+------------------+



In [40]:
from pyspark.sql.functions import year,month,round
from pyspark.sql.window import Window
# Running balance that restarts every calendar month.
# Partitioning by (year, month) keeps each month's days in their own group,
# so the cumulative sum never carries over from the previous month; ordering
# by date inside the partition makes the sum accumulate day by day.
window_spec = (
    Window.partitionBy(
        year(col('transaction_date')),
        month(col('transaction_date')),
    )
    .orderBy(col('transaction_date'))
)

final_df = (
    closing_balance_df
    # `sum` is the PySpark aggregate imported in an earlier cell; applied over
    # the ordered window it yields the cumulative total up to the current day.
    .withColumn("balance", sum(col('closing_balance')).over(window_spec))
    # Round only the final figure so intermediate sums keep full precision.
    .select(col('transaction_date'), round(col('balance'), 2).alias('balance'))
    # The window shuffles rows by (year, month), so the order in which the
    # partitions come back is not guaranteed. Sort explicitly so the printed
    # balances are always chronological, including across several months.
    .orderBy(col('transaction_date'))
)
final_df.show()

+----------------+-------+
|transaction_date|balance|
+----------------+-------+
|      2022-07-08| 106.66|
|      2022-07-10| 205.16|
+----------------+-------+

