In [1]:
from pyspark.sql import SparkSession

# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName("Spark API")
    .getOrCreate()
)

In [2]:
# Load the three input CSV files. All of them are semicolon-delimited, have a
# header row, and let Spark infer the column types instead of declaring them.
df_accounts = (
    spark.read
    .options(header=True, inferSchema=True, sep=";")
    .csv("accounts.csv")
)
df_country_abbreviation = (
    spark.read
    .options(header=True, inferSchema=True, sep=";")
    .csv("country_abbreviation.csv")
)
df_transactions = (
    spark.read
    .options(header=True, inferSchema=True, sep=";")
    .csv("transactions.csv")
)

In [3]:
# Look up the abbreviation stored for Switzerland in the country table.
df_country_abbreviation.where(
    df_country_abbreviation["country_full_name"] == "Switzerland"
).show()

+-----------------+------------+
|country_full_name|abbreviation|
+-----------------+------------+
|      Switzerland|          CH|
+-----------------+------------+



In [4]:
# Keep only the accounts whose country code is "CH" and build a full-name
# column from first_name and last_name (joined with a single space). Only the
# `id` and `user_full_name` columns are kept.
# NOTE(review): this rebinds df_accounts to a frame that no longer has a
# `country` column, so re-running this cell without re-running the CSV load
# cell first will fail.
from pyspark.sql.functions import col, concat_ws

df_accounts = (
    df_accounts
    .where(df_accounts["country"] == "CH")
    .withColumn("user_full_name", concat_ws(" ", col("first_name"), col("last_name")))
    .select("id", "user_full_name")
)

In [5]:
# Keep the transactions with a strictly positive amount and derive a `year`
# column from transaction_date. Only `id`, `amount` and `year` are kept.
# `col` is the function imported in the previous cell.
from pyspark.sql.functions import year

df_transactions = (
    df_transactions
    .where(df_transactions["amount"] > 0)
    .withColumn("year", year(col("transaction_date")))
    .select("id", "amount", "year")
)

In [6]:
# Attach the account holder's full name to each remaining transaction. This is
# an inner join on `id`, so transactions without a matching row in df_accounts
# (i.e. non-"CH" accounts) are dropped.
# NOTE(review): this presumes `id` in the transactions data identifies the
# account rather than the individual transaction; confirm against the CSV.
df_transactions = (
    df_transactions
    .join(df_accounts, on="id", how="inner")
    .select("user_full_name", "amount", "year")
)

In [7]:
# The result: one row per user_full_name and one column per year, where each
# cell is the sum of that name's positive amounts for that year.
# NOTE(review): rows are grouped by name, not by account id, so two accounts
# that share the same full name would be summed together.
pivoted_df = (
    df_transactions
    .groupBy("user_full_name")
    .pivot("year")
    .sum("amount")
)

In [8]:
# Print the pivoted totals; the arguments spell out show()'s defaults
# (at most 20 rows, long cell values truncated).
pivoted_df.show(n=20, truncate=True)

+----------------+--------+------------------+------------------+--------+--------+------------------+------------------+-------+------------------+------------------+------------------+
|  user_full_name|    2011|              2012|              2013|    2014|    2015|              2016|              2017|   2018|              2019|              2020|              2021|
+----------------+--------+------------------+------------------+--------+--------+------------------+------------------+-------+------------------+------------------+------------------+
|   Connie Gibson|    null|           8486.84|           3664.69| 7385.69|    null|           1717.48|           2436.81|   null|           5545.76|              null|              null|
|    Paige Taylor|    null|              null|           5618.18|    null|    null|           2784.89|           2634.62|   null|           9963.01|          14458.31|            436.22|
|  Adison Douglas|    null|          10622.48|          14322.94|