In [0]:
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException
from pyspark.sql.window import Window



# Load the raw banking export. The first CSV row supplies the column names
# (header=true) and Spark infers each column's type from the data
# (inferSchema=true).
df = (
    spark.read.format("csv")
        .option("inferSchema", "true")
        .option("header", "true")
        .load("/Volumes/raw-data/banking/csv/Banking_Database.csv")
)

df.printSchema()
# Preview only the first five rows. The limit is applied to the DataFrame
# itself instead of being passed to display(), which does not document a
# positional row-count argument.
df.limit(5).display()

# Split the raw frame into one DataFrame per entity (customers, accounts, transactions, loans, cards).
from pyspark.sql import functions as F

# Customer frame: identity and demographic columns from the raw frame.
# The three space-containing headers are renamed to snake_case, and only one
# row is kept per customer_id.
customers = df.select(
    [
        df["Customer ID"].alias("customer_id"),
        df["First Name"].alias("first_name"),
        df["Last Name"].alias("last_name"),
        "Age",
        "Gender",
        "City",
        "Email",
    ]
).dropDuplicates(subset=["customer_id"])

# Account frame: account attributes keyed by customer_id, with fully
# identical rows collapsed into one.
accounts = (
    df.select(
        [
            df["Customer ID"].alias("customer_id"),
            "Account Type",
            "Account Balance",
            "Date Of Account Opening",
            "Branch ID",
        ]
    )
    .distinct()
)

# Transaction frame: transaction columns keyed by transaction_id and
# customer_id. Rows without a transaction_id are discarded.
transactions = (
    df.select(
        [
            df["TransactionID"].alias("transaction_id"),
            df["Customer ID"].alias("customer_id"),
            "Transaction Date",
            "Transaction Type",
            "Transaction Amount",
            "Account Balance After Transaction",
        ]
    )
    .na.drop(subset=["transaction_id"])
)

# Loan frame: loan columns keyed by loan_id and customer_id. Rows without a
# loan_id are discarded.
loans = (
    df.select(
        [
            df["Loan ID"].alias("loan_id"),
            df["Customer ID"].alias("customer_id"),
            "Loan Amount",
            "Loan Type",
            "Interest Rate",
            "Loan Term",
            "Loan Status",
        ]
    )
    .na.drop(subset=["loan_id"])
)

# Card frame: card columns keyed by card_id and customer_id. Rows without a
# card_id are discarded.
cards = (
    df.select(
        [
            df["CardID"].alias("card_id"),
            df["Customer ID"].alias("customer_id"),
            "Card Type",
            "Credit Limit",
            "Credit Card Balance",
            "Rewards Points",
        ]
    )
    .na.drop(subset=["card_id"])
)

In [0]:
### APPEND: Adds new records without impacting existing data

# Write to a dedicated sub-directory instead of the directory that holds the
# raw Banking_Database.csv. Appending there would drop customer part files
# next to the source file and mix two different schemas in one location.
# A header row is written so the column names are kept with the data, which
# matches the other CSV write in this notebook.
(
    customers.write
    .mode("append")
    .format("csv")
    .option("header", "true")
    .option("path", "/Volumes/raw-data/banking/csv/exports/customers")
    .save()
)

In [0]:
# OVERWRITE: replaces whatever already exists at the target path.
#
# The target must NOT be the directory that contains the source file `df` is
# read from. `df` is evaluated lazily, and overwrite mode clears the target
# directory before the write job runs. Pointing it at the source's parent
# directory can therefore delete Banking_Database.csv (and anything else
# stored there) before it has been read. A dedicated sub-directory is used.
write_path = "/Volumes/raw-data/banking/csv/exports/banking_full"

(
    df.write
    .mode("overwrite")
    .option("header", "true")
    .csv(write_path)
)



In [0]:
### IGNORE : Silently skips the write if data already exists.

# Delta write of the customers frame using save mode "ignore".
customers_ignore_writer = customers.write.format("delta").mode("ignore")
customers_ignore_writer.save("/mnt/banking/customers")

In [0]:
### ERROR: Throws an exception if the path already exists.

# An earlier cell in this notebook writes to the same path, so on a full
# top-to-bottom run this write is expected to be rejected. The exception is
# caught and printed so the demonstration is visible without aborting the
# rest of the notebook run.
try:
    (
        customers.write
        .mode("error")
        .format("delta")
        .save("/mnt/banking/customers")
    )
except AnalysisException as exc:
    print(f"mode('error') write was rejected: {exc}")
