In [1]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark.context import SparkContext

In [2]:
# Path to the service-account key file handed to the GCS connector below.
credentials_location = "/home/jovyan/cred/credentials.json"

# Local-mode Spark configuration with the GCS connector jar and service-account auth.
# NOTE(review): the jar URL ends in "-latest", so it is presumably not pinned to a
# specific connector version -- consider pinning it for reproducible runs.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("Test Connection")
    .setAll([
        ("spark.jars", "https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar"),
        ("spark.hadoop.google.cloud.auth.service.account.enable", "true"),
        ("spark.hadoop.google.cloud.auth.service.account.json.keyfile", credentials_location),
    ])
)

In [3]:
# Reuse the running SparkContext if this kernel already has one. Calling
# SparkContext(conf=conf) a second time (e.g. when re-running this cell) raises,
# because only one SparkContext may be active at a time. Note that when an existing
# context is returned, `conf` is not re-applied to it.
sc = SparkContext.getOrCreate(conf=conf)

# Hadoop configuration of the underlying JVM context. `_jsc` is a private PySpark
# attribute, used here because these settings are applied after the context exists.
hadoop_conf = sc._jsc.hadoopConfiguration()

# Map the gs:// scheme to the GCS connector's filesystem implementations.
hadoop_conf.set("fs.AbstractFileSystem.gs.impl",  "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS")
hadoop_conf.set("fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem")
# Authenticate to GCS with the same service-account key file set on the SparkConf.
hadoop_conf.set("fs.gs.auth.service.account.json.keyfile", credentials_location)
hadoop_conf.set("fs.gs.auth.service.account.enable", "true")

In [4]:
# Create (or fetch) the SparkSession, seeding the builder with the configuration
# of the existing SparkContext `sc`.
spark = (
    SparkSession.builder
    .config(conf=sc.getConf())
    .getOrCreate()
)

In [5]:
# Bare expression as the last line of the cell: the notebook displays the session object.
spark

In [6]:
# Load the raw transactions CSV from the GCS bucket; the first row supplies column names.
df = (
    spark.read
    .option("header", True)
    .csv("gs://supawat-workshop-bucket/raw_data/financial_transactions.csv")
)

In [7]:
# Print the column names and types of the loaded DataFrame. No schema was supplied
# when reading, and the output below shows every column typed as string.
df.printSchema()

root
 |-- transaction_id: string (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- transaction_date: string (nullable = true)
 |-- amount: string (nullable = true)
 |-- merchant: string (nullable = true)
 |-- category: string (nullable = true)
 |-- payment_method: string (nullable = true)
 |-- city: string (nullable = true)
 |-- country: string (nullable = true)
 |-- status: string (nullable = true)



In [8]:
# Preview a bounded number of rows without truncating cell values. Using df.count()
# as the row limit triggered an extra full job just to count the rows and then
# printed the entire dataset into the notebook output.
df.show(20, truncate=False)

+--------------+-----------+----------------+--------+-------------+-----------------+--------------+---------------------+------------+---------+
|transaction_id|customer_id|transaction_date|amount  |merchant     |category         |payment_method|city                 |country     |status   |
+--------------+-----------+----------------+--------+-------------+-----------------+--------------+---------------------+------------+---------+
|T0001         |C164       |2025-09-16      |714.9   |Tops Market  |Mobile & Internet|Credit Card   |khon kaen            |THAILAND    |Failed   |
|T0002         |C167       |2025/09/23      |NULL    |Central World|Shopping         |PromptPay     |phuket               |thailand    |Pending  |
|T0003         |C025       |20-09-2025      |3654.06 |SCB Easy Pay |Shopping         |Cash          |hat yai              |thailand    |Failed   |
|T0004         |C059       |2025-09-19      |1913.03 |TrueMove H   |Shopping         |Cash          |hat yai          

In [9]:
# Shut down the Spark session once the connection test is finished.
spark.stop()