In [1]:
import os
from uuid import uuid4

import matplotlib.pyplot as plt
import seaborn as sns
from pyspark.sql import SparkSession
from pyspark.sql import functions as f
from pyspark.sql.types import DoubleType, StringType

In [2]:
# JDBC connection settings for the PostgreSQL database.
# Each value can be overridden with an environment variable so that real
# credentials do not have to be stored in the notebook; the fallbacks are the
# values that were previously hard-coded, so the default behaviour is unchanged.
url = os.environ.get("FUEL_DB_URL", "jdbc:postgresql://db:5432/fuel_analysis")
properties = {
    "user": os.environ.get("FUEL_DB_USER", "root"),
    "password": os.environ.get("FUEL_DB_PASSWORD", "root"),
    "driver": "org.postgresql.Driver",
}

In [3]:
def create_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    new_id = uuid4()
    return str(new_id)

# Spark UDF that yields a new UUID string for every row.
# Spark assumes UDFs are deterministic and may therefore collapse duplicate
# calls or re-invoke the function during optimisation; because each call must
# return a different value, the UDF is explicitly marked as non-deterministic.
uuid_udf = f.udf(create_uuid, StringType()).asNondeterministic()

In [4]:
# Create (or reuse) the Spark session for this notebook. The PostgreSQL JDBC
# driver jar is passed through `spark.jars` so the JDBC reads/writes can use it.
spark = (
    SparkSession.builder
    .appName("Dolar ETL")
    .master("local[*]")
    .config("spark.jars", "/usr/local/spark/jars/postgresql-42.7.3.jar")
    .getOrCreate()
)


In [5]:
def extract_data(path):
    """Read a semicolon-separated CSV file into a Spark DataFrame.

    The first row is treated as the header and column types are inferred
    by Spark.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The DataFrame produced by the CSV reader.
    """
    reader = spark.read
    return reader.csv(path, header=True, inferSchema=True, sep=';')

def transform_data(df):
    """Parse the raw columns and add calendar breakdown columns.

    Steps, in order:
      * ``data`` is cast to string and parsed as a ``dd/MM/yyyy`` date.
      * ``valor`` has its commas replaced by dots and is cast to double.
      * ``dia``, ``mes``, ``ano`` and ``dia_semana`` are derived from the
        parsed ``data`` column (day of month, month, year, day of week).

    Args:
        df: DataFrame containing at least the ``data`` and ``valor`` columns.

    Returns:
        A new DataFrame with the converted and derived columns.
    """
    parsed_date = f.to_date(f.col("data").cast(StringType()), 'dd/MM/yyyy')
    dotted_value = f.regexp_replace('valor', ',', '.')

    result = df.withColumn("data", parsed_date)
    result = result.withColumn('valor', dotted_value)
    result = result.withColumn('valor', f.col('valor').cast(DoubleType()))

    # Derived columns are appended in this exact order.
    calendar_parts = (
        ('dia', f.dayofmonth),
        ('mes', f.month),
        ('ano', f.year),
        ('dia_semana', f.dayofweek),
    )
    for column_name, extractor in calendar_parts:
        result = result.withColumn(column_name, extractor(f.col('data')))

    return result


In [7]:
# Build the input CSV path and load the file into a DataFrame.
base_path = '/home/jovyan/data/'
path = f"{base_path}dolar-data-jan-2004-to-dez-2023.csv"
df = extract_data(path)

In [8]:
# Apply the parsing / calendar-column transformation to the extracted data.
dollar_info = transform_data(df=df)

In [9]:
# Persist the transformed rows into the `dollar_info` table over JDBC.
# NOTE(review): append mode adds the rows again every time this cell is run.
dollar_info.write.mode("append").jdbc(
    url=url,
    table="dollar_info",
    properties=properties,
)

In [10]:
def load_dim_table(table_name):
    """Read a database table over JDBC into a Spark DataFrame.

    Uses the notebook-level ``url`` and ``properties`` connection settings.

    Args:
        table_name: Name of the table to read.

    Returns:
        The DataFrame returned by the JDBC reader for that table.
    """
    reader = spark.read
    return reader.jdbc(url=url, table=table_name, properties=properties)

In [11]:
# Read back the table that was just written, to inspect the stored rows.
doll = load_dim_table(table_name="dollar_info")

In [12]:
# Print the first rows of the loaded table (these are show()'s default settings).
doll.show(n=20, truncate=True)

+--------------------+----------+---+---+----+----------+--------------------+
|           dollar_id|      data|dia|mes| ano|dia_semana|               valor|
+--------------------+----------+---+---+----+----------+--------------------+
|e27f326a-4bf4-461...|2003-12-31| 31| 12|2003|         4|2.888400000000000000|
|260e974a-3753-494...|2004-01-02|  2|  1|2004|         6|2.885400000000000000|
|ed7eacbf-8e74-49b...|2004-01-05|  5|  1|2004|         2|2.861900000000000000|
|72fd723f-78e1-4a8...|2004-01-06|  6|  1|2004|         3|2.850000000000000000|
|65df32b4-ddf5-425...|2004-01-07|  7|  1|2004|         4|2.871500000000000000|
|e779a80f-c268-49d...|2004-01-08|  8|  1|2004|         5|2.858000000000000000|
|d2c4f914-407b-46b...|2004-01-09|  9|  1|2004|         6|2.841400000000000000|
|cc26f5a3-4e8d-4db...|2004-01-12| 12|  1|2004|         2|2.815500000000000000|
|8f124029-8873-4f5...|2004-01-13| 13|  1|2004|         3|2.801400000000000000|
|e84f638b-81a0-4a6...|2004-01-14| 14|  1|2004|      