In [1]:
# Register the sparksql_magic IPython extension; later cells rely on the %%sparksql cell magic.
%load_ext sparksql_magic

In [2]:
import os
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip

In [3]:
# Build the session configuration on a separately named builder so that `spark`
# is only ever bound to a live SparkSession. If session creation fails, `spark`
# is not left pointing at a Builder object for later cells to trip over.
#
# Environment-specific values come from environment variables; the fallbacks
# point at the `data-services` cluster-local MinIO and Hive metastore hosts.
# NOTE(review): the S3 access/secret key fallbacks are literal credentials kept
# only for backward compatibility. Set SPARK_S3_ACCESS_KEY / SPARK_S3_SECRET_KEY
# explicitly anywhere that is not a throwaway environment.
session_builder = (
    SparkSession.builder
    .appName("TestDataPlatform")
    .master("local[*]")
    # --- S3A object-store access ---
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.path.style.access", os.getenv("SPARK_S3_PATH_STYLE_ACCESS", "true"))
    .config("spark.hadoop.fs.s3a.endpoint", os.getenv("SPARK_S3_ENDPOINT", "http://minio.data-services.svc.cluster.local:9000"))
    .config("spark.hadoop.fs.s3a.access.key", os.getenv("SPARK_S3_ACCESS_KEY", "minioadmin"))
    .config("spark.hadoop.fs.s3a.secret.key", os.getenv("SPARK_S3_SECRET_KEY", "minioadmin"))
    # --- Delta Lake SQL extension and catalog ---
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    # --- Warehouse location and Hive metastore ---
    .config("spark.sql.warehouse.dir", os.getenv("SPARK_HIVE_WAREHOUSE_DIR", "s3a://warehouse/"))
    .config("hive.metastore.uris", os.getenv("SPARK_HIVE_METASTORE_URIS", "thrift://hive-metastore.data-services.svc.cluster.local:9083"))
    # enableHiveSupport() sets spark.sql.catalogImplementation=hive, so that
    # key is not configured separately.
    .enableHiveSupport()
)

# configure_spark_with_delta_pip receives the builder and returns a builder;
# getOrCreate() then yields the session used by the rest of the notebook.
spark = configure_spark_with_delta_pip(session_builder).getOrCreate()

In [4]:
# Bare expression: shows the session object created above as this cell's output.
spark

In [5]:
%%sparksql
-- List the databases currently visible to the session.
SHOW DATABASES;

0
namespace
default


In [6]:
%%sparksql
-- Remove the `landing` database when it is present; IF EXISTS makes this a no-op otherwise.
-- NOTE(review): no CASCADE is given, so this presumably expects `landing` to be empty. Confirm.
-- NOTE(review): `landing` is not created in any visible cell; looks like cleanup of an older layout. Confirm.
DROP DATABASE IF EXISTS landing;

In [7]:
%%sparksql
-- Create the `bronze` database if it is missing, with an explicit s3a location.
CREATE DATABASE IF NOT EXISTS bronze
LOCATION 's3a://warehouse/bronze/';

In [8]:
%%sparksql
-- Create the `silver` database if it is missing, with an explicit s3a location.
CREATE DATABASE IF NOT EXISTS silver
LOCATION 's3a://warehouse/silver/';

In [9]:
%%sparksql
-- Create the `gold` database if it is missing, with an explicit s3a location.
CREATE DATABASE IF NOT EXISTS gold
LOCATION 's3a://warehouse/gold/';

In [10]:
# Read the local STA_TIPO_CAMBIO directory as Parquet into a DataFrame.
df = spark.read.parquet("./data/STA_TIPO_CAMBIO/")

In [11]:
# Preview the first 10 rows with column values printed in full (no truncation).
df.show(n=10, truncate=False)

+----------+----------+
|Fecha     |TipoCambio|
+----------+----------+
|1990-01-01|3.61      |
|1990-01-02|3.45      |
|1990-01-03|3.21      |
|1990-01-04|3.62      |
|1990-01-05|3.72      |
|1990-01-06|3.81      |
|1990-01-07|3.97      |
|1990-01-08|3.84      |
|1990-01-09|3.63      |
|1990-01-10|3.81      |
+----------+----------+
only showing top 10 rows



In [12]:
# Persist the frame as a Parquet table named bronze.STA_TIPO_CAMBIO.
# mode("overwrite") replaces a table left behind by an earlier run; with the
# default save mode this cell raises once the table exists, so the notebook
# could not be re-run from a fresh kernel.
df.write.format("parquet").mode("overwrite").saveAsTable("bronze.STA_TIPO_CAMBIO")

In [13]:
# Persist the same frame as a Delta table named silver.STA_TIPO_CAMBIO.
# mode("overwrite") replaces a table left behind by an earlier run; with the
# default save mode this cell raises once the table exists, so the notebook
# could not be re-run from a fresh kernel.
df.write.format("delta").mode("overwrite").saveAsTable("silver.STA_TIPO_CAMBIO")