### **Paso 4.2 - Mejoras en el Synapse notebook**

#### Crear un notebook y, utilizando un Spark Pool, conectarse a ADLS

In [None]:
%%pyspark
from notebookutils import mssparkutils
from pyspark.sql.functions import *

#### ====== Esto se agregó al Pipeline ======

In [None]:
import datetime

# Plain Python format string (not a Spark expression); used further down to
# render the timestamp as a YYYY-MM-DD date string.
dateFormat = "%Y-%m-%d"

# One-row DataFrame whose single column `ctime` holds Spark's current timestamp.
current_ts_query = "SELECT CURRENT_TIMESTAMP() AS ctime"
ts = spark.sql(current_ts_query)

# Inspect the result: schema first, then the untruncated value.
ts.printSchema()  # ctime: timestamp (nullable = false)
ts.show(truncate=False)

root
 |-- ctime: timestamp (nullable = false)

+-----------------------+
|ctime                  |
+-----------------------+
|2023-05-30 22:22:40.921|
+-----------------------+



In [None]:
# First way to get the timestamp: collect the one-row DataFrame to the driver
# and read the `ctime` field of its only Row.
#
# Collect once and reuse the rows: a bare `ts.collect()` that is not the last
# expression of the cell runs a Spark job whose result is discarded.
rows = ts.collect() # [Row(ctime=datetime.datetime(2023, 5, 30, 21, 38, 4, 538000))]

# Last expression of the cell, so the notebook displays it.
rows[0]["ctime"] # datetime.datetime(2023, 5, 30, 21, 38, 4, 538000)

Out[18]: datetime.datetime(2023, 5, 30, 21, 54, 47, 142000)

In [None]:
# Run the query again and keep only the scalar value.
# From here on `ts` is a Python datetime, no longer a Spark DataFrame.
ctime_rows = spark.sql("SELECT CURRENT_TIMESTAMP() AS ctime").collect()
ts = ctime_rows[0]["ctime"]

In [None]:
# Second way to get the timestamp: the DataFrame API instead of a SQL string.

from pyspark.sql.functions import current_date, current_timestamp

# Do not call `ts.select(...)` here: when the notebook is run top to bottom,
# `ts` is already a Python datetime (no `.select`), and rebinding `ts` to a
# DataFrame would break the later `ts.strftime(...)` call.
# Build a one-row DataFrame from spark.range(1) under its own name instead.
ts_df = spark.range(1).select(current_timestamp().alias('ctime'))

# Keep `ts` as a Python datetime, whichever state it was in before this cell.
ts = ts_df.collect()[0]["ctime"] # e.g. datetime.datetime(2023, 5, 30, 22, 0, 51, 624000)

# Last expression of the cell, so the notebook displays the value.
ts

Out[36]: datetime.datetime(2023, 5, 30, 22, 16, 19, 957000)

In [None]:
# Timestamp -> date, rendered with `dateFormat`. The result is a plain Python
# STRING, not a date object.
# strftime is Python's datetime method, not a Spark function; `ts` must be the
# Python datetime collected above.
todaydate = datetime.datetime.strftime(ts, dateFormat)
print(todaydate)

2023-05-30


#### ===================================

In [None]:
%%pyspark
# Source location settings; they are interpolated into the abfss:// URIs
# built in the cells below.
sourceaccountName = "ottadls011"
sourcecontainer = "raw"
sourceLinkedService = "LS_ADLS_transform"
# '+todaydate' was added here so the folder is ingest/<YYYY-MM-DD> for the
# current run date. (This note used to trail the statement as raw text, which
# is a SyntaxError; it must be a comment.)
sourceFile_location = "ingest/"+todaydate

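Referencia: [Proteger credenciales con servicios vinculados mediante TokenLibrary (documentación de Azure Synapse)](https://learn.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-secure-credentials-with-tokenlibrary?pivots=programming-language-python)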
<center><img src="https://i.postimg.cc/tT45hpCj/adf530.png"></center>

In [None]:
%%pyspark
# Configure Spark to obtain storage tokens through the Synapse linked service.
# Use the `sourceLinkedService` variable from the configuration cell instead of
# repeating the literal name, so the linked service is defined in one place.
spark.conf.set('spark.storage.synapse.linkedServiceName', sourceLinkedService)
spark.conf.set('fs.azure.account.oauth.provider.type', 'com.microsoft.azure.synapse.tokenlibrary.LinkedServiceBasedTokenProvider')

# Root URI of the source container (note the trailing slash).
path = f'abfss://{sourcecontainer}@{sourceaccountName}.dfs.core.windows.net/'

print(path) # abfss://raw@ottadls011.dfs.core.windows.net/

In [None]:
%%pyspark
# Full ABFSS URI of the folder to read: container root + relative folder path.
container_root = f'abfss://{sourcecontainer}@{sourceaccountName}.dfs.core.windows.net'
file = f'{container_root}/{sourceFile_location}'

print(file) # abfss://raw@ottadls011.dfs.core.windows.net/ingest/2023-05-30

In [None]:
%%pyspark
# Load the CSV data found at `file` into a DataFrame.
# The options mirror a comma-separated file with a header row, letting Spark
# infer the column types.
csv_options = {
    "inferSchema": True,
    "header": True,
    "sep": ",",
}
source_df = spark.read.format("csv").options(**csv_options).load(file)