# Creating the schema for the Import and Export table

In [1]:

from pyspark.sql.types import *

# Column names applied to the bronze Trade CSV, in file order.
# NOTE(review): "E_Good_and_Services" (singular "Good") is spelled differently
# from "I_Goods_and_Services" — confirm whether that is intentional. It is kept
# as-is here because the Trade_silver table definition uses the same spelling.
_TRADE_COLUMN_NAMES = (
    "Region",
    "Date",
    "Year",
    "Month",
    "Quarter",
    "E_Total_Current_Account",
    "E_Good_and_Services",
    "E_Goods",
    "E_Services",
    "E_Primary_Income",
    "E_Compensation_of_Employee",
    "E_Investment_Income",
    "E_Direct_Investment_Income",
    "E_Portfolio_Investment_Income",
    "E_Other_Investment_Income",
    "E_Secondary_Income",
    "E_Private_Transfer",
    "E_Government_Transfer",
    "I_Total_Current_Account",
    "I_Goods_and_Services",
    "I_Goods",
    "I_Services",
    "I_Primary_Income",
    "I_Compensation_of_Employee",
    "I_Investment_Income",
    "I_Direct_Investment_Income",
    "I_Portfolio_Investment_Income",
    "I_Other_Investment_Income",
    "I_Secondary_Income",
    "I_Private_Transfer",
    "I_Government_Transfer",
)

# Every column, including Date, is declared as a string in this schema.
orderSchema = StructType(
    [StructField(column_name, StringType()) for column_name in _TRADE_COLUMN_NAMES]
)


# Load the single bronze file Trade.csv, treating its first row as a header and
# applying orderSchema rather than letting Spark infer the column types.
bronze_trade_path = "Files/bronze/Trade.csv"
df = (
    spark.read
    .format("csv")
    .option("header", "true")
    .schema(orderSchema)
    .load(bronze_trade_path)
)

# Preview the first 10 rows of the loaded data.
display(df.head(10))


StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 3, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 0da4fe97-4c0c-40b9-ac6a-b3b408711e8a)

In [7]:
from pyspark.sql.functions import col, when, trim
def _blank_to_null(column_name):
    """Return the named column with empty / whitespace-only values replaced by NULL.

    Values that are not blank after trimming are passed through unchanged
    (the untrimmed original value is kept).
    """
    value = col(column_name)
    is_blank = trim(value) == ""
    return when(is_blank, None).otherwise(value).alias(column_name)


# Apply the blank-to-NULL rule to every column of df, keeping names and order.
df2 = df.select([_blank_to_null(column_name) for column_name in df.columns])


StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 9, Finished, Available, Finished)

In [8]:
# Discard every row whose Date is NULL (this includes dates that were blank
# before the blank-to-NULL step that produced df2).
df_clean = df2.dropna(subset=["Date"])

StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 10, Finished, Available, Finished)

In [9]:
# Preview the cleaned frame (blank strings nulled, rows without a Date removed).
# The raw `df` was already previewed right after loading, so showing it again
# here would not reveal the effect of the cleaning steps.
display(df_clean.head(10))

StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 11, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, ea67953c-2f15-4f94-ae11-03e5ccea5802)

# Convert the Date column to a date type



In [10]:
from pyspark.sql.functions import to_date, col
# Parse the string Date column (pattern dd-MM-yyyy) into a real date.
# Build on df_clean rather than the raw df: otherwise the blank-to-NULL and
# drop-null-Date steps are silently discarded and never reach the table written
# from `df` later on. Deriving from df_clean also keeps this cell re-runnable,
# because it no longer re-parses its own already-converted output.
df = df_clean.withColumn("Date", to_date(col("Date"), "dd-MM-yyyy"))

# Preview the first 10 rows after the conversion.
display(df.head(10))

StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 12, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, dc6fa5a2-5721-4262-9f1e-4afb0af3c7b9)

# Define the schema for the Trade_silver_table

In [11]:
# # Define the schema for the Trade_silver_table

from pyspark.sql.types import *
from delta.tables import *

# Columns of the Trade_silver Delta table, in order.
_SILVER_COLUMN_NAMES = (
    "Region",
    "Date",
    "Year",
    "Month",
    "Quarter",
    "E_Total_Current_Account",
    "E_Good_and_Services",
    "E_Goods",
    "E_Services",
    "E_Primary_Income",
    "E_Compensation_of_Employee",
    "E_Investment_Income",
    "E_Direct_Investment_Income",
    "E_Portfolio_Investment_Income",
    "E_Other_Investment_Income",
    "E_Secondary_Income",
    "E_Private_Transfer",
    "E_Government_Transfer",
    "I_Total_Current_Account",
    "I_Goods_and_Services",
    "I_Goods",
    "I_Services",
    "I_Primary_Income",
    "I_Compensation_of_Employee",
    "I_Investment_Income",
    "I_Direct_Investment_Income",
    "I_Portfolio_Investment_Income",
    "I_Other_Investment_Income",
    "I_Secondary_Income",
    "I_Private_Transfer",
    "I_Government_Transfer",
)

# Create the table only if it does not exist yet.
silver_table_builder = DeltaTable.createIfNotExists(spark).tableName("Trade_silver")
for column_name in _SILVER_COLUMN_NAMES:
    # "Date" is the only non-string column; everything else is stored as a string.
    column_type = DateType() if column_name == "Date" else StringType()
    silver_table_builder = silver_table_builder.addColumn(column_name, column_type)

# Keep execute() as the last expression so the cell still echoes the DeltaTable.
silver_table_builder.execute()

StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 13, Finished, Available, Finished)

<delta.tables.DeltaTable at 0x73493bfedfd0>

# Loading the data into the new table after creating the columns

In [12]:
# Write df in Delta format to the table dbo.trade_silver, replacing whatever
# rows the table currently holds (overwrite mode).
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dbo.trade_silver")
)

StatementMeta(, e2e245d5-e460-42b1-8857-84db2834b7f1, 14, Finished, Available, Finished)