In [0]:
# List the contents of the Salespoint mount point.
mount_root = "/mnt/Salespoint"
dbutils.fs.ls(mount_root)

In [0]:
# Load 1000_Sales_Records.csv from the mounted location.
# The first row is treated as the header and column types are inferred.
sales_csv_path = "/mnt/Salespoint/1000_Sales_Records.csv"

df = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load(sales_csv_path)
)
display(df)

In [0]:
# Project the sales DataFrame down to the four columns of interest.
wanted_columns = ["Region", "Country", "ItemType", "SalesChannel"]
selected_df = df.select(*wanted_columns)
selected_df.show()

In [0]:
# Rename the SalesChannel column to SalesPlatform.
old_name, new_name = "SalesChannel", "SalesPlatform"
selected_df = selected_df.withColumnRenamed(old_name, new_name)
selected_df.show()

In [0]:
%python
from pyspark.sql.functions import sum, desc, round

# calculating total revenue for Region, ItemType, SalesChannel
kpidf = df.groupBy("Region", "ItemType", "SalesChannel") \
          .agg(round(sum("TotalRevenue"), 2).alias("TotalRevenue")) \
          .orderBy(desc("TotalRevenue"))

display(kpidf)

In [0]:
# Register the KPI DataFrame as a temporary view so it can be queried with SQL.
kpi_view_name = "kpidf"
kpidf.createOrReplaceTempView(kpi_view_name)

In [0]:
%sql
-- Total revenue per region / item type / sales channel from the kpidf
-- temporary view, highest total first.
SELECT
  region,
  itemtype,
  saleschannel,
  SUM(TotalRevenue) AS total_revenue
FROM kpidf
GROUP BY
  region,
  itemtype,
  saleschannel
ORDER BY total_revenue DESC

In [0]:
# Save the total-revenue data under the Salespoint mount as CSV with a header
# row, replacing any output already present at the target path.
revenue_csv_target = "/mnt/Salespoint/Salestotalrevenue"
(
    kpidf.write.format("csv")
    .option("header", "true")
    .mode("overwrite")
    .save(revenue_csv_target)
)

In [0]:
# List the contents of the DBFS FileStore folder (this cell only lists; it does
# not write anything).
dbfs_folder = '/FileStore/tables/Prajwal'
dbutils.fs.ls(dbfs_folder)

In [0]:
%python
#writing total revenue to DBFS location
kpidf.write.format("csv").option("header", "true").mode("overwrite").save("/FileStore/tTables/Prajwal")

In [0]:
# Save the total-revenue data under the Salespoint mount in Delta format,
# replacing any output already present at the target path.
revenue_delta_target = "/mnt/Salespoint/Salestotalrevenuedelta"
delta_writer = kpidf.write.format("delta").mode("overwrite")
delta_writer.save(revenue_delta_target)

In [0]:
%python
#saving total revenue to adls with delta format as table
kpidf.write.format("delta").mode("overwrite").saveAsTable("Salespoint_Salestotalrevenuedelta")

In [0]:
%python
# Define the JDBC URL
jdbc_url = "jdbc:sqlserver://keerdbserver.database.windows.net:1433;database=prajwalpanadedb;user=Hcluser@keerdbserver;password={your_password_here};encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"

# Define connection properties
connection_properties = {
    "user": "hcluser",
    "password": "Hclindia@123",
    "driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver"
}

# Check connectivity (by loading a dummy table, replace 'INFORMATION_SCHEMA.TABLES' with your table name)
df = spark.read.jdbc(
    url=jdbc_url,
    table="SalesLT.Address",
    properties=connection_properties
)
display(df)

In [0]:
#we have selecting few columns and writing to adls
col_df = df.select("AddressLine1", "City", "PostalCode", "StateProvince", "CountryRegion")
col_df.show()

In [0]:
# Write the selected address columns under the Salespoint mount as CSV with a
# header row, replacing any output already present at the target path.
address_csv_target = "/mnt/Salespoint/Address"
(
    col_df.write.format("csv")
    .option("header", "true")
    .mode("overwrite")
    .save(address_csv_target)
)