## Examples of Spark SQL queries based on the Supermarket dataset 

## Load Supermarket Dataset from Lakehouse to Spark

In [None]:
# Path of the Supermart grocery sales CSV, relative to the Lakehouse.
csv_path = "Files/salesdata/Supermart Grocery Sales - Retail Analytics Dataset.csv"

# Read the CSV into a Spark DataFrame.
#   header=true      -> the first row supplies the column names.
#   inferSchema=true -> Spark samples the data to pick column types. Without
#                       it every column is loaded as a string, so numeric
#                       columns would be ordered and compared as text in SQL.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load(csv_path)
)

# Render the loaded DataFrame in the notebook output.
display(df)

## Register the DataFrame as a temporary SQL table

In [None]:
# Expose `df` to Spark SQL as a temporary view named "orders" so that SQL
# cells can query it with `FROM orders`. As the method name indicates, an
# existing temporary view with the same name is replaced, so this cell can be
# re-run safely.
df.createOrReplaceTempView("orders")

## In the code cell, set the language to Spark SQL (or start the cell with `%%sql`)

## Select Specific Columns

In [None]:
%%sql
-- Project three columns from the orders view instead of returning every column.
SELECT
    Order_ID,
    Customer_Name,
    Sales
FROM orders;


## Where Clause

In [None]:
%%sql
-- Return all columns, keeping only the rows whose City equals 'Madurai'.
SELECT *
  FROM orders
 WHERE City = 'Madurai';


## Group By

In [None]:
%%sql
-- Count the rows in each Region: one output row per distinct Region value,
-- with the count exposed as Total_Orders.
SELECT
    Region,
    COUNT(*) AS Total_Orders
FROM orders
GROUP BY Region;

## Order By

In [None]:
%%sql
-- Return all rows, highest Sales first.
-- CSV columns are strings unless a schema is inferred or supplied, and ordering
-- a string column is lexicographic ('999' sorts above '1000' in DESC order).
-- The explicit CAST makes the ordering numeric whether Sales is stored as text
-- or as a number.
SELECT *
FROM orders
ORDER BY CAST(Sales AS DOUBLE) DESC;


## Distinct

In [None]:
%%sql
-- List each City value once, with duplicates removed.
SELECT DISTINCT
    City
FROM orders;


## Select Column, Count & Group By 

In [None]:
%%sql
-- Per-city row count restricted to the 'Snacks' category.
-- The WHERE filter is applied to the rows before they are grouped.
SELECT
    city,
    COUNT(*) AS Total_Orders
FROM orders
WHERE Category = 'Snacks'
GROUP BY city;


## Filter

In [None]:
%%sql
-- Return all columns for rows whose Discount is strictly greater than 0.3.
SELECT *
  FROM orders
 WHERE Discount > 0.3;


## Renaming Columns

In [None]:
%%sql
-- Return three columns under new output names by aliasing each with AS.
SELECT
    Order_ID      AS OrderNumber,
    Customer_Name AS Customer,
    Sales         AS Revenue
FROM orders;


## Contains (String Matching)

In [None]:
%%sql
-- Substring match with LIKE: '%' matches any run of characters, so the
-- pattern '%Ram%' keeps rows whose Customer_Name contains 'Ram' anywhere.
SELECT *
  FROM orders
 WHERE Customer_Name LIKE '%Ram%';


## IN (Filtering orders whose City is in a list of values)

In [None]:
%%sql
-- Membership test with IN: keep rows whose City matches any value in the list.
SELECT *
  FROM orders
 WHERE City IN (
           'Madurai',
           'Perambalur'
       );


## Advanced Filtering

In [None]:
%%sql
-- Combine two conditions with AND: keep rows where Sales exceeds 1000 and
-- Profit exceeds 200.
-- CSV columns are strings unless a schema is inferred or supplied. Comparing a
-- string column with an integer literal relies on implicit type coercion,
-- which can truncate fractional values or fail under ANSI mode. The explicit
-- DOUBLE casts make the comparison numeric and keep decimals intact whether
-- the columns are stored as text or as numbers.
SELECT *
FROM orders
WHERE CAST(Sales AS DOUBLE) > 1000
  AND CAST(Profit AS DOUBLE) > 200;


## Sorting

In [None]:
%%sql
-- Multi-key sort: lowest Profit first; rows with equal Profit are ordered by
-- Sales, highest first.
-- CSV columns are strings unless a schema is inferred or supplied, and string
-- ordering is lexicographic ('1000' sorts before '200'). The explicit casts
-- make both sort keys numeric whether the columns are stored as text or as
-- numbers.
SELECT *
FROM orders
ORDER BY
    CAST(Profit AS DOUBLE) ASC,
    CAST(Sales AS DOUBLE) DESC;


## Average

In [None]:
%%sql
-- Aggregate the whole view into a single row holding the mean of Sales.
SELECT
    AVG(Sales) AS Average_Sales
FROM orders;


## Replace Values

In [None]:
%%sql
-- Return each Order_ID with its State value, where every occurrence of
-- 'Tamil Nadu' is replaced by 'TN'. The result is exposed as
-- State_Abbreviation; the underlying view is not modified by a SELECT.
SELECT
    Order_ID,
    REPLACE(State, 'Tamil Nadu', 'TN') AS State_Abbreviation
FROM orders;
