In [0]:
# importing necessary libraries
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

# creating spark session
spark=SparkSession.builder.getOrCreate()
file_path = "/Volumes/workspace/default/volume_intro"

# reading csv file data
df= spark.read.format('csv')\
                .option("header", "true")\
                .option("inferSchema", "true")\
                .load(file_path)

# printing file data
df.display()
# creating view for the dataframe: In Databricks, the createTempView method is used to create a local temporary view from a DataFrame. This temporary view acts like a virtual table that you can query using SQL within the same Spark session

# To delete (drop) a temporary view in PySpark (Databricks)
spark.catalog.dropTempView("my_view")
df.createTempView('my_view')


Country,Age,Salary,Purchased
France,44.0,72000.0,No
Spain,27.0,48000.0,Yes
Germany,30.0,54000.0,No
Spain,38.0,61000.0,No
Germany,40.0,,Yes
France,35.0,58000.0,Yes
Spain,,52000.0,No
France,48.0,79000.0,Yes
Germany,50.0,83000.0,No
France,37.0,67000.0,Yes


In [0]:
%sql
-- View all contents of table: SELECT * FROM <table_name>
select * from my_view

Country,Age,Salary,Purchased
France,44.0,72000.0,No
Spain,27.0,48000.0,Yes
Germany,30.0,54000.0,No
Spain,38.0,61000.0,No
Germany,40.0,,Yes
France,35.0,58000.0,Yes
Spain,,52000.0,No
France,48.0,79000.0,Yes
Germany,50.0,83000.0,No
France,37.0,67000.0,Yes


In [0]:
%sql
--Create database: CREATE DATABASE <name>;
create DATABASE sample;

In [0]:
%sql
-- create table in the db
create table sample_table
(
    `Id` bigint primary key,
    `Name` varchar(10) NOT Null,
    `Age` bigint not null,
    `City` char(50),
    `Salary` numeric(7,2)
);

In [0]:
%sql
select * from sample_table;

Id,Name,Age,City,Salary


In [0]:
%sql
-- insert values into the db 
insert into sample_table(`Id`,`Name`,`Age`,`City`,`Salary`)
values
(1,'Sam',26,'Delhi',6000),
(2,'Dam',24,'Bangalore',7000),
(3,'Jam',25,'Pune',5000)

num_affected_rows,num_inserted_rows
3,3


In [0]:
%sql
-- update some values in a row in db
update sample_table set Salary=8000,Age=29 where Id=1

num_affected_rows
1


In [0]:
%sql
-- delete a row in the db
delete from sample_table where Id=1

num_affected_rows
1


In [0]:
%sql
-- Adding new column to the table: If you add new column to the existing table all default values are NULL
ALTER TABLE sample_table ADD COLUMN Gender varchar(10);

In [0]:
%sql
-- dropping a column
ALTER TABLE sample_table SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name');ALTER TABLE sample_table DROP COLUMN Gender;


In [0]:
%sql
-- changing the data type of a column
-- alter table sample_table alter column Gender type char(10); (not working)

-- Add a new column with the desired data type
ALTER TABLE sample_table ADD COLUMNS (Gender_new CHAR(10));

-- Update the new column with values from the old column
UPDATE sample_table SET Gender_new = Gender;

-- Drop the old column
ALTER TABLE sample_table DROP COLUMN Gender;

-- Rename the new column to the original column name
ALTER TABLE sample_table RENAME COLUMN Gender_new TO Gender;

In [0]:
%sql
-- PROJECTION "It is a process of retrieving the data by selecting only the columns is known as Projection "  we can retrieve in any order we want
select Salary,Age,City from sample_table;

Salary,Age,City
7000.0,24,Bangalore
5000.0,25,Pune
4500.0,30,Chennai


In [0]:
%sql
--DISTINCT Clause " It is used to remove the duplicate or repeated values from the Result table " 
select distinct Salary from sample_table;

Salary
7000.0
5000.0
