In [1]:
from pyspark.sql import SparkSession

# Obtain the notebook's SparkSession (an existing one is reused if present).
spark = (
    SparkSession.builder
    .appName("SparkSQLExample")
    .getOrCreate()
)

# Bare expression so the cell displays the session.
spark

In [4]:
from pyspark.sql import Row

# Employee records as plain tuples: (EmpID, Name, Department, Salary).
records = [
    (101, "Ravi", "Sales", 50000),
    (102, "Sneha", "Engineering", 80000),
    (103, "Kabir", "HR", 45000),
    (104, "Anita", "Engineering", 85000),
    (105, "Amit", "Sales", 55000),
]

# Wrap each tuple in a Row with named fields; the field names become the
# DataFrame's column names.
data = [
    Row(EmpID=emp_id, Name=name, Department=department, Salary=salary)
    for emp_id, name, department, salary in records
]

df = spark.createDataFrame(data)
df.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [5]:
# Expose df to Spark SQL under the name "employees"; the queries in the
# following cells refer to it by that name.
df.createOrReplaceTempView("employees")

In [6]:
# Every column and row of the employees view.
all_employees = spark.sql("SELECT * FROM employees")
all_employees.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [7]:
# Project only the Name and Department columns.
names_and_departments = spark.sql("SELECT Name, Department FROM employees")
names_and_departments.show()

+-----+-----------+
| Name| Department|
+-----+-----------+
| Ravi|      Sales|
|Sneha|Engineering|
|Kabir|         HR|
|Anita|Engineering|
| Amit|      Sales|
+-----+-----------+



In [8]:
# Name and salary of everyone in the Engineering department.
# The string literal uses single quotes (standard SQL). A double-quoted
# "Engineering" is read as a column identifier when
# spark.sql.ansi.doubleQuotedIdentifiers is enabled, which would make the
# query fail instead of filtering.
spark.sql(
    "SELECT Name, Salary FROM employees WHERE Department = 'Engineering'"
).show()

+-----+------+
| Name|Salary|
+-----+------+
|Sneha| 80000|
|Anita| 85000|
+-----+------+



In [9]:
# Rows whose Salary is strictly greater than 50000.
above_50000 = spark.sql("SELECT * FROM employees WHERE Salary > 50000")
above_50000.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  102|Sneha|Engineering| 80000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [10]:
# Average salary per department (one output row per Department value).
avg_salary_by_department = spark.sql(
    "SELECT Department, AVG(Salary) FROM employees GROUP BY Department"
)
avg_salary_by_department.show()

+-----------+-----------+
| Department|avg(Salary)|
+-----------+-----------+
|      Sales|    52500.0|
|Engineering|    82500.0|
|         HR|    45000.0|
+-----------+-----------+



In [11]:
# All employees, highest salary first.
employees_by_salary_desc = spark.sql("SELECT * FROM employees ORDER BY Salary DESC")
employees_by_salary_desc.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  104|Anita|Engineering| 85000|
|  102|Sneha|Engineering| 80000|
|  105| Amit|      Sales| 55000|
|  101| Ravi|      Sales| 50000|
|  103|Kabir|         HR| 45000|
+-----+-----+-----------+------+



In [14]:
# Register df as a global temporary view named "employees_global"; later
# cells query it through the `global_temp` qualifier.
df.createOrReplaceGlobalTempView("employees_global")

In [15]:
# Read the global view back; it is addressed as global_temp.<view name>.
global_employees = spark.sql("SELECT * FROM global_temp.employees_global")
global_employees.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [16]:
# SparkSession.builder.getOrCreate() returns the session that is already
# running in this kernel (and applies the builder options to it), so it does
# not produce a second session and cannot demonstrate cross-session access.
# newSession() creates a separate session on the same SparkContext: it has
# its own temporary views and SQL configuration, while global temporary
# views remain shared with `spark`. The application name belongs to the
# shared SparkContext, so it cannot be set per session.
new_spark = spark.newSession()

# Bare expression so the cell displays the session.
new_spark

In [17]:
# Query the global view through new_spark rather than spark.
names_via_new_session = new_spark.sql("SELECT Name FROM global_temp.employees_global")
names_via_new_session.show()

+-----+
| Name|
+-----+
| Ravi|
|Sneha|
|Kabir|
|Anita|
| Amit|
+-----+

