### Create SparkSession:

In [1]:
import findspark
findspark.init()
from pyspark.sql import SparkSession
from pyspark import SparkConf, SparkContext
# Start (or attach to) the Spark session that the cells below share.
spark = (
    SparkSession.builder
    .getOrCreate()
)
# Keep a handle on the session's underlying SparkContext.
sc = spark.sparkContext
# Bare final expression so the notebook displays the session object.
spark

24/08/27 02:25:19 WARN Utils: Your hostname, DESKTOP-26AECPL resolves to a loopback address: 127.0.1.1; using 192.168.220.1 instead (on interface eth1)
24/08/27 02:25:19 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/27 02:25:20 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


24/08/27 02:25:33 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors


### Read the DataFrames_sample.json file:

In [2]:
from pyspark.sql import SparkSession

# Reuse the `spark` session created in the first cell. Calling
# SparkSession.builder.appName(...).getOrCreate() here only handed back that
# same session (Spark logged "Using an existing Spark session; only runtime
# SQL configurations will take effect"), so the app name never applied.
#
# No schema is passed, so the reader infers column names and types from the
# JSON file itself.
df = spark.read.json("DataFrames_sample.json")

24/08/27 02:29:04 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
                                                                                

### Display part of the data and schema:


In [3]:
df.show()

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
|7.74|0.52|256GB SSD|  2|    MacBook| 8GB|       12"|11.04|  2.03|2016|
|8.94|0.68|128GB SSD|  3|MacBook Air| 8GB|     13.3"| 12.8|  2.96|2016|
| 8.0|20.3|  1TB SSD|  4|       iMac|64GB|       27"| 25.6|  20.8|2017|
+----+----+---------+---+-----------+----+----------+-----+------+----+



In [4]:
df.printSchema()

root
 |-- D: double (nullable = true)
 |-- H: double (nullable = true)
 |-- HDD: string (nullable = true)
 |-- Id: long (nullable = true)
 |-- Model: string (nullable = true)
 |-- RAM: string (nullable = true)
 |-- ScreenSize: string (nullable = true)
 |-- W: double (nullable = true)
 |-- Weight: double (nullable = true)
 |-- Year: long (nullable = true)



## Using SQL
### Create Temp View:

In [5]:
from pyspark.sql import SparkSession

# Expose the DataFrame loaded earlier (`df`) to Spark SQL under the name
# `temp_view_name`. The existing `spark` session and `df` are reused:
# rebuilding the session only returned the same one (with a warning), and
# re-reading DataFrames_sample.json produced an identical DataFrame.
# createOrReplaceTempView makes this cell safe to re-run.
df.createOrReplaceTempView("temp_view_name")

# Sanity check: the view should return exactly the rows of `df`.
result_df = spark.sql("SELECT * FROM temp_view_name")
result_df.show()

24/08/27 02:30:35 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
|7.74|0.52|256GB SSD|  2|    MacBook| 8GB|       12"|11.04|  2.03|2016|
|8.94|0.68|128GB SSD|  3|MacBook Air| 8GB|     13.3"| 12.8|  2.96|2016|
| 8.0|20.3|  1TB SSD|  4|       iMac|64GB|       27"| 25.6|  20.8|2017|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Display the "RAM" column and count the rows for each "RAM" value:

In [13]:
# Count how many rows share each RAM value.
# RAM is a string column, so ORDER BY sorts it lexicographically
# ("16GB" < "64GB" < "8GB"), not by memory size.
ram_counts_sql = """
    SELECT RAM, COUNT(*) as c
    FROM temp_view_name
    GROUP BY RAM
    ORDER BY RAM ASC
"""
result_df = spark.sql(ram_counts_sql)
result_df.show()


+----+---+
| RAM|  c|
+----+---+
|16GB|  1|
|64GB|  1|
| 8GB|  2|
+----+---+



### Get all columns where the "Year" column equals 2015:

In [14]:
# Select every column for the rows whose Year is 2015.
# `Year` is inferred as a long (see the printSchema output), so it is
# compared against a numeric literal; the previous string literal '2015'
# only matched through an implicit string-to-number cast.
result_df = spark.sql("""
    SELECT *
    FROM temp_view_name
    WHERE Year = 2015
""")

result_df.show()


+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Get the "Model" column for rows where "Model" starts with "M":

In [15]:
# List the Model values that begin with a capital "M".
# Only the Model column is projected, not the full row.
models_m_sql = """
    SELECT Model
    FROM temp_view_name
    WHERE Model LIKE 'M%'
"""
result_df = spark.sql(models_m_sql)
result_df.show()

+-----------+
|      Model|
+-----------+
|MacBook Pro|
|    MacBook|
|MacBook Air|
+-----------+



### Get all data where the "Model" column equals "MacBook Pro":

In [16]:
# Fetch the full rows whose Model is exactly 'MacBook Pro'.
macbook_pro_sql = """
    SELECT *
    FROM temp_view_name
    WHERE Model = 'MacBook Pro'
"""
result_df = spark.sql(macbook_pro_sql)
result_df.show()


+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Get all data matching multiple conditions: the "RAM" column equals "8GB" and the "Model" column equals "MacBook".

In [17]:
# Combine two equality filters with AND: RAM must be '8GB' and Model must
# be exactly 'MacBook'.
macbook_8gb_sql = """
    SELECT *
    FROM temp_view_name
    WHERE RAM = '8GB' AND Model = 'MacBook'
"""
result_df = spark.sql(macbook_8gb_sql)
result_df.show()


+----+----+---------+---+-------+---+----------+-----+------+----+
|   D|   H|      HDD| Id|  Model|RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-------+---+----------+-----+------+----+
|7.74|0.52|256GB SSD|  2|MacBook|8GB|       12"|11.04|  2.03|2016|
+----+----+---------+---+-------+---+----------+-----+------+----+



### Get all data matching multiple conditions: "D" is greater than or equal to 8 and the "Model" column equals "iMac".

In [18]:
# Mix a numeric range filter with a string equality filter:
# D (a double column) must be at least 8 and Model must be 'iMac'.
imac_depth_sql = """
    SELECT *
    FROM temp_view_name
    WHERE D >= 8 AND Model = 'iMac'
"""
result_df = spark.sql(imac_depth_sql)
result_df.show()


+---+----+-------+---+-----+----+----------+----+------+----+
|  D|   H|    HDD| Id|Model| RAM|ScreenSize|   W|Weight|Year|
+---+----+-------+---+-----+----+----------+----+------+----+
|8.0|20.3|1TB SSD|  4| iMac|64GB|       27"|25.6|  20.8|2017|
+---+----+-------+---+-----+----+----------+----+------+----+

