In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset stored on Drive can be read by path in later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Create SparkSession:

In [3]:
# All imports first, then the session. The two wildcard imports keep their
# original relative order because the later one wins on any name clash.
# NOTE(review): the wildcard import of pyspark.sql.functions shadows Python
# built-ins such as sum/min/max; prefer the explicit `F` alias in new code.
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import functions as F

# Reuse the active SparkSession if there is one, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()


### Read the DataFrames_sample.json file:

In [10]:
# Load the sample JSON file from the mounted Drive into a Spark DataFrame.
df = spark.read.json(
    "/content/drive/MyDrive/Data (1)/Data/DataFrames_sample.json"
)

### Display part of the data and schema:


In [11]:
# Preview the data; the defaults are spelled out: at most 20 rows, with long
# cell values truncated.
df.show(n=20, truncate=True)

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
|7.74|0.52|256GB SSD|  2|    MacBook| 8GB|       12"|11.04|  2.03|2016|
|8.94|0.68|128GB SSD|  3|MacBook Air| 8GB|     13.3"| 12.8|  2.96|2016|
| 8.0|20.3|  1TB SSD|  4|       iMac|64GB|       27"| 25.6|  20.8|2017|
+----+----+---------+---+-----------+----+----------+-----+------+----+



In [12]:
# Print the schema as a tree: each column's name, data type and nullability.
df.printSchema()

root
 |-- D: double (nullable = true)
 |-- H: double (nullable = true)
 |-- HDD: string (nullable = true)
 |-- Id: long (nullable = true)
 |-- Model: string (nullable = true)
 |-- RAM: string (nullable = true)
 |-- ScreenSize: string (nullable = true)
 |-- W: double (nullable = true)
 |-- Weight: double (nullable = true)
 |-- Year: long (nullable = true)



## Using SQL
### Create Temp View:

In [19]:
# Register df as the temporary view "table1" (replacing any existing view of
# that name) so it can be queried with spark.sql.
df.createOrReplaceTempView("table1")

### Display the "RAM" column and count the rows for each "RAM" value:

In [20]:
# Group the rows by RAM size and count the non-null RAM values in each group;
# the count is exposed as column "C".
count_ram = spark.sql(
    "SELECT RAM ,COUNT(RAM) AS C FROM table1 GROUP BY RAM"
)
count_ram.show()

+----+---+
| RAM|  C|
+----+---+
|64GB|  1|
|16GB|  1|
| 8GB|  2|
+----+---+



### Get all columns where the "Year" column equals 2015:

In [23]:
# Select every column for the rows whose Year is 2015.
# The query uses the standard SQL "=" operator and the column name exactly as
# it appears in the schema ("Year"), so it does not rely on Spark's
# case-insensitive column resolution being enabled.
year_2015 = spark.sql("SELECT * FROM table1 WHERE Year = 2015")
year_2015.show()

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Get the "Model" column where "Model" starts with "M":

In [25]:
# Keep only the model names that begin with "M" (LIKE 'M%' is a prefix match).
MODEL_M = spark.sql(
    "SELECT MODEL FROM table1 WHERE MODEL LIKE 'M%'"
)
MODEL_M.show()

+-----------+
|      MODEL|
+-----------+
|MacBook Pro|
|    MacBook|
|MacBook Air|
+-----------+



### Get all data where the "Model" column equals "MacBook Pro":

In [26]:
# Select every column for the rows whose Model is exactly "MacBook Pro".
# The query uses the standard SQL "=" operator and the schema's column casing
# ("Model"); the stray trailing space in the query text is dropped.
MODEL_MacBook = spark.sql("SELECT * FROM table1 WHERE Model = 'MacBook Pro'")
MODEL_MacBook.show()

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Get all data with multiple conditions: the "RAM" column equals "8GB" and the "Model" column equals "MacBook":

In [27]:
# Select every column for the rows that satisfy both conditions:
# Model is exactly "MacBook" and RAM is exactly "8GB".
# The query uses the standard SQL "=" operator and the schema's column casing,
# so it does not rely on case-insensitive column resolution.
RAM_AND_MODEL = spark.sql(
    "SELECT * FROM table1 WHERE Model = 'MacBook' AND RAM = '8GB'"
)
RAM_AND_MODEL.show()


+----+----+---------+---+-------+---+----------+-----+------+----+
|   D|   H|      HDD| Id|  Model|RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-------+---+----------+-----+------+----+
|7.74|0.52|256GB SSD|  2|MacBook|8GB|       12"|11.04|  2.03|2016|
+----+----+---------+---+-------+---+----------+-----+------+----+



### Get all data with multiple conditions: "D" is greater than or equal to 8 and the "Model" column equals "iMac":

In [28]:
# Select every column for the rows that satisfy both conditions:
# Model is exactly "iMac" and D is at least 8.
# The query uses the standard SQL "=" operator and the schema's column casing,
# so it does not rely on case-insensitive column resolution.
MODEL_D = spark.sql(
    "SELECT * FROM table1 WHERE Model = 'iMac' AND D >= 8"
)
MODEL_D.show()

+---+----+-------+---+-----+----+----------+----+------+----+
|  D|   H|    HDD| Id|Model| RAM|ScreenSize|   W|Weight|Year|
+---+----+-------+---+-----+----+----------+----+------+----+
|8.0|20.3|1TB SSD|  4| iMac|64GB|       27"|25.6|  20.8|2017|
+---+----+-------+---+-----+----+----------+----+------+----+

