In [1]:
from pyspark.sql import SparkSession, SQLContext

In [3]:
# Start the Spark session for this notebook, or attach to one that already exists.
spark = (
    SparkSession.builder
    .appName("Analysing Airline Data")
    .getOrCreate()
)

In [4]:
from pyspark.sql.types import Row
from datetime import datetime

In [6]:
# Two sample rows covering the column types queried further down:
# boolean, array, map and timestamp.
# The RDD is built from the context owned by `spark` rather than a bare `sc`,
# which this notebook never defines (it only exists when the kernel is
# launched through the pyspark shell).
record = spark.sparkContext.parallelize([
    Row(id=1,
        name="Jill",
        active=True,
        clubs=['chess', 'hockey'],
        subjects={'math': 80, 'english': 56},
        enrolled=datetime(2019, 8, 1, 14, 1, 5)),
    Row(id=2,
        name="George",
        active=False,
        clubs=['chess', 'soccer'],
        subjects={'math': 60, 'english': 96},
        enrolled=datetime(2019, 8, 1, 14, 1, 6)),
])

In [7]:
# Convert the RDD of Row objects into a DataFrame (schema inferred from the
# rows) and print it.
record_df = spark.createDataFrame(record)
record_df.show()

+------+---------------+-------------------+---+------+--------------------+
|active|          clubs|           enrolled| id|  name|            subjects|
+------+---------------+-------------------+---+------+--------------------+
|  true|[chess, hockey]|2019-08-01 14:01:05|  1|  Jill|[english -> 56, m...|
| false|[chess, soccer]|2019-08-01 14:01:06|  2|George|[english -> 96, m...|
+------+---------------+-------------------+---+------+--------------------+



In [8]:
# Register the DataFrame as a session-scoped temporary view for SQL queries.
# The name is lower-case so it matches the `records` identifier used by every
# query in this notebook, even when spark.sql.caseSensitive is enabled.
record_df.createOrReplaceTempView("records")

In [10]:
# SQL entry point used by the query cells below. It is built from the context
# owned by `spark` (a bare `sc` is never defined in this notebook) and bound
# explicitly to that session so the temp views registered through it resolve.
sqlcontext = SQLContext(spark.sparkContext, sparkSession=spark)

In [11]:
# Read every row back through the temp view to confirm it is queryable.
all_records_query = 'Select * from records'
all_records_df = sqlcontext.sql(all_records_query)
all_records_df.show()

+------+---------------+-------------------+---+------+--------------------+
|active|          clubs|           enrolled| id|  name|            subjects|
+------+---------------+-------------------+---+------+--------------------+
|  true|[chess, hockey]|2019-08-01 14:01:05|  1|  Jill|[english -> 56, m...|
| false|[chess, soccer]|2019-08-01 14:01:06|  2|George|[english -> 96, m...|
+------+---------------+-------------------+---+------+--------------------+



In [12]:
# Index into the array column (0-based, so [1] is the second club) and look up
# a key in the map column.
clubs_and_english_df = sqlcontext.sql('select id, clubs[1], subjects["english"] from records')
clubs_and_english_df.show()

+---+--------+-----------------+
| id|clubs[1]|subjects[english]|
+---+--------+-----------------+
|  1|  hockey|               56|
|  2|  soccer|               96|
+---+--------+-----------------+



In [13]:
# A boolean column can be negated directly in the select list.
negated_active_query = 'select id, not active from records'
sqlcontext.sql(negated_active_query).show()

+---+------------+
| id|(NOT active)|
+---+------------+
|  1|       false|
|  2|        true|
+---+------------+



In [14]:
# A boolean column can serve as the entire WHERE predicate.
active_records_df = sqlcontext.sql('select * from records where active')
active_records_df.show()

+------+---------------+-------------------+---+----+--------------------+
|active|          clubs|           enrolled| id|name|            subjects|
+------+---------------+-------------------+---+----+--------------------+
|  true|[chess, hockey]|2019-08-01 14:01:05|  1|Jill|[english -> 56, m...|
+------+---------------+-------------------+---+----+--------------------+



In [16]:
# Filter rows on a value looked up inside the map column.
high_english_query = "select * from records where subjects['english'] > 90"
sqlcontext.sql(high_english_query).show()

+------+---------------+-------------------+---+------+--------------------+
|active|          clubs|           enrolled| id|  name|            subjects|
+------+---------------+-------------------+---+------+--------------------+
| false|[chess, soccer]|2019-08-01 14:01:06|  2|George|[english -> 96, m...|
+------+---------------+-------------------+---+------+--------------------+



In [17]:
# Publish the DataFrame as a global temporary view.
# The "OrReplace" variant keeps this cell re-runnable: plain
# createGlobalTempView raises if the view name is already registered.
record_df.createOrReplaceGlobalTempView('global_records')

In [18]:
# Global temporary views are addressed through the global_temp database.
global_records_df = sqlcontext.sql('select * from global_temp.global_records')
global_records_df.show()

+------+---------------+-------------------+---+------+--------------------+
|active|          clubs|           enrolled| id|  name|            subjects|
+------+---------------+-------------------+---+------+--------------------+
|  true|[chess, hockey]|2019-08-01 14:01:05|  1|  Jill|[english -> 56, m...|
| false|[chess, soccer]|2019-08-01 14:01:06|  2|George|[english -> 96, m...|
+------+---------------+-------------------+---+------+--------------------+

