In [1]:
from pyspark.sql import SparkSession

In [2]:
spark = SparkSession.builder.appName('c10').getOrCreate()

In [3]:
spark.sql("SELECT 1 + 1").show()

+-------+
|(1 + 1)|
+-------+
|      2|
+-------+



In [6]:
spark.read.json('flight-data/json/2015-summary.json')\
.createOrReplaceTempView("some_sql_view") #DF => SQL

In [7]:
spark.sql("""
SELECT DEST_COUNTRY_NAME, sum(count)
FROM some_sql_view GROUP BY DEST_COUNTRY_NAME
""")\
.where("DEST_COUNTRY_NAME like '%S'").where("`sum(count)` > 10")\
.count() # SQL => DF

0

In [11]:
#uncorrelated predicate subqueries
spark.sql("""
SELECT dest_country_name FROM some_sql_view
GROUP BY dest_country_name ORDER BY sum(count) DESC LIMIT 5
""").show()

+-----------------+
|dest_country_name|
+-----------------+
|    United States|
|           Canada|
|           Mexico|
|   United Kingdom|
|            Japan|
+-----------------+



In [13]:
spark.sql("""
SELECT * FROM some_sql_view
WHERE origin_country_name IN (SELECT dest_country_name FROM some_sql_view
      GROUP BY dest_country_name ORDER BY sum(count) DESC LIMIT 5)
""").show()

+--------------------+-------------------+-----+
|   DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|
+--------------------+-------------------+-----+
|               Egypt|      United States|   15|
|          Costa Rica|      United States|  588|
|             Senegal|      United States|   40|
|             Moldova|      United States|    1|
|              Guyana|      United States|   64|
|               Malta|      United States|    1|
|            Anguilla|      United States|   41|
|             Bolivia|      United States|   30|
|             Algeria|      United States|    4|
|Turks and Caicos ...|      United States|  230|
|Saint Vincent and...|      United States|    1|
|               Italy|      United States|  382|
|            Pakistan|      United States|   12|
|             Iceland|      United States|  181|
|    Marshall Islands|      United States|   42|
|          Luxembourg|      United States|  155|
|            Honduras|      United States|  362|
|         The Bahama

In [14]:
#correlated predicate subqueries

In [16]:
spark.sql("""
SELECT * FROM some_sql_view f1
WHERE EXISTS (SELECT 1 FROM some_sql_view f2
              WHERE f1.dest_country_name = f2.origin_country_name)
AND EXISTS (SELECT 1 FROM some_sql_view f2
            WHERE f2.dest_country_name = f1.origin_country_name)
""").show()

+--------------------+--------------------+-----+
|   DEST_COUNTRY_NAME| ORIGIN_COUNTRY_NAME|count|
+--------------------+--------------------+-----+
|       United States|             Romania|   15|
|       United States|             Croatia|    1|
|       United States|             Ireland|  344|
|               Egypt|       United States|   15|
|       United States|               India|   62|
|       United States|           Singapore|    1|
|       United States|             Grenada|   62|
|          Costa Rica|       United States|  588|
|             Senegal|       United States|   40|
|       United States|        Sint Maarten|  325|
|       United States|    Marshall Islands|   39|
|              Guyana|       United States|   64|
|               Malta|       United States|    1|
|            Anguilla|       United States|   41|
|             Bolivia|       United States|   30|
|       United States|            Paraguay|    6|
|Turks and Caicos ...|       United States|  230|


In [17]:
#uncorrelated scalar queries

In [19]:
spark.sql("""
SELECT *, (SELECT max(count) FROM some_sql_view) AS maximum FROM some_sql_view
""").show()

+--------------------+-------------------+-----+-------+
|   DEST_COUNTRY_NAME|ORIGIN_COUNTRY_NAME|count|maximum|
+--------------------+-------------------+-----+-------+
|       United States|            Romania|   15| 370002|
|       United States|            Croatia|    1| 370002|
|       United States|            Ireland|  344| 370002|
|               Egypt|      United States|   15| 370002|
|       United States|              India|   62| 370002|
|       United States|          Singapore|    1| 370002|
|       United States|            Grenada|   62| 370002|
|          Costa Rica|      United States|  588| 370002|
|             Senegal|      United States|   40| 370002|
|             Moldova|      United States|    1| 370002|
|       United States|       Sint Maarten|  325| 370002|
|       United States|   Marshall Islands|   39| 370002|
|              Guyana|      United States|   64| 370002|
|               Malta|      United States|    1| 370002|
|            Anguilla|      Uni