#### References

https://docs.azuredatabricks.net/spark/latest/spark-sql/index.html<br>
http://spark.apache.org/docs/latest/sql-programming-guide.html<br>

In [2]:
# Location of the Parquet files that the table is defined over
parquet_path = "/mnt/hack/parquet/sample/dat202/repartition/"
# Name of the table created on top of those files
table_name = "airports"

#### Prepare database

In [4]:
# Show the databases the catalog currently knows about
display(spark.catalog.listDatabases())

In [5]:
# Database that the following cells drop, recreate, and switch to
db_name = "hackdb"

In [6]:
# Start from a clean slate; CASCADE drops the database together with its tables
spark.sql("DROP DATABASE IF EXISTS {} CASCADE".format(db_name))

In [7]:
# Create the database if it is not already there
spark.sql("CREATE DATABASE IF NOT EXISTS {}".format(db_name))

In [8]:
# Make db_name the current database so later cells can use unqualified table names
spark.catalog.setCurrentDatabase(db_name)

#### Create external table on Parquet files, then query it

In [10]:
# List the tables in db_name. If the database name is omitted, listTables()
# falls back to the current database set above.

spark.catalog.listTables(db_name)

In [11]:
# Another way to list tables, this time through sqlContext and rendered with display()

display(sqlContext.tables())

In [12]:
# Drop any existing table of this name before it is (re)created
spark.sql("DROP TABLE IF EXISTS {}".format(table_name))

In [13]:
# Parameterised DDL: the table name and the Parquet location are filled in
# from table_name and parquet_path via str.format

spark.sql(
    """
CREATE TABLE IF NOT EXISTS {table}
(
  airport_id INT,
  city STRING,
  state STRING,
  name STRING
)
USING parquet
LOCATION '{location}'""".format(table=table_name, location=parquet_path)
)

In [14]:
%sql
-- The same table definition as literal SQL: table name and path are hard-coded here

CREATE TABLE IF NOT EXISTS airports (
  airport_id INT,
  city STRING,
  state STRING,
  name STRING
)
USING PARQUET
LOCATION "/mnt/hack/parquet/sample/dat202/repartition/"

In [15]:
%sql
-- Build a derived table from airports with CREATE TABLE ... AS SELECT,
-- dropping any earlier copy first

DROP TABLE IF EXISTS airports_cleaned;

CREATE TABLE airports_cleaned AS
SELECT
  CAST(airport_id AS INT) AS airport_id,
  city,
  state,
  name
FROM airports

In [16]:
# Alternative to spark.catalog.listTables(): run SHOW TABLES through spark.sql()
# and render the result with display()

display(spark.sql("SHOW TABLES"))

In [17]:
%sql
-- List tables again, this time through the %sql magic
SHOW TABLES

In [18]:
# Query the table named by table_name and render the rows
display(spark.sql("SELECT * FROM {}".format(table_name)))

In [19]:
%sql
-- Query the derived table
SELECT
  *
FROM
  airports_cleaned