In [1]:
import getpass

from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException
# Build (or reuse) a Hive-enabled SparkSession on YARN whose warehouse
# directory lives under the current OS user's /user/<name>/ folder.
username = getpass.getuser()
warehouse_dir = f"/user/{username}/warehouse"
spark = (
    SparkSession.builder
    .config('spark.ui.port', '0')
    .config("spark.sql.warehouse.dir", warehouse_dir)
    .enableHiveSupport()
    .master('yarn')
    .getOrCreate()
)

In [3]:
# Peek at the first five databases visible to this session.
databases_before = spark.sql("show databases")
databases_before.show(5)

+--------------------+
|           namespace|
+--------------------+
|0000000000000_msdian|
|0000000000000_nav...|
|0000000009874_retail|
|          00000_2_db|
|       00000assg5_db|
+--------------------+
only showing top 5 rows



In [4]:
# Create the personal retail database; `if not exists` makes a re-run a no-op.
create_db_sql = "create database if not exists itv019463_retail"
spark.sql(create_db_sql)

In [5]:
# Re-list the first five databases after the create statement; the new
# database does not appear among the first five rows shown.
databases_after = spark.sql("show databases")
databases_after.show(5)

+--------------------+
|           namespace|
+--------------------+
|0000000000000_msdian|
|0000000000000_nav...|
|0000000009874_retail|
|          00000_2_db|
|       00000assg5_db|
+--------------------+
only showing top 5 rows



In [6]:
# Show only this user's databases. In a LIKE pattern `_` is a single-character
# wildcard, so it is escaped (`\_`) to match a literal underscore after the
# user id; `%` still matches any suffix. The raw string keeps the backslash
# intact on its way to Spark's SQL parser.
own_databases = spark.sql("show databases").filter(r"namespace like 'itv019463\_%'")
own_databases.show()

+----------------+
|       namespace|
+----------------+
|itv019463_retail|
+----------------+



In [7]:
# List up to five tables of the current database (still `default` here).
tables_in_default = spark.sql("show tables")
tables_in_default.show(5)

+--------+--------------+-----------+
|database|     tableName|isTemporary|
+--------+--------------+-----------+
| default|         1htab|      false|
| default|41group_movies|      false|
| default| 4group_movies|      false|
| default|          4tab|      false|
| default| 6_flags_simon|      false|
+--------+--------------+-----------+
only showing top 5 rows



In [8]:
# Make the retail database the session's current database.
use_db_sql = "use itv019463_retail"
spark.sql(use_db_sql)

In [9]:
# List tables of the now-current retail database (empty at this point).
tables_after_use = spark.sql("show tables")
tables_after_use.show()

+--------+---------+-----------+
|database|tableName|isTemporary|
+--------+---------+-----------+
+--------+---------+-----------+



In [10]:
# Create the `orders` table with an explicit four-column schema.
create_orders_sql = (
    "create table if not exists itv019463_retail.orders "
    "(order_id integer, order_date string, customer_id integer, order_status string)"
)
spark.sql(create_orders_sql)

In [11]:
# Confirm the new `orders` table is listed in the current database.
tables_after_create = spark.sql("show tables")
tables_after_create.show()

+----------------+---------+-----------+
|        database|tableName|isTemporary|
+----------------+---------+-----------+
|itv019463_retail|   orders|      false|
+----------------+---------+-----------+



In [12]:
# Load the orders CSV: the first line is treated as a header and column
# types are inferred from the data.
orders_csv_path = "/user/itv019463/TrendyTechBigData/Week5/data/ordersWithHeader.csv"
orders_df = (
    spark.read
    .format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load(orders_csv_path)
)

In [13]:
# Expose the DataFrame to SQL as a temp view named "orders". Note that this
# is the same bare name as the table created in itv019463_retail.
temp_view_name = "orders"
orders_df.createOrReplaceTempView(temp_view_name)

In [14]:
# List tables again; the temp view shows up alongside the database table
# with isTemporary = true.
tables_with_temp_view = spark.sql("show tables")
tables_with_temp_view.show()

+----------------+---------+-----------+
|        database|tableName|isTemporary|
+----------------+---------+-----------+
|itv019463_retail|   orders|      false|
|                |   orders|       true|
+----------------+---------+-----------+



In [15]:
# Copy every row from `orders` into the database-qualified table.
# NOTE(review): the unqualified `orders` presumably resolves to the temp view
# rather than the same-named table — confirm.
insert_orders_sql = "insert into itv019463_retail.orders select * from orders"
spark.sql(insert_orders_sql)

In [16]:
# Preview five rows of the populated table.
orders_preview = spark.sql("select * from itv019463_retail.orders limit 5")
orders_preview.show()

+--------+--------------------+-----------+---------------+
|order_id|          order_date|customer_id|   order_status|
+--------+--------------------+-----------+---------------+
|   34565|2014-02-23 00:00:...|       8702|       COMPLETE|
|   34566|2014-02-23 00:00:...|       3066|PENDING_PAYMENT|
|   34567|2014-02-23 00:00:...|       7314|SUSPECTED_FRAUD|
|   34568|2014-02-23 00:00:...|       1271|       COMPLETE|
|   34569|2014-02-23 00:00:...|      11083|       COMPLETE|
+--------+--------------------+-----------+---------------+



In [17]:
# Show the column names and data types of the orders table.
orders_columns = spark.sql("describe table itv019463_retail.orders")
orders_columns.show()

+------------+---------+-------+
|    col_name|data_type|comment|
+------------+---------+-------+
|    order_id|      int|   null|
|  order_date|   string|   null|
| customer_id|      int|   null|
|order_status|   string|   null|
+------------+---------+-------+



In [20]:
# Extended description: columns plus the detailed table information section,
# printed without truncating wide values.
orders_details = spark.sql("describe extended itv019463_retail.orders")
orders_details.show(truncate=False)

+----------------------------+---------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                        |comment|
+----------------------------+---------------------------------------------------------------------------------+-------+
|order_id                    |int                                                                              |null   |
|order_date                  |string                                                                           |null   |
|customer_id                 |int                                                                              |null   |
|order_status                |string                                                                           |null   |
|                            |                                                                                 |       |
|# Detailed Table Information|  

In [21]:
# Drop the orders table from the retail database.
drop_orders_sql = "drop table itv019463_retail.orders"
spark.sql(drop_orders_sql)

In [23]:
# Demonstrate that the table is gone: describing it is expected to fail with
# an AnalysisException. The error is caught and printed so the demonstration
# stays visible without aborting a Restart & Run All of the notebook.
try:
    spark.sql("describe itv019463_retail.orders").show()
except AnalysisException as exc:
    print(f"{type(exc).__name__}: {exc}")

AnalysisException: Table or view not found for 'DESCRIBE TABLE': itv019463_retail.orders; line 1 pos 0;
'DescribeRelation false, [col_name#345, data_type#346, comment#347]
+- 'UnresolvedTableOrView [itv019463_retail, orders], DESCRIBE TABLE, true


In [42]:
# Create a CSV-backed table over the existing Week5 data directory (Spark
# reports it as an external table). The directory holds ordersWithHeader.csv,
# which this notebook reads with header=true, so the same option is set here;
# without it the header line would be returned as a data row.
# NOTE(review): the location is the whole directory, so any other files in it
# are read as well — confirm they share this layout.
# NOTE(review): `if not exists` skips the create when orders_ext already
# exists, so a table created before this change must be dropped and
# recreated for the header option to take effect.
create_orders_ext_sql = """
    create table if not exists itv019463_retail.orders_ext (
        order_id integer,
        order_date string,
        customer_id integer,
        order_status string
    )
    using csv
    options (header = 'true')
    location '/user/itv019463/TrendyTechBigData/Week5/data/'
"""
spark.sql(create_orders_ext_sql)

In [43]:
# List tables once more; orders_ext now appears next to the temp view.
tables_after_ext = spark.sql("show tables")
tables_after_ext.show()

+----------------+----------+-----------+
|        database| tableName|isTemporary|
+----------------+----------+-----------+
|itv019463_retail|orders_ext|      false|
|                |    orders|       true|
+----------------+----------+-----------+



In [44]:
# Extended description of the CSV-backed table, printed without truncation.
orders_ext_details = spark.sql("describe extended itv019463_retail.orders_ext")
orders_ext_details.show(truncate=False)

+----------------------------+-------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                |comment|
+----------------------------+-------------------------------------------------------------------------+-------+
|order_id                    |int                                                                      |null   |
|order_date                  |string                                                                   |null   |
|customer_id                 |int                                                                      |null   |
|order_status                |string                                                                   |null   |
|                            |                                                                         |       |
|# Detailed Table Information|                                                                  

In [45]:
# Demonstrate that TRUNCATE is rejected for external tables: Spark raises an
# AnalysisException. The error is caught and printed so the demonstration
# stays visible without leaving a failing cell in the notebook.
try:
    spark.sql("Truncate table itv019463_retail.orders_ext")
except AnalysisException as exc:
    print(f"{type(exc).__name__}: {exc}")

AnalysisException: Operation not allowed: TRUNCATE TABLE on external tables: `itv019463_retail`.`orders_ext`