In [81]:
from pyspark.sql import SparkSession
import getpass

# Login name of the user running the kernel; used to build a per-user
# warehouse directory for the session below.
username = getpass.getuser()
warehouse_dir = f"/user/{username}/warehouse"

# Build (or reuse, via getOrCreate) a Hive-enabled SparkSession on YARN.
spark = (
    SparkSession.builder
    .appName("Shubham-M")
    .master("yarn")
    .config("spark.ui.port", "0")
    .config("spark.sql.warehouse.dir", warehouse_dir)
    .enableHiveSupport()
    .getOrCreate()
)

In [82]:
# Echo the session object so the notebook renders it as this cell's output.
spark

In [5]:
# Create the working database. IF NOT EXISTS keeps this cell re-runnable:
# a plain CREATE DATABASE raises once the database is already present.
spark.sql("create database if not exists malaiDB")

In [19]:
# List the databases whose name matches the 'malai*' pattern.
(
    spark
    .sql("show databases like 'malai*' ")
    .show()
)

+---------+
|namespace|
+---------+
|  malaidb|
+---------+



In [27]:
# Make malaiDB the session's current database for the statements that follow.
spark.sql("use malaiDB")

In [40]:
# Remove any previous copy of the table before recreating it. IF EXISTS
# avoids an error when the table is not there (e.g. first run in a fresh
# database), so the notebook can be run top to bottom.
spark.sql("drop table if exists malaiDB.orders")

In [41]:
# Define malaiDB.orders with four columns: order_id, order_date,
# customer_id and order_status.
spark.sql(
    "create table malaiDB.orders("
    "order_id integer, order_date string, "
    "customer_id integer,order_status string)"
)

In [55]:
# List the tables and temporary views visible from the current database.
(
    spark
    .sql("show tables")
    .show()
)

+--------+-----------+-----------+
|database|  tableName|isTemporary|
+--------+-----------+-----------+
| malaidb|     orders|      false|
|        |     orders|       true|
|        |orders_temp|       true|
+--------+-----------+-----------+



In [43]:
# Load the orders CSV into a DataFrame; the first row supplies the column
# names and column types are inferred from the data.
orders_csv_path = "/public/trendytech/orders_wh/orders_wh.csv"
orders_df = spark.read.csv(
    orders_csv_path,
    header="true",
    inferSchema="true",
)

In [44]:
# Preview the first rows of the loaded orders DataFrame.
orders_df.show()

+--------+--------------------+-----------+---------------+
|order_id|          order_date|customer_id|   order_status|
+--------+--------------------+-----------+---------------+
|       1|2013-07-25 00:00:...|      11599|         CLOSED|
|       2|2013-07-25 00:00:...|        256|PENDING_PAYMENT|
|       3|2013-07-25 00:00:...|      12111|       COMPLETE|
|       4|2013-07-25 00:00:...|       8827|         CLOSED|
|       5|2013-07-25 00:00:...|      11318|       COMPLETE|
|       6|2013-07-25 00:00:...|       7130|       COMPLETE|
|       7|2013-07-25 00:00:...|       4530|       COMPLETE|
|       8|2013-07-25 00:00:...|       2911|     PROCESSING|
|       9|2013-07-25 00:00:...|       5657|PENDING_PAYMENT|
|      10|2013-07-25 00:00:...|       5648|PENDING_PAYMENT|
|      11|2013-07-25 00:00:...|        918| PAYMENT_REVIEW|
|      12|2013-07-25 00:00:...|       1837|         CLOSED|
|      13|2013-07-25 00:00:...|       9149|PENDING_PAYMENT|
|      14|2013-07-25 00:00:...|       98

In [51]:
# Register orders_df as the temporary view "orders_temp" (replacing any
# existing view of that name) so it can be queried through spark.sql.
orders_df.createOrReplaceTempView("orders_temp")

In [52]:
# Sanity-check the temporary view by previewing its rows.
(
    spark
    .sql("select * from orders_temp")
    .show()
)

+--------+--------------------+-----------+---------------+
|order_id|          order_date|customer_id|   order_status|
+--------+--------------------+-----------+---------------+
|       1|2013-07-25 00:00:...|      11599|         CLOSED|
|       2|2013-07-25 00:00:...|        256|PENDING_PAYMENT|
|       3|2013-07-25 00:00:...|      12111|       COMPLETE|
|       4|2013-07-25 00:00:...|       8827|         CLOSED|
|       5|2013-07-25 00:00:...|      11318|       COMPLETE|
|       6|2013-07-25 00:00:...|       7130|       COMPLETE|
|       7|2013-07-25 00:00:...|       4530|       COMPLETE|
|       8|2013-07-25 00:00:...|       2911|     PROCESSING|
|       9|2013-07-25 00:00:...|       5657|PENDING_PAYMENT|
|      10|2013-07-25 00:00:...|       5648|PENDING_PAYMENT|
|      11|2013-07-25 00:00:...|        918| PAYMENT_REVIEW|
|      12|2013-07-25 00:00:...|       1837|         CLOSED|
|      13|2013-07-25 00:00:...|       9149|PENDING_PAYMENT|
|      14|2013-07-25 00:00:...|       98

In [53]:
# Copy the rows of the orders_temp view into the malaiDB.orders table.
# NOTE(review): INSERT INTO presumably appends, so re-running this cell
# without re-running the drop/create cells would load the same rows again.
spark.sql("insert into malaiDB.orders select * from orders_temp")

In [54]:
# Preview the table that was just populated. The name is fully qualified
# because the session also holds a temporary view called "orders" (see the
# "show tables" output), and an unqualified "orders" could resolve to that
# view instead of the malaiDB.orders table.
spark.sql("select * from malaiDB.orders").show()

+--------+--------------------+-----------+---------------+
|order_id|          order_date|customer_id|   order_status|
+--------+--------------------+-----------+---------------+
|       1|2013-07-25 00:00:...|      11599|         CLOSED|
|       2|2013-07-25 00:00:...|        256|PENDING_PAYMENT|
|       3|2013-07-25 00:00:...|      12111|       COMPLETE|
|       4|2013-07-25 00:00:...|       8827|         CLOSED|
|       5|2013-07-25 00:00:...|      11318|       COMPLETE|
|       6|2013-07-25 00:00:...|       7130|       COMPLETE|
|       7|2013-07-25 00:00:...|       4530|       COMPLETE|
|       8|2013-07-25 00:00:...|       2911|     PROCESSING|
|       9|2013-07-25 00:00:...|       5657|PENDING_PAYMENT|
|      10|2013-07-25 00:00:...|       5648|PENDING_PAYMENT|
|      11|2013-07-25 00:00:...|        918| PAYMENT_REVIEW|
|      12|2013-07-25 00:00:...|       1837|         CLOSED|
|      13|2013-07-25 00:00:...|       9149|PENDING_PAYMENT|
|      14|2013-07-25 00:00:...|       98

In [56]:
# Show only the CLOSED orders from the table. The name is fully qualified
# so the query cannot be answered by the leftover temporary view that is
# also named "orders".
spark.sql("select * from malaiDB.orders where order_status = 'CLOSED'").show()

+--------+--------------------+-----------+------------+
|order_id|          order_date|customer_id|order_status|
+--------+--------------------+-----------+------------+
|       1|2013-07-25 00:00:...|      11599|      CLOSED|
|       4|2013-07-25 00:00:...|       8827|      CLOSED|
|      12|2013-07-25 00:00:...|       1837|      CLOSED|
|      18|2013-07-25 00:00:...|       1205|      CLOSED|
|      24|2013-07-25 00:00:...|      11441|      CLOSED|
|      25|2013-07-25 00:00:...|       9503|      CLOSED|
|      37|2013-07-25 00:00:...|       5863|      CLOSED|
|      51|2013-07-25 00:00:...|      12271|      CLOSED|
|      57|2013-07-25 00:00:...|       7073|      CLOSED|
|      61|2013-07-25 00:00:...|       4791|      CLOSED|
|      62|2013-07-25 00:00:...|       9111|      CLOSED|
|      87|2013-07-25 00:00:...|       3065|      CLOSED|
|      90|2013-07-25 00:00:...|       9131|      CLOSED|
|     101|2013-07-25 00:00:...|       5116|      CLOSED|
|     116|2013-07-26 00:00:...|

In [58]:
# Print the column list and detailed metadata of malaidb.orders without
# truncating long values.
(
    spark
    .sql("describe extended  malaidb.orders")
    .show(truncate=False)
)

+----------------------------+------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                               |comment|
+----------------------------+------------------------------------------------------------------------+-------+
|order_id                    |int                                                                     |null   |
|order_date                  |string                                                                  |null   |
|customer_id                 |int                                                                     |null   |
|order_status                |string                                                                  |null   |
|                            |                                                                        |       |
|# Detailed Table Information|                                                                        | 

In [59]:
"CREATE AN EXTERNAL TABLE"

'CREATE AN EXTERNAL TABLE'

In [69]:
# Create an external table whose data lives under the current user's HDFS
# home directory.
# - The location is an absolute path (leading "/"); the previous value
#   "user/<id>/..." was relative and would not point at /user/<id>/data.
# - The path is built from `username` (set in the session-setup cell)
#   instead of a hard-coded account id.
# - EXTERNAL states the intent explicitly, and IF NOT EXISTS keeps the cell
#   re-runnable.
spark.sql(
    "create external table if not exists malaiDB.orders_external("
    "order_id integer, order_date string, customer_id integer, order_status string) "
    f"location '/user/{username}/data/table_external'"
)

In [70]:
# List tables again to confirm that orders_external is now registered.
(
    spark
    .sql("show tables")
    .show()
)

+--------+---------------+-----------+
|database|      tableName|isTemporary|
+--------+---------------+-----------+
| malaidb|         orders|      false|
| malaidb|orders_external|      false|
|        |         orders|       true|
|        |    orders_temp|       true|
+--------+---------------+-----------+



In [71]:
# Print the columns and detailed metadata of the external table
# (default display, so long values are truncated).
(
    spark
    .sql("describe extended  malaidb.orders_external")
    .show()
)

+--------------------+--------------------+-------+
|            col_name|           data_type|comment|
+--------------------+--------------------+-------+
|            order_id|                 int|   null|
|          order_date|              string|   null|
|         customer_id|                 int|   null|
|        order_status|              string|   null|
|                    |                    |       |
|# Detailed Table ...|                    |       |
|            Database|             malaidb|       |
|               Table|     orders_external|       |
|               Owner|           itv023333|       |
|        Created Time|Wed Dec 24 04:26:...|       |
|         Last Access|             UNKNOWN|       |
|          Created By|         Spark 3.1.2|       |
|                Type|            EXTERNAL|       |
|            Provider|                hive|       |
|    Table Properties|[transient_lastDd...|       |
|            Location|hdfs://m01.itvers...|       |
|       Serd

In [73]:
# Return the formatted description of the external table; the resulting
# DataFrame is rendered as this cell's output.
spark.sql("DESCRIBE FORMATTED malaidb.orders_external")

col_name,data_type,comment
order_id,int,
order_date,string,
customer_id,int,
order_status,string,
,,
# Detailed Table ...,,
Database,malaidb,
Table,orders_external,
Owner,itv023333,
Created Time,Wed Dec 24 04:26:...,


In [74]:
# Return the formatted description of malaidb.orders for comparison with
# the external table; the resulting DataFrame is rendered as cell output.
spark.sql("DESCRIBE FORMATTED malaidb.orders")

col_name,data_type,comment
order_id,int,
order_date,string,
customer_id,int,
order_status,string,
,,
# Detailed Table ...,,
Database,malaidb,
Table,orders,
Owner,itv023333,
Created Time,Wed Dec 24 04:06:...,
