## Creating Spark Session

In [5]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .master("local[*]") \
    .appName("SparkTest") \
    .config("spark.sql.warehouse.dir", r"C:\Users\deepe\OneDrive\Desktop\Python_spark\DB_managed") \
    .enableHiveSupport() \
    .getOrCreate()


## Creating Database 

In [6]:
spark.sql("CREATE DATABASE SCHOOLS")

DataFrame[]

## Checking if we created or not 

In [7]:
spark.sql('''
SHOW DATABASES
''').show()

+---------+
|namespace|
+---------+
|  default|
|  schools|
+---------+



## Creating Table in school

In [8]:
spark.sql("""
CREATE TABLE SCHOOLS.STUDENTS (
    ID INT,
    NAME STRING,
    CLASS STRING
)
""")


DataFrame[]

## Adding Data

In [9]:
spark.sql(''' INSERT INTO SCHOOLS.STUDENTS VALUES
(1,'RAM','10th'),
(2,'SHAM','10th'),
(3,'AMAN','10th')
''').show()

++
||
++
++



## Checking the data

In [10]:
spark.sql(''' select * from SCHOOLS.STUDENTS
''').show()

+---+----+-----+
| ID|NAME|CLASS|
+---+----+-----+
|  2|SHAM| 10th|
|  3|AMAN| 10th|
|  1| RAM| 10th|
+---+----+-----+



## Checking the table type

In [12]:
spark.sql("""
DESCRIBE FORMATTED SCHOOLS.STUDENTS
""").show(truncate=False)

+----------------------------+---------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                        |comment|
+----------------------------+---------------------------------------------------------------------------------+-------+
|ID                          |int                                                                              |NULL   |
|NAME                        |string                                                                           |NULL   |
|CLASS                       |string                                                                           |NULL   |
|                            |                                                                                 |       |
|# Detailed Table Information|                                                                                 |       |
|Catalog                     |sp

## Creating External Table

In [25]:
spark.sql(r"""
CREATE TABLE IF NOT EXISTS TEACHERS_EXT (
    ID INT,
    NAME STRING,
    SUBJECT STRING
)
USING PARQUET
LOCATION 'C:/Users/deepe/OneDrive/Desktop/Python_spark/DB_External'
""")


DataFrame[]

In [23]:
#spark.sql('drop table TEACHERS_EXT')

DataFrame[]

## Adding data in the table above 

In [28]:
spark.sql("""
INSERT INTO TEACHERS_EXT VALUES
(1, 'Ravi', 'Math'),
(2, 'Anita', 'Science'),
(3, 'John', 'English')
""")


DataFrame[]

In [27]:
spark.sql('show tables').show()

+---------+------------+-----------+
|namespace|   tableName|isTemporary|
+---------+------------+-----------+
|  default|teachers_ext|      false|
+---------+------------+-----------+



## Checking the data 

In [29]:
spark.sql(''' select * from teachers_ext
''').show()

+---+-----+-------+
| ID| NAME|SUBJECT|
+---+-----+-------+
|  2|Anita|Science|
|  2|Anita|Science|
|  3| John|English|
|  3| John|English|
|  1| Ravi|   Math|
|  1| Ravi|   Math|
+---+-----+-------+



## Checking the table type

In [30]:
spark.sql("""
DESCRIBE FORMATTED TEACHERS_EXT
""").show(truncate=False)


+----------------------------+--------------------------------------------------------------+-------+
|col_name                    |data_type                                                     |comment|
+----------------------------+--------------------------------------------------------------+-------+
|ID                          |int                                                           |NULL   |
|NAME                        |string                                                        |NULL   |
|SUBJECT                     |string                                                        |NULL   |
|                            |                                                              |       |
|# Detailed Table Information|                                                              |       |
|Catalog                     |spark_catalog                                                 |       |
|Database                    |default                                             

In [31]:
spark.sql('''
drop table schools.students
''').show()

++
||
++
++



In [33]:
spark.sql('''
select * from  schools.students
''').show()




In [34]:
spark.sql('drop database schools')

DataFrame[]

## Deleting the External table 

In [35]:
spark.sql('''
drop table  TEACHERS_EXT
''').show()

++
||
++
++



In [36]:
spark.sql('''
SELECT * FROM  TEACHERS_EXT
''').show()

{"ts": "2026-01-23 12:05:06.461", "level": "ERROR", "logger": "SQLQueryContextLogger", "msg": "[TABLE_OR_VIEW_NOT_FOUND] The table or view `TEACHERS_EXT` cannot be found. Verify the spelling and correctness of the schema and catalog.\nIf you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.\nTo tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS. SQLSTATE: 42P01", "context": {"errorClass": "TABLE_OR_VIEW_NOT_FOUND"}, "exception": {"class": "Py4JJavaError", "msg": "An error occurred while calling o32.sql.\n: org.apache.spark.sql.AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `TEACHERS_EXT` cannot be found. Verify the spelling and correctness of the schema and catalog.\nIf you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.\nTo tolerate the error on drop use DROP VIEW IF EXISTS or DRO

AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `TEACHERS_EXT` cannot be found. Verify the spelling and correctness of the schema and catalog.
If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.
To tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS. SQLSTATE: 42P01; line 2 pos 15;
'Project [*]
+- 'UnresolvedRelation [TEACHERS_EXT], [], false


## Reading predefined data

In [38]:
spark.sql(r"""
CREATE TABLE students_ext_dir
USING CSV
OPTIONS (
    path r"C:\Users\deepe\OneDrive\Desktop\Python_spark\Dataset\students.csv",
    header 'true',
    inferSchema 'true'
)
""")


DataFrame[]

In [39]:
spark.sql(' select * from students_ext_dir').show()

+---+-----+-----+
| id| name|class|
+---+-----+-----+
|  1| Ravi|   10|
|  2|Anita|    9|
|  3| John|   10|
|  4|Priya|    8|
|  5| Aman|    9|
|  6|Sneha|   10|
|  7|Karan|    8|
|  8| Neha|    9|
|  9|Vikas|   10|
| 10|Pooja|    8|
+---+-----+-----+



In [42]:
spark.sql('describe extended students_ext_dir').show(truncate = False)

+----------------------------+-----------------------------------------------------------------------+-------+
|col_name                    |data_type                                                              |comment|
+----------------------------+-----------------------------------------------------------------------+-------+
|id                          |int                                                                    |NULL   |
|name                        |string                                                                 |NULL   |
|class                       |int                                                                    |NULL   |
|                            |                                                                       |       |
|# Detailed Table Information|                                                                       |       |
|Catalog                     |spark_catalog                                                          |       |
|

In [43]:
spark.sql('drop table students_ext_dir').show() 

++
||
++
++



In [44]:
spark.sql('select * from  students_ext_dir').show() 

{"ts": "2026-01-23 12:11:27.216", "level": "ERROR", "logger": "SQLQueryContextLogger", "msg": "[TABLE_OR_VIEW_NOT_FOUND] The table or view `students_ext_dir` cannot be found. Verify the spelling and correctness of the schema and catalog.\nIf you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.\nTo tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS. SQLSTATE: 42P01", "context": {"errorClass": "TABLE_OR_VIEW_NOT_FOUND"}, "exception": {"class": "Py4JJavaError", "msg": "An error occurred while calling o32.sql.\n: org.apache.spark.sql.AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `students_ext_dir` cannot be found. Verify the spelling and correctness of the schema and catalog.\nIf you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.\nTo tolerate the error on drop use DROP VIEW IF EXIST

AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `students_ext_dir` cannot be found. Verify the spelling and correctness of the schema and catalog.
If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.
To tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS. SQLSTATE: 42P01; line 1 pos 15;
'Project [*]
+- 'UnresolvedRelation [students_ext_dir], [], false
