In [20]:
from pyspark.sql import SparkSession

# Build a Hive-enabled session (getOrCreate reuses a session that is already
# running in this kernel). HDFS at namenode:9000 is configured both as the
# default filesystem and as the home of the warehouse directory; the Hive
# metastore is addressed over Thrift at hive-metastore:9083.
spark = (
    SparkSession.builder
    .appName("Hadoop_Spark_Hive_Integration")
    .config("spark.sql.catalogImplementation", "hive")
    .config("spark.hadoop.fs.defaultFS", "hdfs://namenode:9000")
    .config("spark.sql.warehouse.dir", "hdfs://namenode:9000/user/hive/warehouse")
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .enableHiveSupport()
    .getOrCreate()
)

# Inspect the catalog: list the databases, then the tables in the current
# database.
spark.sql("SHOW DATABASES").show()
spark.sql("SHOW TABLES").show()

# test_table is only created by a later cell in this notebook, so on a fresh
# metastore an unconditional DESCRIBE would raise AnalysisException and stop
# the run. Describe the table only when it is already registered.
if spark.sql("SHOW TABLES LIKE 'test_table'").count() > 0:
    spark.sql("DESCRIBE FORMATTED test_table").show(truncate=False)
else:
    print("test_table does not exist yet; run the CREATE TABLE cell first.")


+---------+
|namespace|
+---------+
|  default|
+---------+

+---------+----------+-----------+
|namespace| tableName|isTemporary|
+---------+----------+-----------+
|  default|test_table|      false|
+---------+----------+-----------+

+----------------------------+--------------------------------------------------------------+-------+
|col_name                    |data_type                                                     |comment|
+----------------------------+--------------------------------------------------------------+-------+
|id                          |int                                                           |null   |
|name                        |string                                                        |null   |
|                            |                                                              |       |
|# Detailed Table Information|                                                              |       |
|Database                    |default            

In [17]:
# Sanity check that Hive is reachable: list the databases the catalog exposes.
print("Available Databases:")
databases = spark.sql("SHOW DATABASES")
databases.show()

Available Databases:
+---------+
|namespace|
+---------+
|  default|
+---------+



In [18]:
# Make `default` the current database for the statements that follow.
# The call is the cell's final expression, so the notebook echoes the
# DataFrame it returns (shown as `DataFrame[]`).
spark.sql("USE default")

DataFrame[]

In [10]:
# Define test_table (id INT, name STRING), stored as Parquet.
# IF NOT EXISTS means re-running this cell does not fail when the table is
# already present.
create_test_table = """
    CREATE TABLE IF NOT EXISTS test_table (
        id INT,
        name STRING
    )
    STORED AS PARQUET
"""
spark.sql(create_test_table)

DataFrame[]

In [11]:
# Load the three sample rows into test_table.
# INSERT OVERWRITE replaces the table's contents instead of appending, so
# re-running this cell leaves exactly these three rows rather than piling up
# duplicates on every execution.
spark.sql("INSERT OVERWRITE TABLE test_table VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Charlie')")

DataFrame[]

In [12]:
# Confirm the table was registered by listing the tables in the current database.
print("Available Tables:")
tables = spark.sql("SHOW TABLES")
tables.show()

Available Tables:
+---------+----------+-----------+
|namespace| tableName|isTemporary|
+---------+----------+-----------+
|  default|test_table|      false|
+---------+----------+-----------+



In [13]:
# Read back every row of test_table. The query has no ORDER BY, so the rows
# are printed in whatever order Spark returns them.
print("Table Data:")
table_rows = spark.sql("SELECT * FROM test_table")
table_rows.show()

Table Data:
+---+-------+
| id|   name|
+---+-------+
|  3|Charlie|
|  1|  Alice|
|  2|    Bob|
+---+-------+



In [15]:
# Read the metastore URI back from the session's runtime configuration to
# confirm the value passed to the builder is the one in effect.
spark.conf.get("hive.metastore.uris")


'thrift://hive-metastore:9083'