In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

spark = (SparkSession.builder
             .master("spark://spark-master:7077") # Points to the Spark Cluster
             .appName('schema-test') # Name the app
             .config("hive.metastore.uris", "thrift://hive-metastore:9083") # Set external Hive Metastore
             .config("hive.metastore.warehouse.dir", "hdfs://hdfs-namenode:9000/hadoop/warehouse/") # Set default warehouse dir (legacy) users/hive/warehouse
             .config("spark.sql.warehouse.dir", "hdfs://hdfs-namenode:9000/hadoop/warehouse/") # Set default warehouse dir
             .config("hive.metastore.schema.verification", "false") # Prevent some errors
             .config("fs.defaultFS", "hdfs://hdfs-namenode:9000/") # Set default file system into the HDFS namenode
             .enableHiveSupport()
             .getOrCreate())

sc = spark.sparkContext

In [4]:
df = spark.createDataFrame([
    {"name": "Jhon", "age": 35},
    {"name": "Eric", "age": 31}
], schema=StructType([StructField("name", StringType(), True), StructField("age", IntegerType(), True)]))

df.show()

+----+---+
|name|age|
+----+---+
|Jhon| 35|
|Eric| 31|
+----+---+



In [3]:
df.printSchema()

root
 |-- name: string (nullable = true)
 |-- age: integer (nullable = true)



In [5]:
df.write.mode("overwrite").parquet("hdfs://hdfs-namenode:9000/hadoop/warehouse/test/")

In [2]:
spark.sql("CREATE EXTERNAL TABLE default.persons (name string, age int) USING PARQUET LOCATION 'hdfs://hdfs-namenode:9000/hadoop/warehouse/persons/'").show()

++
||
++
++



In [2]:
spark.sql("show databases").show()

+---------+
|namespace|
+---------+
|  default|
+---------+



In [3]:
spark.sql("show tables").show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|  default|  persons|      false|
+---------+---------+-----------+



In [6]:
df.write.insertInto("default.persons")

In [3]:
spark.sql("select * from default.persons").show()

+----+---+
|name|age|
+----+---+
|Jhon| 35|
|Jhon| 35|
|Eric| 31|
|Eric| 31|
|Jhon| 35|
|Eric| 31|
+----+---+

