## Iceberg Hive Tables With Custom Catalog

In [None]:
%%configure -f
{ "conf": {"spark.jars.packages": "org.apache.iceberg:iceberg-spark-runtime-3.1_2.12:1.1.0,io.delta:delta-core_2.12:1.0.1,org.apache.iceberg:iceberg-hive-runtime:1.1.0",
           "spark.sql.extensions":"org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension",
           "spark.sql.catalog.spark_catalog":"org.apache.spark.sql.delta.catalog.DeltaCatalog",
           "spark.sql.catalog.iceberg":"org.apache.iceberg.spark.SparkCatalog",
           "spark.sql.catalog.iceberg.type":"hive",
           "spark.sql.catalog.iceberg.warehouse":"/iceberg/warehouse"
          }
}

# Spark Catalog Tables
`iceberg_table` is an Iceberg Parquet table and `spark_table` is a Spark Parquet table; both will be created in the Spark catalog.
* Assumption: Your Spark configuration (from Ambari) `metastore.catalog.default` is still set to 'spark'.


In [None]:
// Create two tables with unqualified names, so they are resolved against the
// session's current catalog. IF NOT EXISTS keeps this cell safe to re-run.

// Iceberg-format table.
spark.sql("""CREATE TABLE IF NOT EXISTS iceberg_table (id string,
               creation_date string,
                 last_update_time string) USING iceberg""")
// Plain Spark table. The format is stated explicitly: without a USING clause
// the storage format depends on session defaults (on Spark 3.1 a Hive text
// table may be created), which would contradict the intended Parquet table.
spark.sql("""CREATE TABLE IF NOT EXISTS spark_table (id string,
                creation_date string,
                last_update_time string) USING parquet""")

In [None]:
%%sql
SHOW TABLES;

## Iceberg Hive Tables With Custom Catalog
Create Customer Iceberg Table in Hive Catalog

In [None]:
import org.apache.iceberg.hive.HiveCatalog
import org.apache.iceberg.types.Types
import org.apache.iceberg.{PartitionSpec, TableProperties, Schema => IcebergSchema}
import org.apache.iceberg.CatalogProperties
import org.apache.spark.sql.SparkSession

// Coordinates of the Iceberg table created by createTableByHiveCatalog.
val catalogName: String = "iceberg"  // name the HiveCatalog is initialized with
val nameSpace: String = "default"    // namespace part of the table identifier
val tableName: String = "customer"   // table part of the table identifier

/**
 * Creates the Iceberg table `nameSpace`.`tableName` through a directly
 * instantiated [[HiveCatalog]], unless the table already exists.
 *
 * The table has three required columns (`id` int, `name` string, `state` string)
 * and is partitioned by a 128-way bucket on `state`. It is created with the
 * `engine.hive.enabled` table property set to "true" and `iceberg.catalog` set to
 * `catalogName`.
 *
 * The call is idempotent: if the table is already registered in the catalog,
 * nothing is created, so the notebook cell can be re-run safely.
 *
 * @param spark active Spark session; only its Hadoop configuration is used, to
 *              configure the Hive catalog
 */
def createTableByHiveCatalog(spark: SparkSession): Unit = {
    import scala.collection.JavaConverters._
    import org.apache.iceberg.catalog.TableIdentifier

    // Single source for the warehouse path used by both the catalog and the table location.
    val warehouseRoot = "/iceberg/warehouse"

    // table specification starts
    val schema = new IcebergSchema(
        Types.NestedField.required(1, "id", Types.IntegerType.get()),
        Types.NestedField.required(2, "name", Types.StringType.get()),
        Types.NestedField.required(3, "state", Types.StringType.get())
    )
    val spec = PartitionSpec.builderFor(schema).bucket("state", 128).build()
    val tableIdentifier: TableIdentifier = TableIdentifier.of(nameSpace, tableName)
    // Reference the catalog by `catalogName` rather than repeating the literal,
    // so the table property cannot drift from the name the catalog is initialized with.
    val tblProperties = Map(
        TableProperties.ENGINE_HIVE_ENABLED -> "true",
        "iceberg.catalog" -> catalogName
    )
    // table specification ends

    val catalog = new HiveCatalog()
    // The Hadoop configuration must be set before initialize().
    catalog.setConf(spark.sparkContext.hadoopConfiguration)
    val properties = Map(CatalogProperties.WAREHOUSE_LOCATION -> s"$warehouseRoot/")
    catalog.initialize(catalogName, properties.asJava)

    // createTable throws when the table already exists; guard it so that
    // re-running the cell behaves like CREATE TABLE IF NOT EXISTS.
    if (!catalog.tableExists(tableIdentifier)) {
        catalog.createTable(
            tableIdentifier,
            schema,
            spec,
            s"$warehouseRoot/$tableName",
            tblProperties.asJava
        )
    }
}

In [None]:
// Create the Hive catalog table (external) by running createTableByHiveCatalog
// with the notebook's Spark session.
createTableByHiveCatalog(spark)

# Insert Data Into Customer

In [None]:
%%sql
INSERT INTO iceberg.default.customer
VALUES
    (1, 'A', 'State1'),
    (2, 'B', 'State2'),
    (3, 'C', 'State2')

## Query Metadata Tables

In [None]:
%%sql
SELECT *
FROM iceberg.default.customer.files

## Query Using Spark

In [None]:
// Load the customer table from the `iceberg` catalog as a DataFrame and print its rows.
val df = spark.read.table("iceberg.default.customer")
df.show()