### Ingest title_principals file

In [0]:
# Declare the "p_data_source" text widget (default: empty string) and read its
# current value; v_data_source is later written into the data_source column.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

##### Step 1 - Read the bronze title_principals table

In [0]:
# Load the bronze-layer title_principals table as the starting DataFrame.
title_principals_raw_df = spark.read.table("imdb_dev.bronze.title_principals")

In [0]:
# Preview the rows as read from the bronze table, before any transformation.
display(title_principals_raw_df)

##### Step 2 - Convert the characters JSON array to a comma-separated string

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import ArrayType, StringType

In [0]:
# Flatten the "characters" column: it holds a JSON array of strings
# (e.g. '["Self","Narrator"]'), which is parsed and joined into a single
# comma-separated string (e.g. 'Self, Narrator').
#
# The null guard is applied to the *parsed* array rather than to the raw column.
# from_json yields NULL both for NULL input and for text it cannot parse, and
# concat_ws would turn that NULL array into an empty string. Guarding on the
# parsed value keeps every missing or unparseable entry as NULL instead of
# mixing NULL and "" as two representations of "no characters".
characters_array = F.from_json(F.col("characters"), ArrayType(StringType()))

title_principals_type_df = title_principals_raw_df.withColumn(
    "characters",
    F.when(characters_array.isNull(), F.lit(None).cast("string"))
     .otherwise(F.concat_ws(", ", characters_array)),
)

In [0]:
# Preview the frame after the characters column has been rewritten.
display(title_principals_type_df)

##### Step 3 - Add the data_source column

In [0]:
# Stamp every row with the data source supplied through the p_data_source widget.
# F.lit is used because this notebook imports pyspark.sql.functions only as F;
# a bare `lit` resolves only if one of the %run includes happens to define it.
title_principals_renamed_df = title_principals_type_df.withColumn(
    "data_source", F.lit(v_data_source)
)

##### Step 4 - Add ingestion date

In [0]:
# Run the frame through add_ingestion_date. The helper is not defined in this
# notebook; presumably it comes from the %run includes and appends an
# ingestion timestamp column (confirm in common_functions).
title_principals_final_df = title_principals_renamed_df.transform(add_ingestion_date)

In [0]:
# Preview the final frame that the next step writes to the silver table.
display(title_principals_final_df)

##### Step 5 - Write data to datalake as delta table

In [0]:
# Persist the result as the silver Delta table. The table contents are replaced
# on every run, and overwriteSchema lets the stored schema be replaced as well.
(
    title_principals_final_df
    .write
    .mode("overwrite")
    .format("delta")
    .option("overwriteSchema", "true")
    .saveAsTable("imdb_dev.silver.title_principals")
)