In [0]:
# Declare the "p_environment" text widget (the notebook's input parameter),
# using an empty string as its default value.
dbutils.widgets.text("p_environment", "")

In [0]:
# Read the "p_environment" widget once and keep the value; it is used later to
# fill the "environment" column. The original cell also called
# dbutils.widgets.get() a first time and discarded the result, which did nothing.
v_environment = dbutils.widgets.get("p_environment")
print(v_environment)

## Ingesta del archivo person.json

### Paso 1 - Leer el archivo JSON usando "DataFrameReader" de Spark

In [0]:
# Schema for person.json written as a DDL string: an integer id plus a nested
# struct that carries the two parts of the name.
person_schema = (
    "personId INT, "
    "personName STRUCT<forename: STRING, surname: STRING>"
)

In [0]:
# Load person.json with the explicit schema defined above rather than letting
# Spark infer one from the data.
person_df = (
    spark.read
    .schema(person_schema)
    .json("/mnt/moviehistoryl/bronce/person.json")
)

In [0]:
# Print the schema of person_df as a tree to check the columns and types applied on read.
person_df.printSchema()

In [0]:
# Render person_df in the cell output for a visual check of the loaded rows.
person_df.display()

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

In [0]:
# Schema of the nested "personName" object: two nullable string fields.
name_schema = (
    StructType()
    .add("forename", StringType(), True)
    .add("surname", StringType(), True)
)

In [0]:
# Top-level schema for person.json, built programmatically: an integer id
# declared non-nullable, plus the nested name struct defined in name_schema.
person_schema = StructType([
    StructField(name="personId", dataType=IntegerType(), nullable=False),
    StructField(name="personName", dataType=name_schema, nullable=True),
])

In [0]:
# Re-read person.json, this time applying the StructType-based schema.
person_df = spark.read.json(
    "/mnt/moviehistoryl/bronce/person.json",
    schema=person_schema,
)

In [0]:
# Render person_df in the cell output for a visual check of the loaded rows.
person_df.display()

### Paso 2 - Cambiar el nombre de las columnas y añadir "ingestion_date" y "environment"
1. "personId" renombrar a "person_id"
2. Agregar las columnas "ingestion_date" y "environment"
3. Agregar la columna "name" a partir de la concatenación de "forename" y "surname"

In [0]:
from pyspark.sql.functions import col, concat, concat_ws, current_timestamp, lit

In [0]:
# Build the output DataFrame from the raw person records:
#   * rename "personId" to "person_id"
#   * add "ingestion_date" (current timestamp) and "environment" (widget value)
#   * flatten the nested "personName" struct into a single "name" column
#
# concat_ws() is used rather than concat(): concat() returns NULL for the whole
# name as soon as either part is NULL, and both "forename" and "surname" are
# declared nullable in the schema. concat_ws() skips NULL parts, so a person
# with only one name part keeps it (the result is an empty string when both
# parts are NULL). When both parts are present the output is unchanged.
persons_with_columns_df = (
    person_df
    .withColumnRenamed("personId", "person_id")
    .withColumn("ingestion_date", current_timestamp())
    .withColumn("environment", lit(v_environment))
    .withColumn(
        "name",
        concat_ws(" ", col("personName.forename"), col("personName.surname")),
    )
    .drop("personName")
)
persons_with_columns_df.display()

### Paso 3 - Escribir la salida en formato parquet

In [0]:
# Persist the transformed DataFrame as Parquet; "overwrite" mode replaces any
# data already present at the target path.
(
    persons_with_columns_df.write
    .mode("overwrite")
    .parquet("/mnt/moviehistoryl/silver/person")
)

In [0]:
# Read the Parquet output back and display it to confirm what was written.
(
    spark.read
    .parquet("/mnt/moviehistoryl/silver/person")
    .display()
)

In [0]:
# End the notebook run, passing the string "success" as its exit value.
dbutils.notebook.exit("success")