In [0]:
%run ./movie_utilities

In [0]:

# --- Storage locations (DBFS) -------------------------------------------
# Source files and the bronze layer.
rawPath = "dbfs:/FileStore/tables/08232022/raw"
bronzePath = "dbfs:/FileStore/tables/final_proj/bronze"

# Silver layer: one location per table produced from the bronze data.
silverPathMovie = "dbfs:/FileStore/tables/final_proj/silver_movie"
silverPathGenreLookup = "dbfs:/FileStore/tables/final_proj/silver_genre_lookup"
silverPathGenreJunc = "dbfs:/FileStore/tables/final_proj/silver_genre_movie_junc"
silverPathLanguage = "dbfs:/FileStore/tables/final_proj/silver_language_lookup"

# Raw to Bronze Pipeline

In [0]:
# Read the raw files, apply the raw-layer transformation, and save the
# result at the bronze path.
rawDF = read_batch_raw(rawPath)
transformedRawDF = transform_raw(rawDF)

# Pass the frame as `dataframe=`, the keyword every other batch_writer call
# in this notebook uses.
# NOTE(review): the silver writers that set `partition_column` also pass
# `partition=True`; confirm against movie_utilities whether that flag is
# required here for the bronze data to actually be partitioned.
rawToBronzeWriter = batch_writer(
    dataframe=transformedRawDF, partition_column="p_ingestdate"
)

rawToBronzeWriter.save(bronzePath)

In [0]:
# Create the `movie_bronze` table over the bronze path
# (create_table is defined in movie_utilities).
create_table(
    tableName="movie_bronze",
    deltaPath=bronzePath,
)

# Bronze to Silver Pipeline

In [0]:
# Load the bronze records, then derive the four silver-layer frames from
# them: the movie frame, the genre lookup, the genre/movie junction and the
# language lookup.
bronzeDF = read_batch_bronze(spark)
(
    silver_movie,
    genre_lookup,
    genre_movie_junction,
    language_lookup,
) = generate_silverDF(bronzeDF)

In [0]:
# Split the silver movie frame into the rows to load now and the rows to
# quarantine (the split rule lives in movie_utilities).
(
    silver_movie_clean,
    silver_movie_quarantine,
) = generate_clean_and_quarantine_dataframes(silver_movie)

In [0]:
# Build a writer for the clean movie rows, requesting partitioning on
# `p_createddate` and exclusion of the `value` column, then save it to the
# silver movie path.
bronzeToSilverWriter_movie = batch_writer(
    dataframe=silver_movie_clean,
    partition=True,
    partition_column="p_createddate",
    exclude_columns=["value"],
)
bronzeToSilverWriter_movie.save(silverPathMovie)

In [0]:
# Create the `silver_movie` table over the silver movie path.
create_table(
    tableName="silver_movie",
    deltaPath=silverPathMovie,
)

In [0]:
# Save the genre lookup frame to its silver path; only the dataframe is
# passed, so batch_writer's defaults apply for everything else.
bronzeToSilverWriter_genre_lookup = batch_writer(dataframe=genre_lookup)
bronzeToSilverWriter_genre_lookup.save(silverPathGenreLookup)

In [0]:
# Create the `silver_genre_lookup` table over the genre lookup path.
create_table(
    tableName="silver_genre_lookup",
    deltaPath=silverPathGenreLookup,
)

In [0]:
# Save the genre/movie junction frame to its silver path; only the
# dataframe is passed, so batch_writer's defaults apply for everything else.
bronzeToSilverWriter_genre_junc = batch_writer(dataframe=genre_movie_junction)
bronzeToSilverWriter_genre_junc.save(silverPathGenreJunc)

In [0]:
# Create the `silver_genre_movie_junc` table over the junction path.
create_table(
    tableName="silver_genre_movie_junc",
    deltaPath=silverPathGenreJunc,
)

In [0]:
# Save the language lookup frame to its silver path; only the dataframe is
# passed, so batch_writer's defaults apply for everything else.
bronzeToSilverWriter_language = batch_writer(dataframe=language_lookup)
bronzeToSilverWriter_language.save(silverPathLanguage)

In [0]:
# Create the `silver_language_lookup` table over the language lookup path.
create_table(
    tableName="silver_language_lookup",
    deltaPath=silverPathLanguage,
)

# Update Bronze Table

In [0]:
# Report the clean silver rows back to the bronze data with status "loaded"
# (presumably flags the matching bronze records — see movie_utilities).
update_bronze_table_status(
    spark,
    bronzePath,
    silver_movie_clean,
    "loaded",
)

In [0]:
# Report the quarantined silver rows back to the bronze data with status
# "quarantined"; the next section filters the bronze table on this value.
update_bronze_table_status(
    spark,
    bronzePath,
    silver_movie_quarantine,
    "quarantined",
)

# Silver Updates: Repair and Load Quarantined Records

In [0]:
# Pull only the bronze records whose status column reads 'quarantined'.
bronzeQuarantinedDF = (
    spark.read
    .table("movie_bronze")
    .filter("status = 'quarantined'")
)

In [0]:
# Re-derive the silver frames from the quarantined bronze records only.
# The three lookup/junction results are bound to distinct `_quarantined`
# names so they do not overwrite `genre_lookup`, `genre_movie_junction` and
# `language_lookup` from the main bronze-to-silver run; re-running one of
# the earlier writer cells would otherwise write quarantine-derived data.
# NOTE(review): none of the visible cells write these three frames anywhere;
# confirm whether the quarantined rows' genre/language data should be loaded.
(
    silver_movie_quarantined,
    genre_lookup_quarantined,
    genre_movie_junction_quarantined,
    language_lookup_quarantined,
) = generate_silverDF(bronzeQuarantinedDF)

In [0]:
# Import the functions module under a namespace rather than
# `from pyspark.sql.functions import abs`, which would replace Python's
# builtin abs() for every later cell. It also makes `col` explicit instead
# of relying on a name leaked in by `%run ./movie_utilities`.
from pyspark.sql import functions as F

# Repair the quarantined rows by replacing Runtime with its absolute value.
transformedQuarantinedDF = silver_movie_quarantined.withColumn(
    "Runtime", F.abs(F.col("Runtime"))
)

In [0]:
# Write the repaired quarantined rows to the same silver movie path as the
# clean rows, with the same writer options (partitioning requested on
# `p_createddate`, `value` column excluded).
bronzeToSilverWriter_movieQuarantined = batch_writer(
    dataframe=transformedQuarantinedDF,
    partition=True,
    partition_column="p_createddate",
    exclude_columns=["value"],
)
bronzeToSilverWriter_movieQuarantined.save(silverPathMovie)

In [0]:
# Report the repaired rows back to the bronze data with status "loaded",
# now that they have been written to the silver movie path.
update_bronze_table_status(
    spark,
    bronzePath,
    transformedQuarantinedDF,
    "loaded",
)