# Create/Refresh Movie Transaction table 

## Declare Catalog and Schema for future use

Since we are using the Medallion Architecture here, we will have Bronze, Silver and Gold schemas. In this code block we declare the catalog and schema variables, which we will reference later on to avoid hardcoding values.<br>

Refer to the notebook Movie-Data-Engineering-Using-Databricks/Create medallion Architecture - movie/Create medallion Architecture - movie/create medellion architecture for movies.ipynb to see how they are created in the system.

In [0]:
# Catalog that holds all three medallion-layer schemas.
catalog = "movie"
# Bronze, Silver and Gold schema names, in that order; interpolated into the
# SQL statements below instead of hardcoding them.
b_schema, s_schema, g_schema = "movie_bronze", "movie_silver", "movie_gold"

## Create a temporary view that fetches clean data with the primary key and foreign keys resolved

In [0]:
# Build the cleaned movie rows from the bronze `movies` table and register
# them as the temp view `movie_temp`, which the load step inserts from.
#
# Catalog and schema names come from the `catalog`, `b_schema` and `s_schema`
# variables declared at the top of the notebook rather than being hardcoded.
#
# NOTE(review): CONCAT('M000', ID) does not zero-pad, so ID 10 yields 'M00010'
#   (six characters) while ID 9 yields 'M0009' (five). Left unchanged because
#   other tables may already reference these key values -- confirm before
#   switching to LPAD.
# NOTE(review): the 'Other' fallback for platform_id is a literal, not a value
#   looked up in movie_platform_master, yet the target table declares a foreign
#   key on platform_id. Confirm that a matching master row exists.
spark.sql(f"""
SELECT
  CONCAT('M000', m.ID) AS movie_id,
  m.Title AS movie_title,
  m.Year AS release_year,
  COALESCE(m.Rating, 'not rated') AS rating,
  -- Anything that does not parse as a number (including NaN) becomes '0'.
  CASE
    WHEN TRY_CAST(m.IMDb AS DOUBLE) IS NULL OR isnan(TRY_CAST(m.IMDb AS DOUBLE)) THEN '0'
    ELSE TRIM(m.IMDb)
  END AS IMDb_rating,
  -- Both the literal 'na' and NULL are mapped to '0'.
  CASE
    WHEN m.`Rotten Tomatoes` = 'na' THEN '0'
    ELSE TRIM(COALESCE(m.`Rotten Tomatoes`, '0'))
  END AS Rotten_Tomatoes_rating,
  -- Resolve the genre name to its master key; NULL when no master row matches.
  (
    SELECT g.Genre_id
    FROM {catalog}.{s_schema}.movie_genre_master AS g
    WHERE m.Genre = g.Genre
    LIMIT 1
  ) AS Genre_id,
  -- Netflix is checked first, so a movie flagged for both platforms gets the
  -- Netflix platform_id.
  (
    CASE
      WHEN m.Netflix = 1 THEN (
        SELECT platform_id
        FROM {catalog}.{s_schema}.movie_platform_master
        WHERE platform_name = 'Netflix'
        LIMIT 1
      )
      WHEN m.`Amazon Prime Video` = 1 THEN (
        SELECT platform_id
        FROM {catalog}.{s_schema}.movie_platform_master
        WHERE platform_name = 'Amazon Prime Video'
        LIMIT 1
      )
      ELSE 'Other'
    END
  ) AS platform_id
FROM {catalog}.{b_schema}.movies AS m""").createOrReplaceTempView("movie_temp")

## Create table movie_data_transaction in the Silver schema if it doesn't already exist

In [0]:
# Create the silver-layer Delta table that receives the rows from `movie_temp`.
# IF NOT EXISTS makes the cell safe to re-run; an existing table is left as is.
#
# movie_id is declared NOT NULL because Databricks only accepts a PRIMARY KEY
# over columns defined as NOT NULL.
# The two foreign keys point at the genre and platform master tables in the
# same catalog and schema.
spark.sql(f"""CREATE TABLE IF NOT EXISTS {catalog}.{s_schema}.movie_data_transaction (
    movie_id STRING NOT NULL,
    movie_title STRING,
    release_year INT,
    rating STRING,
    IMDb_rating STRING,
    Rotten_Tomatoes_rating STRING,
    Genre_id STRING,
    platform_id STRING,
    PRIMARY KEY (movie_id),
    FOREIGN KEY (Genre_id) REFERENCES {catalog}.{s_schema}.movie_genre_master(Genre_id),
    FOREIGN KEY (platform_id) REFERENCES {catalog}.{s_schema}.movie_platform_master(platform_id)
) USING DELTA""").display()

## Truncate and Insert data into table

In [0]:
%sql
-- Full refresh: empty the target table, then reload it from the movie_temp view.
-- Columns are listed explicitly on both sides so the load does not depend on
-- the view and the table happening to share the same column order.
TRUNCATE TABLE movie.movie_silver.movie_data_transaction;
INSERT INTO movie.movie_silver.movie_data_transaction (
    movie_id,
    movie_title,
    release_year,
    rating,
    IMDb_rating,
    Rotten_Tomatoes_rating,
    Genre_id,
    platform_id
)
SELECT
    movie_id,
    movie_title,
    release_year,
    rating,
    IMDb_rating,
    Rotten_Tomatoes_rating,
    Genre_id,
    platform_id
FROM movie_temp;

num_affected_rows,num_inserted_rows
24664,24664


## Display Data to test

In [0]:
%sql
-- Show the loaded rows to eyeball the result of the refresh.
SELECT
    *
FROM movie.movie_silver.movie_data_transaction

movie_id,movie_title,release_year,rating,IMDb_rating,Rotten_Tomatoes_rating,Genre_id,platform_id
G0001,Terminator: Dark Fate,2019,18+,6.2,81,G014,P002
G0002,Gemini Man,2019,13+,5.7,74,G014,P002
G0003,Rambo: Last Blood,2019,18+,6.1,72,G014,P002
G0004,The Courier,2019,18+,4.9,50,G014,P002
G0005,Crawl,2019,18+,6.1,79,G014,P002
G0006,The Kill Team,2019,18+,5.9,61,G014,P002
G0007,The Rhythm Section,2020,18+,5.3,63,G014,P002
G0008,Legionnaire's Trail,2020,18+,3.1,39,G014,P002
G0009,The Avengers,2012,13+,8.0,94,G014,P002
G00010,Escape Plan: The Extractors,2019,18+,4.4,49,G014,P002
