In [0]:
# Imports and SparkSession setup
import logging

from pyspark.sql import SparkSession
from pyspark.sql.functions import split
from pyspark.sql.types import DateType,TimestampType
# Obtain the session for this notebook under the application name "UseCase";
# getOrCreate() hands back an already-running session when there is one.
spark = (
    SparkSession.builder
    .appName("UseCase")
    .getOrCreate()
)

In [0]:

def get_latest_data(table_name,timestamp_col):
    """Return the rows of a Delta table that carry its newest timestamp value.

    Args:
        table_name: Value placed between the backticks of ``delta.`...``` in
            the generated query.
        timestamp_col: Column whose maximum value selects the rows to keep.

    Returns:
        The DataFrame produced by ``spark.sql`` for the generated query.

    Note:
        Both arguments are interpolated into the SQL text without escaping,
        so they must come from trusted configuration.
    """
    source = f"delta.`{table_name}`"
    newest_value = f"(SELECT MAX({timestamp_col}) FROM {source})"
    query = f"SELECT * FROM {source} WHERE {timestamp_col} = {newest_value}"
    return spark.sql(query)

In [0]:
def apply_transformations(l_df):
    """Shape the raw movie rows into the layout that gets loaded.

    Steps:
      1. Keep only the part of ``imdb_rating`` that precedes the first ``/``.
      2. Cast ``timestamp`` to a timestamp, ``imdb_rating`` to float and
         ``released_at`` to a date.
      3. Select the output columns, renaming ``name`` to ``movie_name`` and
         ``released_at`` to ``release_date``.

    Args:
        l_df: DataFrame with at least the columns ``name``, ``genre``,
            ``imdb_rating``, ``released_at``, ``streaming_on`` and
            ``timestamp``.

    Returns:
        DataFrame with columns ``movie_name``, ``genre``, ``imdb_rating``,
        ``release_date``, ``streaming_on`` and ``timestamp``.
    """
    rating_prefix = split(l_df['imdb_rating'], '/').getItem(0)
    stripped = l_df.withColumn('imdb_rating', rating_prefix)

    # Each cast reads its input from `stripped`, i.e. the frame that already
    # holds the trimmed rating text.
    typed = stripped.withColumn("timestamp", stripped["timestamp"].cast(TimestampType()))
    typed = typed.withColumn("imdb_rating", stripped["imdb_rating"].cast("float"))
    typed = typed.withColumn("released_at", stripped["released_at"].cast(DateType()))

    output_columns = [
        typed["name"].alias("movie_name"),
        typed["genre"],
        typed["imdb_rating"],
        typed["released_at"].alias("release_date"),
        typed["streaming_on"],
        typed["timestamp"],
    ]
    return typed.select(*output_columns)

In [0]:

def write_to_postgres(final_df,database_url,postgres_table,properties):
    """Append ``final_df`` to a Postgres table over JDBC.

    Args:
        final_df: DataFrame to write; its ``write.jdbc`` writer is used.
        database_url: JDBC URL, forwarded as ``url``.
        postgres_table: Target table name, forwarded as ``table``.
        properties: JDBC connection properties, forwarded unchanged.

    Returns:
        True when the write completed, False when it raised. The failure is
        logged with its traceback rather than discarded.
    """
    try:
        final_df.write.jdbc(url=database_url, table=postgres_table, mode="append", properties=properties)
    except Exception:
        # Preserve the boolean contract for callers, but record why the write
        # failed. The URL and table name are deliberately left out of the
        # message because the caller may source them from a secret store.
        logging.getLogger(__name__).exception("Append to Postgres over JDBC failed")
        return False
    return True

In [0]:
# Source Delta table and the column used to pick its newest rows.
table_name="/delta/dq_table"
timestamp_col="timestamp"

# Connection settings are read from the 'usecaseScope' secret scope so that
# no credentials appear in the notebook.
database_url = dbutils.secrets.get('usecaseScope','database_url')
postgres_table = dbutils.secrets.get('usecaseScope','postgres_table')
properties = {
    "user": dbutils.secrets.get('usecaseScope','user'),
    "password": dbutils.secrets.get('usecaseScope','password'),
    "driver": "org.postgresql.Driver"
}

# Run the pipeline: read -> transform -> load. Errors from the read and
# transform steps propagate unchanged, keeping their original type and
# traceback, instead of being re-wrapped in a bare Exception.
l_df=get_latest_data(table_name,timestamp_col)
final_df=apply_transformations(l_df)
load=write_to_postgres(final_df,database_url,postgres_table,properties)
# `load` is the boolean returned by write_to_postgres (False means the write raised).
print(load)
