**CELL DESCRIPTION**  

**Cell 1** : Importing required modules  
**Cell 2** : Read data from the Delta table "Recipies" into a DataFrame  
**Cell 3** : Calculate average cooking time duration per difficulty level  
**Cell 4** : Load the final output to a delta table  
**Cell 5** : Auditing the flow  
**Cell 6 & 7** : Logging

In [0]:
%run ./NB_Logging

/tmp/custom_log2022-07-08-06-03-55.log


In [0]:
# Importing required modules

from pyspark.sql.types import *
from pyspark.sql.functions import *
from datetime import date
import datetime
import logging
# Start timestamp; the audit cell subtracts it from the end time to get the execution time.
startTime = datetime.datetime.now()
# Run status: error handlers in later cells set it to "Failed", and the audit
# cell turns a value still at "Running" into "Completed".
status = "Running"

In [0]:
# Load the "Recipies" Delta table from its warehouse location into a DataFrame

df_recipies = spark.read.load("dbfs:/user/hive/warehouse/recipies", format="delta")


In [0]:
# Derive "difficulty" and "total_cook_time", keep only beef recipes, then
# average the total cooking time per difficulty level.

# Combined cook + prep minutes, built once as a Column expression and reused
# by every condition below instead of repeating the sum.
total_minutes = col("cookTime_in_minutes") + col("prepTime_in_minutes")

# Thresholds are numeric literals so the comparison does not depend on an
# implicit string-to-number cast.
#   total <= 30       -> EASY
#   30 < total <= 60  -> MEDIUM
#   total > 60        -> HARD
# A row whose total is null matches no branch and gets a null difficulty,
# because the when-chain has no otherwise().
df_final = (
    df_recipies
    .withColumn(
        "difficulty",
        when(total_minutes <= 30, "EASY")
        .when(total_minutes > 60, "HARD")
        .when((total_minutes > 30) & (total_minutes <= 60), "MEDIUM"),
    )
    .withColumn("total_cook_time", total_minutes)
)


# Extracting only recipes that have beef as one of the ingredients
# (case-insensitive substring match on the "ingredients" column)

df_final = df_final.filter(lower(col("ingredients")).like("%beef%"))


# Calculate average cooking time duration per difficulty level.
# `round` and `avg` are the Spark column functions brought in by the wildcard
# import of pyspark.sql.functions, not the Python builtins.
# On failure the error is logged, status becomes "Failed" and df_final keeps
# the filtered, un-aggregated rows.
try:
    df_final = df_final.groupBy("difficulty").agg(
        round(avg("total_cook_time"), 2).alias("avg_total_cooking_time")
    )
except Exception as e:
    logger.error(e)
    status = "Failed"


In [0]:
# Save df_final as the Delta table "Recipies_Out", overwriting existing data.
# A failed write is logged and recorded in `status`; it is not re-raised.

try:
    df_final.write.saveAsTable("Recipies_Out", format="delta", mode="overwrite")
except Exception as err:
    logger.error(err)
    status = "Failed"

In [0]:
# Auditing the execution: build a one-row audit record and append it to "Audit"

today = date.today()
notebook_name = "NB_Task2"
#notebook_name = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()

# A status still at "Running" means no earlier handler recorded a failure.
status = "Completed" if status == "Running" else status

no_of_records_processed = df_recipies.count()
# Passed / failed counts are not computed here; they are stored as empty strings.
no_of_records_passed = ""
no_of_records_failed = ""
#no_of_records_inserted = df_final.count()

# The end time is taken after the count above, so the count is part of the duration.
endTime = datetime.datetime.now()
executionTime = endTime - startTime

data2 = [
    (
        today,
        notebook_name,
        executionTime,
        status,
        no_of_records_processed,
        no_of_records_passed,
        no_of_records_failed,
    )
]

# NOTE(review): executionTime is a timedelta and no_of_records_processed is an
# int, yet both columns are declared StringType - confirm the stored text is
# the format the audit consumers expect.
audit_fields = [
    ("date", DateType()),
    ("notebook_name", StringType()),
    ("executionTime", StringType()),
    ("status", StringType()),
    ("no_of_records_processed", StringType()),
    ("no_of_records_passed", StringType()),
    ("no_of_records_failed", StringType()),
]
schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in audit_fields]
)

df_audit = spark.createDataFrame(data=data2, schema=schema)

# Append the audit row to the CSV-backed "Audit" table; a failure is logged
# and reflected in `status` only (the row above was already built).
try:
    df_audit.write.saveAsTable("Audit", format="csv", mode="append")
except Exception as err:
    logger.error(err)
    status = "Failed"

# Flush and close the logging handlers.
logging.shutdown()

In [0]:
# Move the local log file into the DBFS CustomLogging folder.
# p_logfile / p_filename are not defined in this notebook - presumably they
# come from the NB_Logging notebook run at the top; verify there.
log_source = "file:" + p_logfile
log_target = "dbfs:/FileStore/CustomLogging/" + p_filename
dbutils.fs.mv(log_source, log_target)

Out[35]: True

In [0]:
%sql
-- Recreate the custom_logging table over the files under /FileStore/CustomLogging/
DROP TABLE IF EXISTS custom_logging;
CREATE TABLE IF NOT EXISTS custom_logging
USING text OPTIONS (path '/FileStore/CustomLogging/*', header = true)