In [1]:
%run "./ADP_Farmatic_Def"

In [2]:
#################################################################################
""" Process functions for FARMATIC FILES QA DATA GENERATION

"""
 #Who                 When           What
 #Victor Salesa       20/11/2018     Initial Version
 #Victor Salesa       21/11/2018     Added QA_SERIALIZE_FARMATIC_FILE_NAME_VALIDATION_DATA to encapsulate data serialization for file name validation
 #Victor Salesa       11/01/2019     __QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA__ added VALIDATION_TYPE column to output with value "E" for external validation
 #Victor Salesa       11/01/2019     QA_SERIALIZE_FARMATIC_FILE_NAME_VALIDATION_DATA changed write db output to CTL.PROCESS_FILE_VAL
 #Victor Salesa       11/01/2019     QA_SERIALIZE_FARMATIC_FILE_NAME_VALIDATION_DATA changed write file output to ctl_process_file_val 
 #Victor Salesa       18/03/2019     QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA added STATUS logic to be -1 when the file is not correct.
 #Victor Salesa       18/03/2019     QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA Changed START_DATE to be taken when the process was really initiated (FMT_Data_ValidateFiles) and END_DATE just before the process writes to DB ADP_Farmatic_QA
 #Ana Perez           16/04/2019     Included log management and exception management
 #Victor Salesa       26/04/2019     Moved CTL.STG_PROCESS_FILE_VAL and CTL_PROCESS_FILE_VAL table names to variables
################################################################################

####QUALITY DEF##########################################################################################################################
#Pending Unify variables with ADP_Farmatic Sellout

# Database table that QA_SERIALIZE_FARMATIC_FILE_NAME_VALIDATION_DATA appends to via saveToDB.
__QUALITY_PFV_DB_TABLE_NAME__ = "CTL.STG_PROCESS_FILE_VAL"

# Value passed as table_name to saveAsCanonical for the same data.
__QUALITY_PFV_H_TABLE_NAME__ = "CTL_PROCESS_FILE_VAL"

# Second argument handed to saveAsCanonical (the output location).
# __QUALITY_BASE_PATH__ is not defined in this notebook; presumably it comes from
# the %run'd ADP_Farmatic_Def notebook and already ends with a path separator,
# since none is inserted here -- TODO confirm.
__QUALITY_PFV_H_FILE_PATH__ = __QUALITY_BASE_PATH__ + "ctl_process_file_val"

########################################################################################################################################

def __QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA__(df):
  """Project file-name validation results onto the canonical control-row layout.

    Every column of df whose name contains "_ok" is treated as a validation
    flag; the same name with "_ok" removed is the column holding the raw value
    that was validated. From those pairs two JSON-like strings are built per row:

      ERROR_CODE   -- {"<FIELD>":"0",...}  "0" when the flag is True, "1" otherwise
      MESSAGE_TEXT -- {"<FIELD>":"<raw value>",...}

    Parameters:
      Dataframe df -- must expose name, file_timestamp, pharmacy_unique_code,
                      file_type, START_DATE and file_name_ok, plus the raw column
                      behind every *_ok flag (file_name is derived from name here)
    Return:
      Dataframe -- columns FILE_NAME, LANDING_DATE, VALIDATION_TYPE ('E'),
                   PMS_CODE ('FMT'), COUNTRY_CODE ('ES'), PHARMACY_CODE,
                   BUSINESS_AREA, START_DATE, END_DATE (current timestamp),
                   STATUS (0 when file_name_ok is True, -1 otherwise),
                   MESSAGE_TEXT and ERROR_CODE
    Raises:
      Exception -- any failure is logged through ADP_log_exception and re-raised
                   as a plain Exception chained to the original error
  """
  try:
    # Validation flag columns (substring match kept as-is so the set of
    # recognised flags does not change).
    flag_columns = [name for name in df.columns if '_ok' in name]

    def json_object(members):
      # Wrap the member expressions in braces, separated by literal commas.
      # With no members this yields "{}".
      pieces = [lit("{")]
      for position, member in enumerate(members):
        if position:
          pieces.append(lit(','))
        pieces.append(member)
      pieces.append(lit("}"))
      return concat(*pieces)

    # The JSON members are built as plain column expressions instead of
    # temporary aliased columns: the previous aliases were made of word
    # initials only, so two fields sharing initials would have produced
    # duplicate, ambiguous column names.
    error_members = []
    raw_members = []
    for flag_column in flag_columns:
      raw_column = flag_column.replace("_ok", "")
      json_key = '"' + raw_column.upper() + '":'

      # "0" = validation passed, "1" = failed (a NULL flag also falls to "1").
      error_members.append(
        when(col(flag_column) == True, json_key + '"0"').otherwise(json_key + '"1"')
      )

      # The raw value is emitted regardless of the flag; the former
      # when/otherwise here had two identical branches.
      # NOTE(review): values are not JSON-escaped, and Spark's concat returns
      # NULL when any input is NULL, so a NULL raw value presumably nulls the
      # whole MESSAGE_TEXT -- confirm this is intended.
      raw_members.append(
        concat(lit(json_key), concat(lit('"'), col(raw_column), lit('"')))
      )

    file_db = (df.repartition(32)
      # file_name must exist before the raw members are evaluated because a
      # file_name_ok flag refers to it.
      .withColumn("file_name", col("name"))
      .select(
        col("Name").cast(StringType()).alias("FILE_NAME"),
        col("file_timestamp").cast(TimestampType()).alias("LANDING_DATE"),
        lit('E').cast(StringType()).alias("VALIDATION_TYPE"),
        lit('FMT').cast(StringType()).alias("PMS_CODE"),
        lit('ES').cast(StringType()).alias("COUNTRY_CODE"),
        col("pharmacy_unique_code").cast(StringType()).alias("PHARMACY_CODE"),
        col("file_type").cast(StringType()).alias("BUSINESS_AREA"),
        col("START_DATE"),
        from_unixtime(unix_timestamp(current_timestamp())).cast(TimestampType()).alias("END_DATE"),
        when(col("file_name_ok") == True, lit(0)).otherwise(lit(-1)).cast(DecimalType(18,0)).alias("STATUS"),
        json_object(raw_members).cast(StringType()).alias("MESSAGE_TEXT"),
        json_object(error_members).cast(StringType()).alias("ERROR_CODE")
      )
    )
  except Exception as err:
    ADP_log_exception(process, logger_name, level_action, log_level, "", sys._getframe().f_code.co_name,  sys.exc_info())
    # Same exception type as before for callers; "from err" keeps the cause explicit.
    raise Exception(err) from err
  return file_db
  
#######################################################################################
  
def QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA(df):
  """Process file data and convert it into canonical format

    Public entry point that delegates to
    __QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA__ and logs any failure under
    this function's own name before re-raising it.

    Parameters:
      Dataframe df -- Dataframe with the file name validation results
                      (the *_ok flag columns and the raw columns behind them)
    Return:
      Dataframe -- new dataframe with the columns FILE_NAME, LANDING_DATE,
                   VALIDATION_TYPE, PMS_CODE, COUNTRY_CODE, PHARMACY_CODE,
                   BUSINESS_AREA, START_DATE, END_DATE, STATUS, MESSAGE_TEXT
                   and ERROR_CODE
    Raises:
      Exception -- wraps and chains whatever the delegate raised

    Example:
      file_db = df.transform(QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA)
  """
  #Who                 When           What
  #Victor Salesa       20/11/2018     Initial version
  #Ana Perez           16/04/2019     Included log management and exception management

  try:
    return __QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA__(df)
  except Exception as err:
    ADP_log_exception(process, logger_name, level_action, log_level, "", sys._getframe().f_code.co_name,  sys.exc_info())
    # Exception type is unchanged for callers; "from err" records the cause explicitly.
    raise Exception(err) from err

#######################################################################################
  
def QA_SERIALIZE_FARMATIC_FILE_NAME_VALIDATION_DATA(df):
  """Serialize QA data for file name validation

    Canonizes the validation results, conforms them to
    __CTL_PROCESS_FILE_VAL_SCHEMA__ through GetDataFrameAsSchema, then appends
    them first to __QUALITY_PFV_H_FILE_PATH__ (saveAsCanonical) and then to the
    database table __QUALITY_PFV_DB_TABLE_NAME__ (saveToDB).

    Parameters:
      Dataframe df  -- Dataframe with the proper structure containing file name quality information
    Return:
      Dataframe -- the canonized dataframe that was written to both targets
    Raises:
      Exception -- any failure is logged through ADP_log_exception and re-raised
                   as a plain Exception chained to the original error
  """
  #Who                 When           What
  #Victor Salesa       21/11/2018     Initial version
  #Victor Salesa       11/01/2019     Changed write db output to CTL.PROCESS_FILE_VAL
  #Victor Salesa       11/01/2019     Changed write file output to ctl_process_file_val
  #Victor Salesa       27/02/2019     Changed write to parquet
  #Victor Salesa       28/02/2019     Added select fields in order and force schema to be sure that schema in file and db is the right one
  #Victor Salesa       18/03/2019     Added STATUS logic to be -1 when the file is not correct.
  #Ana Perez           16/04/2019     Included log management and exception management

  function_name = sys._getframe().f_code.co_name
  try:
    df.cache()

    #Transform file name validation process results into canonical
    file_db = df.transform(QA_CANONIZE_FARMATIC_CTL_PROCESS_FILE_DATA).cache()
    file_db = GetDataFrameAsSchema(file_db,__CTL_PROCESS_FILE_VAL_SCHEMA__)

    # Counting is a Spark action; run it once and reuse the figure in both
    # log messages instead of triggering a second job for the same dataframe.
    row_count = str(file_db.count())

    #Write to Folder
    ADP_log_debug(process, logger_name, level_action, log_level, "Before saveAsCanonical ctl_process_file_val: " + row_count + " rows ", function_name)
    saveAsCanonical(file_db,__QUALITY_PFV_H_FILE_PATH__,table_name=__QUALITY_PFV_H_TABLE_NAME__,mode='append')
    ADP_log_debug(process, logger_name, level_action, log_level, "After saveAsCanonical ctl_process_file_val", function_name)

    #Write to Database
    # NOTE(review): the log text says CTL.PROCESS_FILE_VAL while the target is
    # whatever __QUALITY_PFV_DB_TABLE_NAME__ holds; text left unchanged in case
    # log consumers match on it.
    ADP_log_debug(process, logger_name, level_action, log_level, "Before saveToDB CTL.PROCESS_FILE_VAL: " + row_count + " rows ", function_name)
    saveToDB(file_db,__QUALITY_PFV_DB_TABLE_NAME__,mode="append",debug=False)
    ADP_log_debug(process, logger_name, level_action, log_level, "After saveToDB CTL.PROCESS_FILE_VAL", function_name)

    return file_db
  except Exception as err:
    ADP_log_exception(process, logger_name, level_action, log_level, "", function_name,  sys.exc_info())
    # Exception type is unchanged for callers; "from err" records the cause explicitly.
    raise Exception(err) from err
#######################################################################################