In [0]:
%python
## Import libraries
import pyspark as ps
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import *
import pandas as pd
import json



In [0]:
# Read the notebook parameters from the Databricks widgets in one pass
job_id, usecase_id = (
    dbutils.widgets.get(widget_name) for widget_name in ("job_id", "usecase_id")
)

# Echo each parameter so the values show up in the run output
for label, value in (("job_id", job_id), ("usecase_id", usecase_id)):
    print(f"{label}: {value}")

In [0]:
class ValidateNPI:
  """Run the NPI validation SQL for one use case and package the result.

  Typical call order (as used by the driver cell of this notebook):
  ``set_query_string`` -> ``run_query_and_set_sqldf`` ->
  ``format_sqldf_to_json`` -> ``construct_microservice_response``.

  Attributes:
    query: SQL script (statements separated by ``;``) to execute.
    sql_df: list of rows collected from every executed statement.
    data_json: legacy placeholder, not used by any method of this class.
    usecase_id: use case id passed to the last ``run_query_and_set_sqldf``.
    nb_json: ``{"data": [...]}`` payload built by ``format_sqldf_to_json``.
    response: envelope built by ``construct_microservice_response``.
  """

  # Characters accepted in a use case id. The id is interpolated into SQL
  # string literals and the script is later split on ';', so quotes,
  # semicolons and whitespace must never get through.
  _USECASE_ID_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789_-"
  )

  # Placeholder honoured by run_query_and_set_sqldf for queries assigned to
  # ``self.query`` by hand; the query built by set_query_string never
  # contains it.
  _LEGACY_PLACEHOLDER = "Usecase_parm"

  def __init__(self):
    self.query = "Query About to fill"
    # A list from the start: run_query_and_set_sqldf fills it with rows.
    self.sql_df = []
    self.data_json = {"Data":[]}
    self.usecase_id = None
    # Defined up front so construct_microservice_response cannot hit an
    # AttributeError when it is called before format_sqldf_to_json.
    self.nb_json = {"data": []}
    self.response = None

  @staticmethod
  def _validated_usecase_id(usecase_id):
    """Return ``usecase_id`` as a stripped string that is safe to embed in SQL.

    Raises:
      ValueError: if the id is None, empty, or contains any character
        outside ASCII letters, digits, ``_`` and ``-``.
    """
    candidate = "" if usecase_id is None else str(usecase_id).strip()
    if not candidate or any(
        ch not in ValidateNPI._USECASE_ID_CHARS for ch in candidate):
      raise ValueError(
        f"Invalid usecase_id {usecase_id!r}: expected only letters, digits, "
        "'_' or '-'"
      )
    return candidate

  def logger(self, message):
    """Print ``message`` to the notebook output."""
    print(message)

  def set_query_string(self, usecase_parm):
    """Build the three-statement validation script and store it in ``self.query``.

    The statements, in order:
      1. delete the use case's rows from the ``..._TEMP_NPI_VALIDATE`` table;
      2. re-insert candidate rows from ``NGEBox_Suggestion_Requests_Processed``
         (today's rows for use cases 513/514, otherwise rows inside the
         ``hist_retention`` window whose suggestion is absent from
         ``rep_suggestion_vod__c`` or has ``DISMISSED_VOD__C = 0``);
      3. MERGE into the processed table, appending 'Invalid NPI Value' to
         ``Not_Sent_To_Veeva_Reason`` for candidates with a non-empty
         ``hcp_npi`` that did not pass the identifier/xref join.

    Args:
      usecase_parm: use case id; validated before being interpolated.

    Raises:
      ValueError: if ``usecase_parm`` is not a safe identifier.
    """
    # Validate first: the value comes from a notebook widget and is embedded
    # directly into SQL string literals below.
    usecase_parm = self._validated_usecase_id(usecase_parm)
    query = f"""delete from com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed_TEMP_NPI_VALIDATE where usecase_id = '{usecase_parm}';

WITH t(usecase_id, hist_retention) AS (
  SELECT USECASE_ID, hist_retention
  FROM com_us_alyt_ngebox.metadata_usecase
  WHERE USECASE_ID = '{usecase_parm}'
),
s(SUGGEST_EXTERNAL_ID_VOD__C, NGEBox_Record_ID) AS (
  SELECT p.SUGGEST_EXTERNAL_ID_VOD__C, p.NGEBox_Record_ID
  FROM (select * from com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed where usecase_id = '{usecase_parm}') p
  LEFT JOIN com_us_lake.rep_suggestion_vod__c s ON p.SUGGEST_EXTERNAL_ID_VOD__C = s.SUGGESTION_EXTERNAL_ID_VOD__C
  WHERE (
      s.SUGGESTION_EXTERNAL_ID_VOD__C IS NULL
      OR (s.SUGGESTION_EXTERNAL_ID_VOD__C IS NOT NULL AND s.DISMISSED_VOD__C = 0)
    )
)
insert into  com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed_TEMP_NPI_VALIDATE 
select * from com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed 
WHERE case when '{usecase_parm}' IN ('513','514') then (usecase_id = '{usecase_parm}' AND CAST(created_date AS DATE) = current_date) 
  else (usecase_id = '{usecase_parm}' AND CAST(created_date AS DATE) >= date_add(current_date, -(SELECT hist_retention FROM t)) AND SUGGEST_EXTERNAL_ID_VOD__C IN (SELECT SUGGEST_EXTERNAL_ID_VOD__C FROM s)) end; 

with x(NGEBOX_RECORD_ID) AS (select distinct NGEBOX_RECORD_ID from
( select * from com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed_TEMP_NPI_VALIDATE where USECASE_ID = '{usecase_parm}') p
left join com_us_hub.ref_cust_xref x_iengage on p.HCP_OMNI_ID = x_iengage.omni_id and x_iengage.src_system = 'iEngage'
left join com_us_hub.ref_cust_xref x_KOMO on p.HCP_OMNI_ID = x_KOMO.omni_id and x_KOMO.src_system = 'KOMO'
inner join com_us_hub.ref_cust_identifier as ci on p.hcp_omni_id = ci.omni_id and p.hcp_npi = ci.identifier and ci.identifier_typ = 'NPI' and ci.scd_curr_ind = 'Y'
where (case when '{usecase_parm}' in ('514') then ((x_iengage.omni_id is not NULL OR  x_KOMO.omni_id is not null ) AND
 case when x_iengage.omni_id is not NULL and  x_KOMO.omni_id is not null then (x_iengage.omni_id = ci.omni_id and  x_KOMO.omni_id = ci.omni_id  and x_iengage.src_id = p.account_vod__c and x_KOMO.src_id = p.HCP_NPI)
 when x_iengage.omni_id is not NULL then (x_iengage.omni_id = ci.omni_id and x_iengage.src_id = p.account_vod__c) 
 when x_KOMO.omni_id is not null then (x_KOMO.omni_id = ci.omni_id and x_KOMO.src_id = p.HCP_NPI) end)
 else (x_iengage.src_system ='iEngage'  and x_iengage.omni_id = ci.omni_id  and  x_iengage.src_id = p.account_vod__c )end)),
y(NGEBOX_RECORD_ID) AS (
  SELECT p.NGEBOX_RECORD_ID FROM com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed_TEMP_NPI_VALIDATE p WHERE p.NGEBOX_RECORD_ID NOT IN (
    SELECT DISTINCT x.NGEBOX_RECORD_ID FROM x
  )
)
MERGE INTO com_us_alyt_ngebox.NGEBox_Suggestion_Requests_Processed   AS t
USING y AS y
ON (t.NGEBOX_RECORD_ID = y.NGEBOX_RECORD_ID and t.usecase_id = '{usecase_parm}' AND length(t.hcp_npi) > 0)
WHEN MATCHED THEN
  UPDATE SET  t.Not_Sent_To_Veeva_Reason = (case  when ISNULL(t.Not_Sent_To_Veeva_Reason)= true then 'Invalid NPI Value' else concat(t.Not_Sent_To_Veeva_Reason, '|', 'Invalid NPI Value' ) end); """
    self.query = query

  def run_query_and_set_sqldf(self, usecase_id):
    """Execute every statement of ``self.query`` and collect the returned rows.

    The script is split on ``;`` (so no statement may contain a literal
    semicolon); blank fragments are skipped. Rows returned by each statement
    are appended, in execution order, to ``self.sql_df``.

    Args:
      usecase_id: stored on ``self.usecase_id``; also substituted for the
        legacy ``Usecase_parm`` placeholder when a statement contains it.

    Raises:
      ValueError: if a statement contains the legacy placeholder and
        ``usecase_id`` is not a safe identifier.
    """
    self.usecase_id = usecase_id
    self.sql_df = []
    statements = [part for part in self.query.split(";") if part.strip()]
    for statement in statements:
      # Only hand-written queries carry the placeholder; validating here
      # keeps that path as safe as set_query_string.
      if self._LEGACY_PLACEHOLDER in statement:
        statement = statement.replace(
          self._LEGACY_PLACEHOLDER, self._validated_usecase_id(usecase_id))
      # sqlContext is the SQL entry point provided by the notebook runtime.
      self.sql_df.extend(sqlContext.sql(statement).collect())

  def format_sqldf_to_json(self):
    """Convert the collected rows into ``self.nb_json = {"data": [...]}``.

    Rows are turned into dicts via ``asDict(True)`` and round-tripped through
    pandas' JSON writer, so the stored records are whatever
    ``DataFrame.to_json(orient="records")`` produces for them.
    """
    row_dicts = [row.asDict(True) for row in self.sql_df]
    records_json = pd.DataFrame(row_dicts).to_json(orient="records")
    self.nb_json = {"data": json.loads(records_json)}

  def construct_microservice_response(self):
    """Wrap ``self.nb_json`` in the ``nge_response`` envelope (status 200)."""
    self.response = {
      "nge_response": {
        "status": 200,
        "body": self.nb_json
      }
    }

In [0]:
# Driver: run the NPI validation for the requested use case and return the
# result to whoever invoked this notebook.
usecase_id = dbutils.widgets.get("usecase_id")
nb_obj = ValidateNPI()
nb_obj.set_query_string(usecase_parm=usecase_id)
nb_obj.run_query_and_set_sqldf(usecase_id=usecase_id)
nb_obj.format_sqldf_to_json()
nb_obj.construct_microservice_response()
# dbutils.notebook.exit is documented as taking a string; serialise the
# response explicitly so the caller receives valid JSON instead of a Python
# dict repr (single quotes, None/True) that JSON parsers reject.
# NOTE(review): confirm the consumer parses the exit value as JSON.
dbutils.notebook.exit(json.dumps(nb_obj.response))