In [0]:
# Define the notebook widgets - this only needs to be run once.
# dbutils.widgets.text("environment", "","")

# To remove widgets that are no longer needed:
# dbutils.widgets.removeAll()

# Dynamic variables (values are passed in from ADF)
environment = dbutils.widgets.get("environment")

In [0]:
from datetime import datetime, timedelta
import time, copy, math, json
import requests
import concurrent.futures

# Alternative (UAT) base URL, kept for reference:
#pbi_base_url = "https://ideobimodeldev.azurewebsites.net/uat/v1.0/" 
# Base URL that endpoint paths are appended to when request URLs are built.
pbi_base_url  = 'https://apim-bimodel-prod.azure-api.net/powerbi-model/v1/'   # TODO: hard-coded, needs to change
#okta_address = 'https://ideobimodeldev.azurewebsites.net/prod/v1.0/auth/getapitoken' # TODO: hard-coded, needs to change

# NOTE(review): these two settings are not referenced by any of the cells
# visible here; presumably throttling knobs used elsewhere - confirm.
sleep_seconds = 1
sleep_on_count = 10

def _pbi_assemble_headers(ocp_apim_subscription_key,subscription_key):
  if (subscription_key == ""):
    header = {
      'Content-Type': 'application/json',
      'Ocp-Apim-Subscription-Key': ocp_apim_subscription_key
    }
    return header 
  else:
    header = {
      'Content-Type': 'application/json',
      'Ocp-Apim-Subscription-Key': ocp_apim_subscription_key,
      'Authorization' : subscription_key
    }
    return header

def pbi_timestamp_by_numbers_of_hours_ago(number_of_hours):
    """Return the Unix timestamp (whole seconds) of the moment `number_of_hours` ago.

    The value is derived from `time.time()`, which is already expressed in
    seconds since the epoch, so the result does not depend on the time zone
    of the machine running the notebook. (Feeding a naive UTC datetime to
    `time.mktime` would interpret it as local time and shift the result by
    the local UTC offset.)

    Args:
      number_of_hours: how far to go back, in hours (int or float).

    Returns:
      int: seconds since the Unix epoch.
    """
    seconds_ago = timedelta(hours=number_of_hours).total_seconds()
    return int(time.time() - seconds_ago)

def _get_total_pages(total_records):
  return math.ceil(total_records / 1000)

In [0]:
def _pbi_updated_params(params):
    new_params = {}
    for k, v in params.items():
        new_params[k] = v
        
    return new_params

def _set_request(method, url,ocp_apim_subscription_key,query_params={}, subscription_key = ""):
  if method == "POST":
    return requests.post(url, headers=_pbi_assemble_headers(ocp_apim_subscription_key,subscription_key), json=query_params)
  elif method == "GET":
    return requests.get(url, headers=_pbi_assemble_headers(ocp_apim_subscription_key,subscription_key), json=query_params)
  elif method == "PUT":
    return requests.put(url, headers=_pbi_assemble_headers(ocp_apim_subscription_key,subscription_key), json=query_params)
  elif method == "DELETE":
    return requests.delete(url, headers=_pbi_assemble_headers(ocp_apim_subscription_key,subscription_key), json=query_params)

def _set_request_latlong(method,url, data={}):
  if method == "GET":
    return requests.get(url, data=data)
  

In [0]:
def pbi_post_retry(data,url,ocp_apim_subscription_key,query_params,subscription_key):
  """Re-send the POST for a single page and report whether it failed again.

  Args:
    data: mapping that provides the page to retry under the "PageNo" key.
    url: full request URL.
    ocp_apim_subscription_key: forwarded to _set_request.
    query_params: request parameters; its "PageNo" entry is overwritten
      in place with the page being retried.
    subscription_key: forwarded to _set_request.

  Returns:
    (status, response): `status` is True when the retried response is
    still an error (its "data" is None or its "message" contains
    "ERROR", case-insensitively), False otherwise; `response` is the
    response object of the retry.
  """
  page_no = data["PageNo"]
  print("retry page no: ",page_no)
  query_params["PageNo"] = page_no
  response = _set_request("POST", url, ocp_apim_subscription_key, _pbi_updated_params(query_params),subscription_key)
  payload = response.json()
  still_failing = payload["data"] is None or "ERROR" in payload["message"].upper()

  print("pbi_post_retry called")
  print("status:",still_failing)
  return still_failing,response

In [0]:
def pbi_post(endpoint, ocp_apim_subscription_key, query_params=None, subscription_key=""):
  """POST to an API endpoint and collect every page of the result.

  The first request (PageNo "1") is used to check the HTTP status and to
  read 'TotalRows'; all pages 1..N are then requested through a small
  thread pool. A page whose response is an error is retried once via
  pbi_post_retry before giving up.

  Args:
    endpoint: path appended to the base URL.
    ocp_apim_subscription_key: forwarded to _set_request.
    query_params: request parameters. A private copy is used, so neither
      the caller's dict nor a shared default is modified when "PageNo"
      is set.
    subscription_key: forwarded to _set_request.

  Returns:
    - the response text, when `endpoint` is "auth/getapitoken";
    - a list of response objects (one per page, in completion order)
      for every other endpoint;
    - `[first_page]` when the first request's status code is not
      ok / created / no_content.

  Raises:
    Exception: ("API Error", message) when the first page, or a page and
      its retry, has "data" of None or a "message" containing "ERROR".
  """
  # Copy so the PageNo bookkeeping below never leaks into the caller's dict.
  query_params = dict(query_params) if query_params else {}

  if endpoint == 'summary/opportunities':
    # NOTE(review): this endpoint is routed to a hard-coded UAT host
    # instead of pbi_base_url - confirm this is still intended.
    tmp_pbi_base_url = "https://ideobimodeldev.azurewebsites.net/uat/v1.0/"
    url = '{}{}'.format(tmp_pbi_base_url, endpoint)
  else:
    url = '{}{}'.format(pbi_base_url, endpoint)

  query_params["PageNo"] = "1"
  first_page = _set_request("POST", url, ocp_apim_subscription_key, _pbi_updated_params(query_params), subscription_key)
  if first_page.status_code not in [requests.codes.ok, requests.codes.created, requests.codes.no_content]:
    return [first_page]

  if endpoint == "auth/getapitoken":
    return first_page.text

  results = first_page.json()
  if results["data"] is None or "ERROR" in results["message"].upper():
    raise Exception("API Error", results["message"])

  print("Total Row:{}....{}".format(endpoint,results['TotalRows']))
  total_pages = _get_total_pages(int(results['TotalRows']))

  # Remember which page each future asked for, so a failed page can be
  # retried without relying on the error response echoing "PageNo".
  future_to_page = {}
  responses = []
  with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    # Page 1 is requested again here, in addition to the probe above.
    for page_no in range(1, total_pages + 1):
      query_params["PageNo"] = page_no
      # Each task gets its own snapshot of the parameters.
      future = executor.submit(_set_request, "POST", url, ocp_apim_subscription_key, _pbi_updated_params(query_params), subscription_key)
      future_to_page[future] = page_no

    for future in concurrent.futures.as_completed(future_to_page):
      response = future.result()
      results = response.json()
      if results["data"] is None or "ERROR" in results["message"].upper():
        # Make one re-attempt for this page instead of failing immediately.
        status, retried = pbi_post_retry({"PageNo": future_to_page[future]}, url, ocp_apim_subscription_key, query_params, subscription_key)
        if status:
          raise Exception("API Error", retried.json()["message"])
        responses.append(retried)
      else:
        responses.append(response)

  return responses

In [0]:
def pbi_get(endpoint, query_params=None, ocp_apim_subscription_key="", subscription_key=""):
  """Send a GET request to `endpoint` under the API base URL.

  Args:
    endpoint: path appended to pbi_base_url.
    query_params: parameters forwarded to _set_request (which sends them
      as the JSON body); defaults to an empty dict.
    ocp_apim_subscription_key: forwarded to _set_request. It is optional
      here only to keep existing `pbi_get(endpoint)` call sites valid;
      _set_request itself requires the argument.
    subscription_key: forwarded to _set_request.

  Returns:
    Whatever _set_request returns for the GET request.
  """
  url = '{}{}'.format(pbi_base_url, endpoint)
  if query_params is None:
    query_params = {}
  return _set_request("GET", url, ocp_apim_subscription_key, query_params, subscription_key)

def pbi_get_latlong(endpoint):
  """GET `endpoint` (used verbatim as the full URL) via _set_request_latlong.

  Returns whatever _set_request_latlong returns for that request.
  """
  return _set_request_latlong("GET", f"{endpoint}")
  

In [0]:
from pyspark.sql.functions import length, when,udf, array

@udf
def udf_generate_address(city,street,state,postal_code,country):
  """Join the non-empty address fields into one comma-separated string.

  Fields are emitted in the order city, street, state, postal_code,
  country; empty or null fields are skipped. Because the separator is
  only placed between fields that are present, the result never starts
  with a comma when `city` is missing. Returns "" when every field is
  empty.
  """
  parts = [part for part in (city, street, state, postal_code, country) if part]
  return ",".join(parts)

@udf
def udf_generate_count(city,street,state,postal_code,country):
  """Count how many of the five address fields are non-empty.

  A field counts when it is truthy (so null and "" are not counted).

  NOTE(review): the bare @udf decorator does not declare a return type,
  so Spark applies the decorator's default (StringType per the PySpark
  docs) - confirm that callers expect the count in that form.
  """
  fields = (city, street, state, postal_code, country)
  return sum(1 for field in fields if field)