In [6]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark import SparkContext, SparkConf
import pandas as pd
from pyspark.sql import Row
import sys
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import datetime as dt
from dateutil.relativedelta import relativedelta as rl

import requests
import json
import os
import time
import sys
import traceback
from threading import Thread, Event


In [7]:
failed = False
parallel_pipes = []

user_id = '0b18fb23-a68c-4072-ae7d-7a0fee6a184e'
project_id = 'd79a923b-8d76-481d-9a18-fddf9dbd0a57'


 
#Current folder path
#json_data_path = '/bigbrain/bb-admin/controller/data/kerbros_cron/li_prod/' 
json_data_path = str(os.getcwd())+'/'

header = {
    "X-Consumer-Custom-ID": user_id,
    "Content-Type": "application/json"
}

ip = "10.9.106.143"
port = "8081"

base_url = f"http://{ip}:{port}/"


In [15]:
def trigger_pipeline(trigger_api_url, file_name):
    output = {
        'result': '',
        'error': '',
        'traceback': ''
    }
    try:
        data_path = json_data_path + file_name
        url = base_url + trigger_api_url
        res = requests.post(url=url, data=json.dumps(json.load(open(data_path))), headers=header)
        print(str(res.status_code))
        print(str(res))
        output['result']=res.json()
        return output
    except Exception as e:
        output['error'] = "Error while triggering pipeline: " + str(e)
        output['traceback'] = str(traceback.format_exc())
        print(f"Error: {str(e)} \n trace: {traceback.format_exc()}")
        return output
        


def get_status(run_id, retry=10):
    output = {
        'result': '',
        'error': '',
        'traceback': ''
    }
    try:
        url = f"{base_url}/bigbrain/engine/run_view/{project_id}/pipeline/run-id/run-details?runId={run_id}"
        res = requests.get(url, headers=header)
        output['result']=res.json()
        return output
    except Exception as e:
        output['error'] = "Error while getting status of pipeline: " + str(e)
        output['traceback'] = str(traceback.format_exc())
        print(f"Error: {str(e)} \n trace: {traceback.format_exc()}")


def check_status(run_id, name):
    output = {
        'status': '',
        'runNumber': '',
        'block_name': '',
        'error': '',
        'traceback': ''
    }
    time.sleep(10)
    retry = 5
    while True:
        try:
            time.sleep(300) #Check status for every 5min
            res = get_status(run_id)
            if res['result']['status'] == 'SUCCESS':
                if res['result']['result']['status'] == 'COMPLETED':
                    stat=f" Status for pipeline: {name} is {res['result']['result']['status']}"
                    print(stat)
                    output['status'] = res['result']['result']['status']
                    output['runNumber'] = 'Run '+ str(res['result']['result']['runNumber'])
                    return output
                elif res['result']['result']['status'] in ['IN_PROGRESS', 'YET_TO_START']:
                    continue
                elif res['result']['result']['status'] in ['STOPPED']:
                    stat=f" Status for pipeline: {name} is {res['result']['status']}"
                    print(stat)
                    output['status'] = res['result']['result']['status']
                    output['runNumber'] = 'Run '+ str(res['result']['result']['runNumber'])
                    return output
                else:
                    failed = True
                    stat=f" Status for pipeline: {name} is {res['result']['result']['status']}"
                    print(stat)
                    output['status'] = res['result']['result']['status']
                    output['runNumber'] = 'Run '+ str(res['result']['result']['runNumber'])
                    if 'blockRunDetails' in res['result']['result']:
                        for obj in res['result']['result']['blockRunDetails']:
                            x = res['result']['result']['blockRunDetails'][obj]
                            if x['status'] == 'FAILED':
                                output['block_name'] = x['stageName']
                    return output
            else:
                retry = retry - 1
                if retry<1:
                    output['error'] == res['result']['result']
                    return output
        except Exception as e:
            print(f"Error: {str(e)} \n trace: {traceback.format_exc()}")
            retry = retry - 1
            if retry<1:
                output['error'] = "Error while getting status of pipeline: " + str(e)
                output['traceback'] = str(traceback.format_exc())
                return output


# Run parallel Pipelines

def run_pipeline(req):
    output = {
        'status': '',
        'error': '',
        'traceback': ''
    }
    try:
        if failed is True:
            print(f"Skipping Run: {req['name']}")
            return
        print(f"Running: {req['name']}")
        tr_res = trigger_pipeline(req['trigger_url'], req['data'])
        if 'result' in tr_res:
            if 'result' in tr_res['result']:
                if 'id' in tr_res['result']['result']:
                    run_id = tr_res['result']['result']['id']
                    if run_id:
                        req['run_id'] = run_id
                        # monitor pipeline till it gets completed
                        stat_res=check_status(run_id, req['name'])
                        return stat_res
                else:
                    print(tr_res)
                    output['status'] = 'ERROR'
                    output['error'] = 'Error while triggering pipeline: ' + tr_res['result']['result']
                    output['traceback'] = tr_res['traceback']
                    return output
            else:
                output['status'] = 'ERROR'
                output['error'] = tr_res['error']
                output['traceback'] = tr_res['traceback']
                return output
        else:
            output['status'] = 'ERROR'
            output['error'] = "Something wrong with run check logs!!"
            return output

    except Exception as e:
        print(e)
        output['status'] = 'ERROR'
        output['error'] = 'Error while triggering pipeline: ' + tr_res['result']['result']
        output['traceback'] = tr_res['traceback']
        return output
    finally:
        if req in parallel_pipes:
            parallel_pipes.remove(req)


pipelines_details = [
    {
        "name": "Production Pipeline-Final",
        "trigger_url": "/bigbrain/engine/run_view/pipeline/batch/api-trigger?apiLinkUUID=991e49ba-3f8a-4626-9e9b-92d6062e3f30",
        "execution": "parallel",
        "data": "fx_churn.json",
        "skip": True
    },{
        "name": "Production Pipeline-Final",
        "trigger_url": "/bigbrain/engine/run_view/pipeline/batch/api-trigger?apiLinkUUID=38a84608-2125-428d-b853-a0016c1c1be8",
        "execution": "parallel",
        "data": "fx_churn.json",
        "skip": False
    }
]


stat_res=run_pipeline(pipelines_details[1])
stat = stat_res['status']
print('Done', stat_res)


Running: Production Pipeline-Final
200
<Response [200]>
 Status for pipeline: Production Pipeline-Final is FAILED
Done {'status': 'FAILED', 'runNumber': 'Run 511', 'block_name': 'NAS Reader', 'error': '', 'traceback': ''}


In [13]:
stat

{'status': 'FAILED', 'block_name': 'NAS Reader', 'error': '', 'traceback': ''}

In [6]:
## creating spark session ##

spark = SparkSession \
    .builder \
    .appName("Corporate Forex Churn Prediction Mail Automation Script") \
    .enableHiveSupport() \
    .getOrCreate()



In [7]:
## Getting todays date ##

today = dt.date.today()
month=dt.datetime.strftime(today,'%Y')+dt.datetime.strftime(today,'%m')
month2=dt.datetime.strftime(today,'%b')
month,month2

('202008', 'Aug')

In [8]:
## Final HIVE tables in BDL PROD where output of RZT pipeline is stored ##

table_name_high='test_high'
table_name_low='test_low'

In [9]:
## Getting todays count from final tables ##

high_count=spark.sql("select count(*) from usecase_common.{} where month_period={}".format(table_name_high,month)).collect()[0][0]
low_count=spark.sql("select count(*) from usecase_common.{} where month_period={}".format(table_name_low,month)).collect()[0][0]
high_count,low_count

(15339, 38412)

In [10]:
# me == my email address
# you == recipient's email address
#me = "akash1.agarwal@axisbank.com"
me = "177724@axisbank.com"
you = ["177724@axisbank.com"]
cc = ["177724@axisbank.com"]

In [11]:
# Create message container - the correct MIME type is multipart/alternative.

msg = MIMEMultipart('alternative')
msg['Subject'] = "RZT Pipeline Status Email : Corporate Forex churn Predictions " + "<" + today.strftime('%d%B%Y') + ">"
msg['From'] = me
msg['To'] = ", ".join(you)
msg['Cc'] = ", ".join(cc)

# Create the body of the message (a plain-text and an HTML version).
html = """\
<html>
  <head></head>
  <body>
 <h1>
        <font color="black">RZT Pipeline for Cororate Forex Churn Status</font></h1><br>
        <p style = "font-family: calibri; font-size: 14pt; font-style: calibri; font-weight: normal; text-align: left">Status of RZT Pipeline for {} month is {}.<br><br>Data Count of each of the segment is below:<br><br>HIGH : {}<br>LOW: {}<br></p>
 </body>
</html>
""".format(month2,stat,high_count,low_count)

# Record the MIME types of one part - text/html.
part2 = MIMEText(html, 'html')

# Attach parts into message container.
# According to RFC 2046, the last part of a multipart message, in this case
# the HTML message, is best and preferred.

msg.attach(part2)


In [12]:
# Send the message via local SMTP server.

s = smtplib.SMTP('10.9.9.28', 2255)
s.ehlo()

s.sendmail(me, you + cc, msg.as_string())
s.quit()

(221, b'2.0.0 Bye')

In [13]:
## stoping the spark session ##

spark.stop()