In [1]:
#import necessary modules.
from pyspark import SparkContext, SparkConf
import requests
import json
from IPython.display import HTML, Javascript, display, clear_output
import time


In [2]:
# A Spark job function.
def wordCountExample(fileInputUrl='/home/mert/Downloads/alice30.txt', numResults=1000):
    """Run a word-count Spark job over a text file and return the top words.

    Uses the notebook-level SparkContext ``sc``.

    Args:
        fileInputUrl: Path or URL of the text file passed to ``sc.textFile``.
            Defaults to the alice30.txt location that was previously hard-coded.
        numResults: Maximum number of (count, word) pairs to fetch.

    Returns:
        A list of ``(count, word)`` tuples ordered by descending count,
        containing at most ``numResults`` entries.
    """
    distFile = sc.textFile(fileInputUrl)

    nonempty_lines = distFile.filter(lambda line: len(line) > 0)

    # split() without an argument splits on any run of whitespace and never
    # yields '', so indentation and double spaces are not counted as a "word".
    words = nonempty_lines.flatMap(lambda line: line.split())

    wordcounts = words.map(lambda word: (word, 1)) \
                      .reduceByKey(lambda x, y: x + y) \
                      .map(lambda pair: (pair[1], pair[0])) \
                      .sortByKey(False)

    # take() is the action that makes Spark actually execute the job.
    return wordcounts.take(numResults)


In [3]:
# This function returns the requested field of the first element in the REST API's JSON response.
def getAppId(url, option, timeout=10):
    """Fetch a JSON list from ``url`` and return one field of its first element.

    Args:
        url: REST endpoint expected to answer with a JSON array of objects.
        option: Key to read from the first element (for example ``"id"``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The value stored under ``option`` in the first element, or ``None``
        when the response is empty.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the first element has no ``option`` key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of indexing into an error payload.
    response.raise_for_status()
    data = response.json()
    if not data:
        return None
    # Only the first element is ever consulted.
    return data[0][option]

In [4]:
# Base endpoint of the Spark REST API on localhost:4040 (the applications listing).
url = "http://localhost:4040/api/v1/applications/"

# Ask that endpoint for the "id" field, i.e. the application id.
appId = getAppId(url, "id")

# Job endpoints of this application: all jobs, then filtered by status.
allJobsUrl = url + appId + "/jobs/"
allSucceededJobsUrl = allJobsUrl + "?status=SUCCEEDED"
allRunningJobsUrl = allJobsUrl + "?status=running"

# Show the three links, one per line.
print("\n".join([
    "All jobs link: " + allJobsUrl,
    "All succeded jobs link: " + allSucceededJobsUrl,
    "All running jobs link: " + allRunningJobsUrl,
]))


All jobs link: http://localhost:4040/api/v1/applications/local-1490805322357/jobs/
All succeded jobs link: http://localhost:4040/api/v1/applications/local-1490805322357/jobs/?status=SUCCEEDED
All running jobs link: http://localhost:4040/api/v1/applications/local-1490805322357/jobs/?status=running


In [5]:
# This is the JavaScript and HTML code for the progress bar.
# The bar changes dynamically according to the given inputs.
def ProgressBar(jobId,percentage):
    """Replace the cell output with an animated HTML progress bar for one job.

    The current output is cleared, then an HTML snippet is displayed whose
    script widens the bar by 1% every 100 ms until it reaches
    ``100 * percentage`` percent.

    Args:
        jobId: Identifier shown in the heading; a string or a number.
        percentage: Completed fraction (for example 0.5 for 50%), given as a
            number or as its string form.

    Returns:
        None.
    """
    # Coerce to text so numeric arguments do not break the concatenation below;
    # string arguments pass through unchanged.
    jobLabel = str(jobId)
    fraction = str(percentage)

    markup = '''

    <html>

    <script>
    function move() {
      var elem = document.getElementById("myBar");
      var width = 1;
      var id = setInterval(frame, 100);
      function frame() {
        if (width >= 100* ''' + fraction + ''' ) {
          clearInterval(id);
        } else {
          width++;
          elem.style.width = width + '%';
          elem.innerHTML = width * 1  + '%';
        }
      }
    }
    </script>

    <style>
    #myProgress {
      width: 100%;
      background-color: #ddd;
    }

    #myBar {
      width: 0%;
      height: 30px;
      background-color: #4CAF50;
      text-align: center;
      line-height: 30px;
      color: white;
    }
    </style>
    <body >


    <h1> Job Id: ''' + jobLabel + '''</h1>

    <div id="myProgress">
      <div id="myBar">0%</div>
    </div>

    <script>move();</script>

    </body >
    </html>


        '''

    # wait=True defers the clearing until the new output is ready to be shown.
    clear_output(wait=True)
    display(HTML(markup))

In [6]:
# Run the Spark job(s) and show a progress bar for every job that has just finished.

allJobs= []
currentJobs = []

for i in range(1):
    # Create our Spark jobs.
    wordCountExample()

    # We ask the REST API for succeeded jobs rather than running ones: when a
    # job's execution time is short, polling could miss its running state.
    # (A streaming channel such as Kafka could avoid that, but is out of scope.)
    response = requests.get(allSucceededJobsUrl, timeout=10)
    # Stop on an HTTP error status instead of iterating over an error payload.
    response.raise_for_status()
    data = response.json()
    currentJobs = []
    for j in data:
        # Only report jobs that were not already seen in an earlier iteration.
        if j['jobId'] not in allJobs:
            allJobs.append(j['jobId'])
            currentJobs.append(j['jobId'])
            jobId = str(j['jobId'])
            numTasks = j['numTasks']
            numCompletedTasks = j['numCompletedTasks']
            # Avoid division by zero. Compare by value: `is 0` tests object
            # identity and only works by accident of small-int caching.
            if numTasks != 0:
                # Convert before dividing so integer division cannot truncate
                # the ratio to 0 or 1.
                percentage = str(float(numCompletedTasks) / numTasks)
                # Render the HTML/JavaScript progress bar.
                ProgressBar(jobId,percentage)
                # Wait for the bar animation to finish before the next job's
                # bar replaces it (up to 100 steps of 100 ms each).
                time.sleep(10)