In [349]:
from bs4 import BeautifulSoup
import requests, yaml, pytz
import datetime as dt
import pandas as pd

In [350]:
efc_url = "https://www.efinancialcareers.co.uk/jobs/in-United-Kingdom"

url = efc_url

In [351]:
def parser():
    headers = headers={'User-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'}
    content = requests.get(url, headers).content
    soup = BeautifulSoup(content, "html.parser")

    return soup

In [352]:
def job_card():
    soup = parser()
    
    efc_jobcard = soup.find('div', {'class': 'd-flex flex-column jobs-container ng-star-inserted'}).find_all('efc-job-card')

    card_details = [card.find('efc-card-details') for card in efc_jobcard]
    card_footer = [card.find('efc-card-footer') for card in efc_jobcard]
        
    return card_details, card_footer

In [353]:
def first_page():
        jobcard = job_card()
        card_det, card_ftr = jobcard[0], jobcard[1]

        job_title, company, job_link, location, position_type, work_arrangement, salary = [], [], [], [], [], [], []
        job_dct = {}

        for item in card_det:
            job_title.append(item.find('a').get('title'))
            company.append(item.find_all('div')[3].text.lstrip().rstrip())
            job_link.append(item.find('a').get('href'))
            location.append(item.find('span' , {'class': 'dot-divider'}).text)
            position_type.append(item.find('span' , {'class': 'dot-divider ng-star-inserted'}).text)
        
        for item in card_ftr:
            ln1 = len(item.find_all('div')[1])
            ln2 = len(item.find_all('div')[1].find_all('span'))
        
            if ln1 == 6 and ln2 == 2:
                work_arrangement.append(item.find_all('div')[1].find_all('span')[0].text)
                salary.append(item.find_all('div')[1].find_all('span')[1].text)
            elif (ln1 == 5 and ln2 == 1) or (ln1 == 7 and ln2 == 2):
                work_arrangement.append(None)
                salary.append(item.find_all('div')[1].find_all('span')[0].text)
        
        job_dct = {"Position": job_title, "Company": company, "Link": job_link, "Location": location, "Position Type": position_type,
                   "Work Arrangement": work_arrangement, "Salary": salary}
        final_df = pd.DataFrame(job_dct)

        return final_df

In [354]:
def time_diff(nw, pdt):
    time1 = dt.datetime(nw.year, nw.month, nw.day, nw.hour, nw.minute, nw.second)
    time2 = dt.datetime(pdt.year, pdt.month, pdt.day, pdt.hour, pdt.minute, pdt.second)

    diff = time1 - time2
    mins, sec = divmod(diff.total_seconds(), 60)

    return mins

In [355]:
def api_url(pg):
    with open("vars.yaml", "r") as f:
        vars = yaml.load(f, Loader = yaml.FullLoader)

    url = ""
    iter = 0
    for key, val in vars.items():
        if iter == 0: pass
        elif iter == 4: url += val + str(pg) + "&"
        else: url += val

        iter += 1

    return url

In [356]:
from dateutil import parser

def next_page(last_hr):
    job_title, company, job_link, location, position_type, work_arrangement, salary = [], [], [], [], [], [], []
    job_dct = {}
    
    pg, diff_mins = 1, 0
    while diff_mins < last_hr:
        pg_url = api_url(pg)
        data = requests.get(pg_url).json()['data']

        for i in range(0, len(data)):
            nw = dt.datetime.now().astimezone(pytz.utc)
            pdt = parser.parse(data[i]['postedDate'])
            diff_mins =  round(time_diff(nw, pdt) / 60, 2)

        
            job_title.append(data[i]['title'])
            company.append(data[i]['companyName'])
            job_link.append("https://www.efinancialcareers.co.uk" + data[i]['detailsPageUrl'])
            location.append(data[i]['jobLocation']['displayName'])

            try: position_type.append(data[i]['positionType'])
            except: position_type.append(None)
            
            try: work_arrangement.append(data[i]['workArrangementType'])
            except: work_arrangement.append(None)
            
            try: salary.append(data[i]['salary'])
            except: salary.append(None)

            if len(data) == 15:
                if diff_mins > last_hr: break
                else: pg += 1

    job_dct = {"Position": job_title, "Company": company, "Link": job_link, "Location": location, "Position Type": position_type,
                   "Work Arrangement": work_arrangement, "Salary": salary}
    final_df = pd.DataFrame(job_dct)

    return final_df

In [358]:
last_hr = 7.5

test = next_page(last_hr)

print(test)

                                             Position  \
0   Application / Production Support Engineer - FI...   
1   Cyber Security Engineer - Banking - 9 Month Co...   
2                        Senior Actuary (market risk)   
3                 Senior Actuarial Trainee (BPA team)   
4                                Senior Legal Counsel   
5   CONTRACT C#/ Angular Software Engineer - Up to...   
6                 Conduct Risk Specialist, COO Office   
7   Interim UK Regulatory Reporting Production Man...   
8              Senior Java Developer Data Engineering   
9                    Planning Analytics TM1 Developer   
10  Associate, Technology Service Desk/On-Site Sup...   
11  Vice President - Corporate Finance (Technology...   
12                    Senior Data Modeller - XML FpML   
13                               Lead Systems Analyst   
14      Infrastructure Security & Monitoring Engineer   
15                               Property Tax Manager   

                          Comp