In [1]:
import re
import csv
import json
from time import sleep
from bs4 import BeautifulSoup
import requests

In [2]:
def extract_salary_info(job_title, job_location, timeout=10):
    """Fetch salary percentiles for one job title / location from salary.com.

    The page embeds a JSON-LD ``<script>`` block mentioning "Occupation";
    the salary figures are read from that block rather than from the
    rendered HTML.

    Args:
        job_title: Job slug inserted into the URL (e.g. "senior-accountant").
        job_location: Location slug inserted into the URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple ``(title, location, description, p10, p25, median, p75, p90)``
        with values exactly as they appear in the page's JSON, or ``None``
        when the request fails, the status is not 200, the JSON-LD block is
        absent, or it lacks any of the expected fields.
    """
    template = "https://www.salary.com/research/salary/alternate/{}-salary/{}"
    url = template.format(job_title, job_location)

    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Covers connection errors, timeouts, too many redirects, etc.
        return None
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    pattern = re.compile(r"Occupation")
    script = soup.find("script", {"type": "application/ld+json"}, string=pattern)
    # Pages without salary data have no matching block; treat that as "no result".
    if script is None or not script.string:
        return None

    try:
        json_data = json.loads(script.string)
        salary = json_data["estimatedSalary"][0]
        salary_data = (
            json_data["name"],
            json_data["occupationLocation"][0]["name"],
            json_data["description"],
            salary["percentile10"],
            salary["percentile25"],
            salary["median"],
            salary["percentile75"],
            salary["percentile90"],
        )
    except (ValueError, KeyError, IndexError, TypeError):
        # Malformed JSON or an unexpected schema: skip this page instead of
        # aborting the caller's loop.
        return None

    return salary_data

In [3]:
# Flatten every field of every row into a single list of city slugs.
# The encoding is explicit so the read does not depend on the platform
# default; surrounding whitespace and empty fields are dropped so they
# never end up in a request URL.
with open("largest_cities.txt", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    cities = [city.strip() for row in reader for city in row if city.strip()]

In [4]:
# Collect one salary record per city for a single position.
salary_data = []
position = "senior-accountant"

for city in cities:
    result = extract_salary_info(position, city)
    if result:
        salary_data.append(result)
    # Pause after every request, including failed ones, so a run of misses
    # does not hit the server back-to-back.
    sleep(0.5)

In [5]:
# Show the first five scraped records as a quick sanity check.
preview = salary_data[:5]
for record in preview:
    print(record)

('Senior Accountant', 'New York, NY', 'The Senior Accountant ensures the accuracy of entries to ledger accounts and reconciles subsidiary ledger accounts to the general ledger. Maintains financial records and ensures that financial transactions are properly recorded. Being a Senior Accountant analyzes current costs, revenues, financial commitments, and obligations incurred to predict future revenues and expenses. Prepares complex balance sheets, profit and loss statements and other financial reports. In addition, Senior Accountant may supervise and guide lower-level accountants. Requires a bachelor&#39;s degree. May require CPA certification. Typically reports to a supervisor or manager. Being a Senior Accountant contributes to moderately complex aspects of a project. Work is generally independent and collaborative in nature. Working as a Senior Accountant typically requires 4 to 7 years of related experience.', '82759', '91590', '101290', '112090', '121923')
('Senior Accountant', 'Los

In [6]:
# Persist the scraped records as CSV: one header row, then one row per record.
header = ["Title", "Location", "Description", "ntile10", "ntile25", "ntile50", "ntile75", "ntile90"]
with open("salary-results.csv", "w", newline="", encoding="utf-8") as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerow(header)
    csv_out.writerows(salary_data)