In [10]:
import csv
import json
import os

import requests
from bs4 import BeautifulSoup

# Function to fetch data from a given URL with parameters
def fetch_data(url, params=None, cookies=None, headers=None, timeout=30):
    """Issue an HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        cookies: Optional mapping of cookies to send.
        headers: Optional mapping of request headers.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole scrape.

    Returns:
        The response body as ``bytes`` when the status code is 200.
        ``None`` (after printing a diagnostic) for any other status code or
        when the request itself fails (connection error, timeout, ...).
    """
    try:
        response = requests.get(
            url,
            params=params,
            cookies=cookies,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report transport failures the same way as bad status codes so the
        # caller's "skip this page" handling covers both.
        print(f"Failed to fetch data. Request error: {exc}")
        return None

    if response.status_code == 200:
        return response.content

    print(f"Failed to fetch data. Status Code: {response.status_code}")
    return None

# Function to parse the response and extract relevant data
def parse_data(content):
    """Decode a JSON postings payload into a list of flat job records.

    Args:
        content: The response body as ``bytes`` or ``str``. It must be a JSON
            object with a ``results`` list; each result carries a nested
            ``job`` object plus ``created_at``, ``expiration_date`` and
            ``apply_start``.

    Returns:
        A list of dicts with the keys ``employer_name``, ``posting_url``,
        ``location_state``, ``location_city``, ``employment_type``,
        ``job_title``, ``created_at``, ``expiration_date`` and
        ``apply_start``. The location fields hold the first listed value, or
        ``None`` when the list is empty or absent.

    Raises:
        json.JSONDecodeError: If ``content`` is not valid JSON.
        KeyError: If a required field is missing from the payload.
    """
    # Decode the payload as JSON directly. Routing it through an HTML parser
    # first drops anything that looks like a tag and unescapes entities
    # inside string values, silently altering titles and employer names.
    site_json = json.loads(content)

    job_postings = []
    for result in site_json['results']:
        job = result['job']
        # Location lists may be empty (or missing); fall back to None.
        states = job.get('location_states') or []
        cities = job.get('location_cities') or []
        job_postings.append({
            'employer_name': job['employer_name'],
            'posting_url': f"https://app.joinhandshake.com/jobs/{job['id']}",
            'location_state': states[0] if states else None,
            'location_city': cities[0] if cities else None,
            'employment_type': job['employment_type_name'],
            'job_title': job['title'],
            'created_at': result['created_at'],
            'expiration_date': result['expiration_date'],
            'apply_start': result['apply_start'],
        })

    return job_postings

def write_to_csv(data, filename="job_listings.csv", append=False):
    """Write job records to a CSV file.

    Args:
        data: Iterable of dicts keyed by the column names listed in
            ``fieldnames`` below. Missing keys are written as empty cells;
            unexpected keys make ``csv.DictWriter`` raise ``ValueError``.
        filename: Path of the CSV file to write.
        append: When true, add rows to the end of an existing file instead of
            replacing it.

    The header row is written whenever the file starts out empty: always
    when overwriting, and when appending to a file that does not exist yet
    or has no content. The file is encoded as UTF-8 so names with non-ASCII
    characters are written the same way on every platform.
    """
    fieldnames = [
        "employer_name",
        "posting_url",
        "location_state",
        "location_city",
        "employment_type",
        "job_title",
        "created_at",
        "expiration_date",
        "apply_start",
    ]

    mode = 'a' if append else 'w'

    with open(filename, mode, newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # In append mode the stream is positioned at the end of the file, so
        # position 0 means there is nothing in it yet and a header is needed.
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerows(data)

    print(f"Data written to {filename}")

def scrape_employer_data(url, cookies, headers, params=None, output_file="job_listings.csv", num_pages=10):
    """Fetch result pages 1..num_pages and save the parsed jobs to a CSV.

    Args:
        url: Listings endpoint passed to ``fetch_data``.
        cookies: Cookies sent with every request.
        headers: Headers sent with every request.
        params: Optional base query parameters. The mapping is copied, so the
            caller's dict is left untouched; ``page`` is set per request.
        output_file: Path of the CSV file to produce.
        num_pages: Number of pages to request, starting at page 1.

    Pages that fail to download are skipped. The first page that is
    successfully fetched starts a fresh file (with header); later pages are
    appended to it.
    """
    # Work on a private copy: avoids a TypeError when params is None and
    # keeps the caller's dict from being modified.
    page_params = dict(params or {})
    wrote_any = False  # becomes True once the output file has been started

    for page in range(1, num_pages + 1):
        print(f"Fetching page {page}...")
        page_params['page'] = page
        content = fetch_data(url, page_params, cookies, headers)
        if content:
            jobs = parse_data(content)
            # Key the append decision on whether anything was written in this
            # run, not on the page number, so a failed first page cannot
            # leave stale rows or a missing header in the file.
            write_to_csv(jobs, output_file, append=wrote_any)
            wrote_any = True

In [11]:
URL = "https://app.joinhandshake.com/stu/postings"

# SECURITY: the session cookies and CSRF token are login credentials and must
# never be stored in the notebook. Any values that were ever saved in this
# file should be treated as compromised and invalidated (log out / start a
# new session). They are read from the environment instead:
#   HANDSHAKE_COOKIE      raw Cookie request header copied from a logged-in
#                         browser session ("name=value; other=value")
#   HANDSHAKE_CSRF_TOKEN  the X-CSRF-Token header value from the same session
_raw_cookie = os.environ.get("HANDSHAKE_COOKIE", "")
cookies = dict(
    pair.strip().split("=", 1)
    for pair in _raw_cookie.split(";")
    if "=" in pair
)
_csrf_token = os.environ.get("HANDSHAKE_CSRF_TOKEN", "")

if not cookies or not _csrf_token:
    print(
        "Warning: HANDSHAKE_COOKIE / HANDSHAKE_CSRF_TOKEN are not set; "
        "requests will be sent without session credentials."
    )

headers = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': URL,
    'X-CSRF-Token': _csrf_token,
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    }

# Base query parameters for the listings search; 'page' is overridden for
# each request by scrape_employer_data.
params = {
    'page': '1',
    'per_page': '25',
    'sort_direction': 'desc',
    'sort_column': 'default',
    'job.job_applicant_preference.willing_to_sponsor_candidate': 'true',
    'job.job_applicant_preference.accepts_opt_cpt_candidates': 'true',
    'job.job_applicant_preference.work_auth_not_required': 'true',
    'job.industries[]': '1034'
}


In [12]:
# Listings endpoint (same value as in the configuration cell).
URL = "https://app.joinhandshake.com/stu/postings"

# Scrape the first 15 result pages into job_listings.csv.
scrape_employer_data(
    url=URL,
    cookies=cookies,
    headers=headers,
    params=params,
    output_file="job_listings.csv",
    num_pages=15,
)

Fetching page 1...
Data written to job_listings.csv
Fetching page 2...
Data written to job_listings.csv
Fetching page 3...
Data written to job_listings.csv
Fetching page 4...
Data written to job_listings.csv
Fetching page 5...
Data written to job_listings.csv
Fetching page 6...
Data written to job_listings.csv
Fetching page 7...
Data written to job_listings.csv
Fetching page 8...
Data written to job_listings.csv
Fetching page 9...
Data written to job_listings.csv
Fetching page 10...
Data written to job_listings.csv
Fetching page 11...
Data written to job_listings.csv
Fetching page 12...
Data written to job_listings.csv
Fetching page 13...
Data written to job_listings.csv
Fetching page 14...
Data written to job_listings.csv
Fetching page 15...
Data written to job_listings.csv
