In [1]:
"""
The original decoration ID "WebLightJobPosting-23" provides only basic job information. 
To get more comprehensive job details, modify the decorationId in the get_job function located in linkedin_api/linkedin.py as follows:

params = {
    "decorationId": "com.linkedin.voyager.deco.jobs.web.shared.WebFullJobPosting-65",
}

"""

'\nThe original decoration ID "WebLightJobPosting-23" provides only basic job information. \nTo get more comprehensive job details, modify the decorationId in the get_job function located in linkedin_api/linkedin.py as follows:\n\nparams = {\n    "decorationId": "com.linkedin.voyager.deco.jobs.web.shared.WebFullJobPosting-65",\n}\n\n'

In [None]:
import json
import logging
import os
import time
from pprint import pprint

from linkedin_api import Linkedin

# Workplace type mapping function

def get_workplace_type(workplace_types):
    """Translate the first workplace-type URN of a job into a readable label.

    Args:
        workplace_types: Sequence of workplace-type URN strings; may be
            empty or None.

    Returns:
        'On-site', 'Remote' or 'Hybrid' for a recognised first URN;
        "N/A" when the sequence is empty/None or the URN is unknown.
    """
    # Nothing to translate: no workplace information was supplied
    if not workplace_types:
        return "N/A"

    urn_labels = {
        'urn:li:fs_workplaceType:1': 'On-site',
        'urn:li:fs_workplaceType:2': 'Remote',
        'urn:li:fs_workplaceType:3': 'Hybrid',
    }

    # Only the first entry is consulted; any further entries are ignored
    primary_urn = workplace_types[0]
    return urn_labels.get(primary_urn, "N/A")


# Setup logging for errors and debugging
logging.basicConfig(filename='job_scraper_errors.log', level=logging.ERROR)

# LinkedIn credentials. Prefer the LINKEDIN_EMAIL / LINKEDIN_PASSWORD environment
# variables so secrets are never saved inside the notebook. When they are unset
# the values fall back to the empty placeholders, which can be replaced by hand.
email = os.environ.get("LINKEDIN_EMAIL", "")
password = os.environ.get("LINKEDIN_PASSWORD", "")

# Authenticate and verify the session.
# The client is created inside the try block so that any error raised while
# constructing it is reported by the same handler as a failed profile lookup.
try:
    api = Linkedin(email, password)
    profile = api.get_profile()
    print(f"Authenticated as: {profile['firstName']} {profile['lastName']}")
except Exception as e:
    logging.exception("Authentication failed")
    print(f"Authentication failed: {e}")
    # Raise SystemExit rather than calling the interactive-only exit() helper:
    # this stops the cell without shutting the kernel down, and a plain script
    # run terminates with a non-zero status.
    raise SystemExit(1) from e

# Query settings handed to the job search call
search_params = dict(
    keywords='data engineer',
    location_name='Sydney',
    count=30,  # Results per page
    limit=3,  # Max results (adjust as needed)
)

# Accumulator for the per-job records built during the fetch loop
all_jobs = list()

def _format_epoch_ms(epoch_ms):
    """Format a millisecond epoch timestamp as 'YYYY-MM-DD HH:MM:SS' (UTC).

    Returns "N/A" when the timestamp is missing (None, 0 or otherwise falsy).
    """
    if not epoch_ms:
        return "N/A"
    return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(epoch_ms / 1000))


# Fetch jobs
try:
    print("\nSearching for jobs...")
    jobs = api.search_jobs(**search_params)
    print(f"Found {len(jobs)} jobs.")

    # Fetch job details
    for job in jobs:
        # Reset on every iteration so the error handler below never reports a
        # stale ID from a previous job, and never hits an unbound name when the
        # failure happens before the ID has been extracted.
        job_id = None
        try:
            # Extract job ID: explicit 'id' first, else the last URN segment
            job_id = job.get('id') or job.get('entityUrn', '').split(':')[-1]
            if not job_id:
                print("Job ID not found, skipping...")
                continue

            # Fetch job details
            job_details = api.get_job(job_id)
            job_skills = api.get_job_skills(job_id)

            # Debugging: Print available keys
            print(f"\nJob ID: {job_id}")
            print("-" * 30)
            print(f"Job keys: {list(job.keys())}")
            print(f"Job details keys: {list(job_details.keys())}")
            print(f"Job Skills Data for Job ID {job_id}:")
            pprint(job_skills)

            # Extract relevant fields

            # Company extraction: first non-empty candidate wins
            company = (
                job.get('companyName') or
                job_details.get('companyName') or
                job_details.get('companyDetails', {}).get('companyName') or
                job_details.get('companyDetails', {}).get('company', {}).get('name') or
                job_skills.get('company', {}).get('name') or
                "N/A"
            )

            # Location extraction: first non-empty candidate wins
            location = (
                job.get('formattedLocation') or
                job_details.get('formattedLocation') or
                job_details.get('locationDescription') or
                job_details.get('location', {}).get('city') or
                job_details.get('location', {}).get('country') or
                "N/A"
            )

            # Employment Status extraction
            employment_type = job_details.get('formattedEmploymentStatus') or "N/A"

            # Experience Level extraction
            seniority_level = job_details.get('formattedExperienceLevel') or "N/A"

            # Industries extraction
            industries = job_details.get('formattedIndustries') or "N/A"

            # Job Functions extraction
            job_functions = job_details.get('formattedJobFunctions') or "N/A"

            # Applies extraction
            applies = job_details.get('applies') or "N/A"

            # Work Place Type extraction
            workplace_type = get_workplace_type(job_details.get('workplaceTypes', []))

            # Whether the search result flags the job as reposted
            reposted_job = job.get('repostedJob', "N/A")

            # Extract skills
            if 'skillMatchStatuses' in job_skills:
                skills = [
                    skill_status.get('skill', {}).get('name', 'Unknown Skill')
                    for skill_status in job_skills.get('skillMatchStatuses', [])
                ]
            else:
                skills = ["Skills not listed"]

            # Posting / expiry times: prefer the search result, fall back to
            # the detail payload, and render as human-readable UTC strings
            post_time = _format_epoch_ms(job.get('listedAt') or job_details.get('listedAt'))
            expire_time = _format_epoch_ms(job.get('expireAt') or job_details.get('expireAt'))

            # Store job data
            # NOTE(review): the "empoyment type" key is misspelled; it is kept
            # byte-identical because consumers of the JSON output may rely on it.
            job_info = {
                "title": job.get('title', "N/A"),
                "company": company,
                "location": location,
                "empoyment type": employment_type,
                "seniority level": seniority_level,
                "industries": industries,
                "Job Functions": job_functions,
                "applies": applies,
                "workplace_type": workplace_type,
                "description": job_details.get('description', {}).get('text', "N/A"),
                "skills": skills,
                "job_url": f"https://www.linkedin.com/jobs/view/{job_id}",
                "if reposted": reposted_job,
                "posted_time": post_time,
                "expire_time": expire_time
            }

            print(f"Fetched job: {job_info['title']} at {job_info['company']}")
            all_jobs.append(job_info)

            # Throttle requests
            time.sleep(2)

        except Exception as detail_error:
            # Best effort: record the failure (with traceback) and move on to
            # the next job instead of aborting the whole run.
            logging.exception(f"Error fetching details for job ID {job_id}: {detail_error}")
            print(f"Error fetching details for job ID {job_id}: {detail_error}")

except Exception as search_error:
    logging.exception(f"Error searching for jobs: {search_error}")
    print(f"Error searching for jobs: {search_error}")

# Persist the collected job records as indented JSON
output_file = "linkedin_jobs.json"
with open(output_file, mode='w') as json_out:
    json.dump(all_jobs, fp=json_out, indent=4)

print(f"\nJob data saved to {output_file}.")



Authenticated as: Jeff B.

Searching for jobs...
Found 3 jobs.

Job ID: 4088696105
------------------------------
Job keys: ['trackingUrn', 'repostedJob', 'title', '$recipeTypes', 'posterId', '$type', 'contentSource', 'entityUrn']
Job details keys: ['standardizedTitle', 'companyDetails', 'employmentStatusResolutionResult', 'employmentStatus', 'ownerViewEnabled', 'hiringDashboardViewEnabled', 'talentHubJob', 'formattedLocation', 'jobPostingUrl', 'applies', '$recipeType', 'applyingInfo', 'workplaceTypesResolutionResults', 'dashEntityUrn', 'eligibleForLearningCourseRecsUpsell', 'eligibleForSharingProfileWithPoster', 'originalListedAt', 'entityUrn', 'workRemoteAllowed', 'applyMethod', 'benefitsDataSource', 'savingInfo', 'formattedJobFunctions', 'dashJobPostingCardUrn', 'locationVisibility', 'expireAt', 'claimableByViewer', 'formattedIndustries', 'industries', 'salaryInsights', 'country', 'jobPosterEntitlements', 'standardizedTitleResolutionResult', 'locationUrn', 'jobApplicationLimitReache