In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
from datetime import datetime
import re

def individual_extract(url):
    """Fetch a single job posting page and extract its key fields.

    Args:
        url: Address of the job posting page to download.

    Returns:
        A dict with the keys ``'job_title'``, ``'company_name'`` and
        ``'description'``. A field whose element is not found in the page is
        reported as ``'N/A'``.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the request times out.
    """
    # Bound the wait so one stalled connection cannot hang the whole run.
    page = requests.get(url, timeout=30)
    soup = bs(page.text, 'lxml')

    # Extract company name
    company_element = soup.find('a', class_='topcard__org-name-link topcard__flavor--black-link')
    company_name = company_element.get_text(strip=True) if company_element is not None else 'N/A'

    # Extract job description
    description_element = soup.find('div', class_='show-more-less-html__markup show-more-less-html__markup--clamp-after-5 relative overflow-hidden')
    if description_element is None:
        # Without this guard str(None) would be stored as the literal text
        # "None"; use the same placeholder as the other fields instead.
        cleaned_content = 'N/A'
    else:
        # Replace every tag with a newline so block-level markup still
        # separates the text it used to wrap.
        html_tag_pattern = re.compile(r'<.*?>')
        cleaned_content = re.sub(html_tag_pattern, '\n', str(description_element))

    # Extract job title
    job_title_element = soup.find('h1', class_='top-card-layout__title font-sans text-lg papabear:text-xl font-bold leading-open text-color-text mb-0 topcard__title')
    job_title = job_title_element.get_text(strip=True) if job_title_element is not None else 'N/A'

    # Return the extracted data as a dictionary
    return {
        'job_title': job_title,
        'company_name': company_name,
        'description': cleaned_content
    }

def copytocsv(text, date_str):
    """Scrape job postings for one search phrase and save them to a CSV file.

    Requests up to 100 result pages from the LinkedIn guest job-search
    endpoint, follows the link of every job card found, and writes the
    extracted rows to ``<text>_<date_str>.csv`` in the working directory.
    Paging stops early at the first successful response with no job cards.

    Args:
        text: Search keywords, e.g. ``"Strategy Consultant"``.
        date_str: Date string appended to the output filename.

    Returns:
        None. The result is the CSV file and the progress messages printed to
        standard output.
    """
    from urllib.parse import quote

    # Collapse runs of whitespace, then percent-encode the whole phrase so
    # that reserved characters ("&", "+", "#", ...) cannot corrupt the query.
    updated_text = quote(" ".join(text.split()), safe="")
    job_list = []
    for num in range(0, 100):
        url = f"https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search?keywords={updated_text}&location=India&geoId=102713980&f_TPR=&f_E=2&position=1&pageNum=0&start={num*10}"
        try:
            # Bound the wait so a stalled connection cannot hang the loop.
            r = requests.get(url, timeout=30)
        except requests.exceptions.RequestException as e:
            print(f"An error occurred: {e}")
            continue

        if r.status_code != 200:
            print("Not a successful response with response code:", r.status_code)
            continue

        soup = bs(r.text, "lxml")
        divs = soup.find_all('div', class_='base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card')
        if not divs:
            # A page without job cards means the results are exhausted.
            break

        for d in divs:
            a_tag = d.find('a', class_='base-card__full-link absolute top-0 right-0 bottom-0 left-0 p-0 z-[2]')
            if a_tag is None or 'href' not in a_tag.attrs:
                continue
            job_link = a_tag.get("href")
            try:
                # Guard each posting separately so one failed fetch does not
                # discard the remaining postings on this page.
                job_list.append(individual_extract(job_link))
            except requests.exceptions.RequestException as e:
                print(f"An error occurred: {e}")

    # Create a DataFrame from the job list
    df = pd.DataFrame(job_list)
    # Create a filename using the job title and current date; characters that
    # are not valid in file names (e.g. "/" in "UI/UX") are replaced with "_".
    safe_text = re.sub(r'[\\/:*?"<>|]', '_', text)
    filename = f'{safe_text}_{date_str}.csv'
    # Save the DataFrame to a CSV file
    df.to_csv(filename, index=False)
    print(f'Saved: {filename}')

# Main script
# Ask how many job titles to search for, then prompt for each one in order.
number_job_titles = int(input("Enter the number of job titles: "))
search_text = [
    input(f"Enter job title {i + 1}: ") for i in range(number_job_titles)
]

# Echo the collected titles back to the user.
print("Your job titles are:")
print(search_text)

# Today's date (YYYY-MM-DD) is passed to copytocsv for every title.
current_date = datetime.now().strftime('%Y-%m-%d')

# One copytocsv call (and so one CSV file) per job title.
for search in search_text:
    copytocsv(search, current_date)


Enter the number of job titles: 1
Enter job title 1: Strategy Consultant
Your job titles are:
['Strategy Consultant']
Saved: Strategy Consultant_2024-05-23.csv
