In [8]:
# Scraping job listings from TimesJobs for the first 30 pages
from tqdm import tqdm
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Initialize lists to store job data
job_titles = []
experience_levels = []
companies = []
skills_required = []
descriptions = []
salaries = []
locations = []
job_links = []
posting_dates = []

# Iterate over the first 30 pages of job listings
for page in tqdm(range(1, 31)):
    # Fetch the HTML content for the current page
    url = f"https://www.timesjobs.com/candidate/job-search.html?from=submit&luceneResultSize=25&txtKeywords=data%20science&postWeek=60&searchType=personalizedSearch&actualTxtKeywords=Data%20Science&searchBy=0&rdoOperator=OR&pDate=I&sequence={page}&startPage=1"
    response = requests.get(url)
    html_content = response.text
    
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(html_content, "html.parser")
    
    # Find all job postings on the page
    job_postings = soup.find_all("li", class_="clearfix job-bx wht-shd-bx")
    
    for posting in job_postings:
        
        # from each 
        job_title = posting.find("h2").text.strip()
        experience = posting.find("ul", class_="top-jd-dtl clearfix").li.text[11:]
        company = posting.find("h3", class_="joblist-comp-name").text.strip()[:-11]
        skills = posting.find("span", class_="srp-skills").text.strip()
        description = posting.find("ul", class_="list-job-dtl clearfix").li.text
        salary = posting.find("ul", class_="top-jd-dtl clearfix").text.strip()[20:]
        posted_date = posting.find("span", class_="sim-posted").text.strip()
        job_link = posting.header.h2.a["href"]
        
        # Append extracted details to lists
        job_titles.append(job_title)
        experience_levels.append(experience.strip())
        companies.append(company.strip())
        skills_required.append(skills.strip())
        descriptions.append(description.strip()[18:])
        
        # Extract and clean salary information
        if "₹Rs" in salary:
            salaries.append(salary[2:20])
            locations.append(salary[40:])
        else:
            salaries.append("Not Available")
            locations.append(salary[14:40])
        
        job_links.append(job_link.strip())
        posting_dates.append(posted_date)
    
    # into a df
    job_data = pd.DataFrame({
        "Job Title": job_titles,
        "Experience Level": experience_levels,
        "Company": companies,
        "Skills Required": skills_required,
        "Salary": salaries,
        "Description": descriptions,
        "Job Link": job_links,
        "Location": locations,
        "Posted Date": posting_dates
    })
    
    # Save the DataFrame to a CSV file
    job_data.to_csv("job_listings.csv", index=False)

# Display the first few rows and summary of the DataFrame
print(job_data.head())
print(job_data.info())

# Print unique values count for each column
for column in job_data.columns:
    print(f"{column} >>> Unique values count: {job_data[column].nunique()}")

# Print count of different experience levels
experience_counts = job_data["Experience Level"].value_counts()
print(experience_counts)

100%|██████████| 30/30 [00:56<00:00,  1.87s/it]


                         Job Title Experience Level  \
0                     Data Science        3 - 5 yrs   
1                     Data Science        4 - 6 yrs   
2                     Data Science        3 - 6 yrs   
3  Data Science Internship in Pune        0 - 1 yrs   
4        Data Science Manager Jobs        2 - 7 yrs   

                                 Company  \
0                     tcg digital soluti   
1                             innefu lab   
2                                    bpr   
3                    Maxgen Technologies   
4  SKYWALK VISA IMMIGRATION SERVICES LLP   

                                     Skills Required              Salary  \
0  data analytics  ,  functional analysis  ,  pre...       Not Available   
1  procedures  ,  data collection  ,   Commercial...       Not Available   
2  hive  ,  algorithms  ,  authoring  ,  data min...       Not Available   
3           internship  ,  data science  ,  power bi       Not Available   
4  Data Science  ,  Data

In [4]:

job_titles = []
experience_levels = []
companies = []
skills_required = []
descriptions = []
salaries = []
locations = []
job_links = []
posting_dates = []
