# Data Science Jobs from Naukri.com

In [16]:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Make the folder that holds the Chrome driver discoverable.
# PATH entries must be separated by os.pathsep; appending the folder without
# it would fuse the folder onto whatever entry was last in PATH.
os.environ['PATH'] += os.pathsep + r'D:\End To End ML\web_scraping by selenium'

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Open the landing page
naukri_url = 'https://www.naukri.com/'
driver.get(naukri_url)

# Keyword typed into the search box; the results URL below is derived from
# the same value so the two cannot drift apart.
search_keyword = 'Data Science'
search_placeholder = driver.find_element(By.CLASS_NAME, "suggestor-input")
search_placeholder.send_keys(search_keyword)

# Remember where we are so the navigation triggered by the click can be detected
url_before_search = driver.current_url

# Click the search button
search_button = driver.find_element(By.CLASS_NAME, "qsbSubmit")
search_button.click()

# Build the results URL: "<site>/<keyword-with-hyphens>-jobs?k=<keyword-with-hyphens>"
original_url = "https://www.naukri.com/"
given_input = search_keyword.lower()
modified_input = given_input.replace(" ", "-").lower()
new_url = f"{original_url}{modified_input}-jobs?k={modified_input}"

# Wait (at most `timeout` seconds) for the browser to leave the landing page
# after the click, polling instead of sleeping for the whole timeout.
timeout = 5  # seconds
start_time = time.time()
while driver.current_url == url_before_search and time.time() - start_time < timeout:
    time.sleep(0.25)


# Column lists; each receives one entry per scraped element, page after page
job_titles = []
company_names = []
experience_required = []
package_details = []
locations = []
skills = []
post_link = []
post_time = []

MAX_PAGES = 2000        # upper bound on the number of result pages visited
PAGE_WAIT_SECONDS = 5   # implicit wait applied to element lookups


def _texts_by_class(class_name):
    """Return the text of every element on the current page with this class."""
    return [el.text for el in driver.find_elements(By.CLASS_NAME, class_name)]


# try/finally guarantees the browser is closed even when a lookup raises
try:
    # The implicit wait is a driver-wide setting, so it is set once up front
    driver.implicitly_wait(PAGE_WAIT_SECONDS)

    for page_number in range(1, MAX_PAGES + 1):
        # Navigate to the current page
        driver.get(new_url + '&pageNo=' + str(page_number))

        # One lookup serves both the title text and the posting URL
        title_elements = driver.find_elements(By.CLASS_NAME, "title")
        post_links = title_elements  # same elements expose the URL via href

        # (target column, values found on this page)
        page_columns = [
            (job_titles, [el.text for el in title_elements]),
            (company_names, _texts_by_class("comp-name")),
            (experience_required, _texts_by_class("expwdth")),
            # The package text sits in a <span> nested inside the rupee icon element
            (package_details, [
                icon.find_element(By.TAG_NAME, "span").text
                for icon in driver.find_elements(By.CLASS_NAME, "ni-job-tuple-icon-srp-rupee")
            ]),
            (locations, _texts_by_class('locWdth')),
            (skills, _texts_by_class('tags-gt')),
            (post_link, [el.get_attribute("href") for el in post_links]),
            (post_time, _texts_by_class('job-post-day')),
        ]

        # Pad every column to this page's longest column so that a field
        # missing on one page cannot shift the rows of all later pages.
        # NOTE: values within a page are still matched purely by position.
        page_rows = max(len(values) for _, values in page_columns)
        for column, values in page_columns:
            column.extend(values)
            column.extend(["None"] * (page_rows - len(values)))

        # Check for the "no results" marker with the implicit wait disabled:
        # on pages that do have results the marker is absent, and the lookup
        # would otherwise block for the full wait on every single page.
        driver.implicitly_wait(0)
        no_results_element = driver.find_elements(By.CLASS_NAME, "srp-noResults")
        driver.implicitly_wait(PAGE_WAIT_SECONDS)
        if no_results_element:
            break
finally:
    # Close the web driver when done (or on failure)
    driver.quit()

# Map each output column name to the list scraped for it
data = {
    'Job_Titles': job_titles,
    'Company_Names': company_names,
    'Experience_Required': experience_required,
    'Package_Details': package_details,
    'Locations': locations,
    'Skills': skills,
    'Post_Url': post_link,
    'Post_Time': post_time
}

# pandas requires all columns to have the same length, so pad every list with
# the string "None" up to the longest one. Iterating over the mapping pads
# exactly the lists that go into the DataFrame, including `post_link`.
max_length = max(len(values) for values in data.values())
for values in data.values():
    values.extend(["None"] * (max_length - len(values)))

# Create the DataFrame
df = pd.DataFrame(data)

# Report (rows, columns)
print(df.shape)

(47191, 8)


In [17]:
# Preview the first five scraped rows
df.head(n=5)

Unnamed: 0,Job_Titles,Company_Names,Experience_Required,Package_Details,Locations,Skills,Post_Url,Post_Time
0,Senior Manager - Data Science,AMERICAN EXPRESS,4-8 Yrs,Not disclosed,Gurgaon/Gurugram,Text miningCareer developmentdata scienceFinan...,https://www.naukri.com/job-listings-senior-man...,1 Day Ago
1,Advance Analytical and Data Sciences -Manager,G R Infraprojects,6-10 Yrs,Not disclosed,"Gurgaon/ Gurugram, Haryana",Advanced Statistical AnalysisData Governance a...,https://www.naukri.com/job-listings-advance-an...,7 Days Ago
2,Manager - Digital Product Analytics [Data Scie...,Resy,4-8 Yrs,Not disclosed,Gurgaon/Gurugram,Product managementCareer developmentOperations...,https://www.naukri.com/job-listings-manager-di...,4 Days Ago
3,Data Science Manager,Foreign IT Consulting MNC,1-3 Yrs,Not disclosed,"Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/...",Change managementSASCodingData miningPythonRst...,,10 Days Ago
4,Data Science Manager,Fortune India 500 Company in FMCG,4-9 Yrs,Not disclosed,Bangalore/Bengaluru,Data analysisdata scienceAnalyticalPredictive ...,,1 Day Ago


In [18]:
# Persist the scraped table without writing the DataFrame index as a column
output_csv = 'Naukri_Data_Science_keyword.csv'
df.to_csv(output_csv, index=False)