# Importing the required libraries

In [1]:
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd
import time
from tqdm import tqdm

# Methods to perform scraping

The method names are self-explanatory. We are scraping Job Title, Company Name, Location, Salary, and Job Summary.

In [None]:
def extract_job_title_from_result(soup): 
    jobs = []
    for div in soup.find_all(name="div", attrs={"class":"row"}):
      for a in div.find_all(name="a", attrs={"class":"jobtitle"}):
        jobs.append(a["title"])
    return(jobs)

def extract_company_from_result(soup): 
  companies = []
  for div in soup.find_all(name="div", attrs={"class":"row"}):
    all_company = div.find_all(name="span", attrs={"class":"company"})
    for company in all_company:
      companies.append(company.text.strip())    
  return(companies)

def extract_location_from_result(soup): 
  locations = []
  spans = soup.findAll("span", attrs={"class": "location"})
  for span in spans:
    locations.append(span.text)
  return(locations)

def extract_salary_from_result(soup): 
  salaries = []
  for div in soup.find_all(name="div", attrs={"class":"row"}):
    try:
      span = div.find(name="span", attrs={"class":"salaryText"})
      salaries.append(span.text.strip())
    except:
      salaries.append("Nothing_found")
  return(salaries)

def extract_summary_from_result(soup): 
  summaries = []
  divs = soup.findAll("div", attrs={"class": "summary"})
  for div in divs:
    summaries.append(div.text.strip())
  return(summaries)

# Crawling & Scraping from indeed.in

Here, we have performed the actual crawling and scraping. We crawled **indeed.in** for three different job roles, namely **Data Scientist**, **Machine Learning Engineer**, and **Deep Learning Engineer**.

In [9]:
# storing the indexes of the search page url's
search_page_list = []
for i in range(0,1001,10):
  search_page_list.append(i)

# defining the lists to store respective data
job_title_list = []
company_list = []
location_list = []
salary_list = []
summary_list = []

# a list of three URL's for three job roles mentioned above.
base_URL = ["https://in.indeed.com/jobs?q=data+scientist&start=", "https://in.indeed.com/jobs?q=machine+learning+engineer&start=", "https://in.indeed.com/jobs?q=deep+learning+engineer&start="]

for url in base_URL:
  for page in tqdm(search_page_list):
    URL = url + str(page)
    page = requests.get(URL)
    soup = BeautifulSoup(page.text, "html.parser")

    job_title_list.extend(extract_job_title_from_result(soup))
    company_list.extend(extract_company_from_result(soup))
    location_list.extend(extract_location_from_result(soup))
    salary_list.extend(extract_salary_from_result(soup))
    summary_list.extend(extract_summary_from_result(soup))
  

column_list = ["Job Title", "Company Name", "Location", "Salary", "Summary"]

data = pd.DataFrame(list(zip(job_title_list, company_list, location_list, salary_list, summary_list)), columns =column_list)

data

100%|██████████| 101/101 [01:01<00:00,  1.63it/s]
100%|██████████| 101/101 [01:27<00:00,  1.15it/s]
100%|██████████| 101/101 [00:22<00:00,  4.46it/s]


Unnamed: 0,Job Title,Company Name,Location,Salary,Summary
0,Junior Data Scientist/ta,WSD Consultant,"Gurgaon, Haryana","₹7,00,000 - ₹15,00,000 a year",These programs help them master data-driven de...
1,Data Scientist (Risk Analytics & Modeling),LoanXpress,"Mumbai, Maharashtra",Nothing_found,Manage large loan level and borrower level dat...
2,Data Scientist,AI Engineer,"Delhi, Delhi","₹17,000 - ₹25,000 a month",Ability to use data visualization tools to sho...
3,Data Scientist,NatWest Group,"Gurgaon, Haryana",Nothing_found,"Participating in the data community, you’ll id..."
4,DATA SCIENTIST,Rockwell Automation,"Pune, Maharashtra",Nothing_found,Experience implementing data science models in...
...,...,...,...,...,...
2990,Senior Analyst-DevOps Engineer-Hyderabad,Deloitte,"Hyderabad, Telangana",Nothing_found,Who has cross-functional knowledge and deep ex...
2991,GPU Compiler Performance Engineer,Qualcomm India Private Limited,"Bengaluru, Karnataka",Nothing_found,Experience with machine learning / deep learni...
2992,Senior Specialist - Analytics,Fidelity International,"Gurgaon, Haryana",Nothing_found,Implementation of deep learning models with cl...
2993,Senior Software Engineer - SDN,Microsoft,"Bengaluru, Karnataka",Nothing_found,"Has passion for learning, be a faster learner...."


In [10]:
# Storing the output as a csv file
data.to_csv("indeed_jobs.csv")