 # Project 4: Web Scraping with BeautifulSoup

# By Hari Krishna

In [None]:
Web Scraping with BeautifulSoup
• Description: Use the BeautifulSoup library to scrape data from a
website and save it into a CSV file. You can scrape news headlines,
weather data, or other publicly available information.
• Skills: Web scraping, parsing HTML, working with CSV files.

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape Data Analyst job listings (Hyderabad/Secunderabad) from Naukri and
# save the job title, company name and job link of each listing to a CSV file.

# URL of the Naukri job listings (Data Analyst jobs in Hyderabad/ Secunderabad)
url = "https://www.naukri.com/data-analyst-jobs-in-hyderabad-secunderabad"

# Upper bound (in seconds) on how long the request may wait for the server.
# Without a timeout, requests.get() can block indefinitely on a stalled
# connection.
REQUEST_TIMEOUT_SECONDS = 10

# Placeholder written to the CSV when a field cannot be found on the page.
MISSING_VALUE = 'Not Available'

# Step 1: Send GET request to fetch the HTML content of the page
try:
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
except requests.RequestException as exc:
    # Connection failures and timeouts are reported like a bad status code
    # instead of surfacing as an unhandled traceback.
    print(f"Failed to retrieve the webpage. Error: {exc}")
    raise SystemExit(1) from exc

# Step 2: Check if the request was successful
if response.status_code != 200:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
    # SystemExit is raised directly because exit() is a helper installed by
    # the `site` module for interactive use; the non-zero status tells the
    # caller that the scrape failed.
    raise SystemExit(1)

print("Successfully retrieved the webpage.")
page_content = response.content

# Step 3: Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(page_content, 'html.parser')

# Step 4: Find the relevant data (job titles, company names, etc.)
# In Naukri, the job titles are usually within <a> tags with class 'title'
job_titles = soup.find_all('a', class_='title')

# Step 5: Extract the text of each job title and link
# .get() is used for the link so an anchor without an href attribute yields
# the placeholder instead of raising KeyError and aborting the whole run.
job_listings = [
    [job.get_text(strip=True), job.get('href', MISSING_VALUE)]
    for job in job_titles
]

if not job_listings:
    # The CSV is still written below (header row only), but the user is told
    # that nothing matched rather than being left with a silent empty file.
    print("Warning: no job listings were found on the page; "
          "the CSV will contain only the header row.")

# Step 6: Find the corresponding company names
companies = soup.find_all('a', class_='subTitle')

company_names = [company.get_text(strip=True) for company in companies]

# Step 7: Combine the job titles, companies, and links into a DataFrame
# Titles and companies are paired by position. When there are fewer companies
# than titles, the remaining rows get the placeholder (a non-positive
# multiplier yields an empty padding list).
# NOTE(review): positional pairing misaligns every following row if a listing
# in the middle of the page has no company anchor - confirm against the live
# page structure.
padded_company_names = company_names + [MISSING_VALUE] * (
    len(job_listings) - len(company_names)
)
job_data = [
    [title, company, link]
    for (title, link), company in zip(job_listings, padded_company_names)
]

# Convert to a DataFrame
df = pd.DataFrame(job_data, columns=['Job Title', 'Company Name', 'Job Link'])

# Step 8: Save the job listings to a CSV file
df.to_csv('data_analyst_jobs_in_hyderabad.csv', index=False)

print("Job listings have been saved to 'data_analyst_jobs_in_hyderabad.csv'")


Successfully retrieved the webpage.
Job listings have been saved to 'data_analyst_jobs_in_hyderabad.csv'
