#### Finding Data Analyst Jobs in Gurugram, Haryana, India  
##### These filters are already applied on the website.

In [1]:
# Import the necessary libraries
from bs4 import BeautifulSoup  # For parsing HTML
import requests  # For making HTTP requests
import pandas as pd  # For data manipulation and analysis

In [2]:
# Define the URL of the website to scrape.
# The query string already carries the search filters (keywords=Data Analyst,
# location=Gurugram, Haryana, India), so no further filtering is done in code.
website = "https://www.linkedin.com/jobs/search?keywords=Data%20Analyst&location=Gurugram%2C%20Haryana%2C%20India&geoId=115884833&trk=public_jobs_jobs-search-bar_search-submit&position=1&pageNum=0"

# Send an HTTP GET request to the URL and get the response.
# requests applies no timeout by default, so an explicit one keeps a stalled
# connection from hanging the kernel indefinitely.
result = requests.get(website, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx response instead of silently parsing
# an error page as if it were the job listing.
result.raise_for_status()
content = result.text

# Parse the HTML content of the webpage using BeautifulSoup
# 'lxml' is one of the parsers supported by BeautifulSoup
soup = BeautifulSoup(content, 'lxml')

In [3]:
# Debugging aid, intentionally left disabled: uncomment to print the parsed
# page and inspect which tags/classes the selectors below should target.
# print(soup.prettify())

In [4]:
# Find the <ul> element with the class 'jobs-search__results-list' in the parsed HTML content
box = soup.find('ul', class_='jobs-search__results-list')

# soup.find returns None when no element matches. Stop here with a clear
# message rather than letting a later `box.find_all(...)` call fail with an
# opaque AttributeError on NoneType.
if box is None:
    raise RuntimeError(
        "Could not find <ul class='jobs-search__results-list'> in the fetched page; "
        "the response may not be a job-search results page."
    )

In [5]:
def _text_or_default(parent, tag, css_class, default):
    """Return the stripped text of the first matching descendant of `parent`.

    Args:
        parent: BeautifulSoup element to search within.
        tag: Tag name to look for (e.g. 'h3').
        css_class: CSS class the tag must carry.
        default: Value returned when no matching element exists.

    Returns:
        The element's text with surrounding whitespace removed, or `default`
        when `parent.find` returns None.
    """
    element = parent.find(tag, class_=css_class)
    if element is None:
        return default
    return element.get_text().strip()


# Initializing an empty list to store the data extracted (one [title, subtitle, city, time] row per job card)
df_data = []

# Loop through each <li> element within the <ul> element
for data in box.find_all('li'):

    # Every field goes through the same fallback path, so a single card that
    # lacks one element yields a placeholder instead of aborting the whole loop.

    # Job title: <h3 class="base-search-card__title">
    title = _text_or_default(data, 'h3', 'base-search-card__title', 'Title not found')

    # Subtitle: <a class="hidden-nested-link">
    subtitle = _text_or_default(data, 'a', 'hidden-nested-link', 'Subtitle not found')

    # City/location: <span class="job-search-card__location">
    city = _text_or_default(data, 'span', 'job-search-card__location', 'City not found')

    # Posting time: <time class="job-search-card__listdate">
    # Stored under a local name other than `time` so the standard-library
    # module name is not shadowed in the notebook namespace.
    posted = _text_or_default(data, 'time', 'job-search-card__listdate', 'Time not found')

    # Append the extracted data to the list
    df_data.append([title, subtitle, city, posted])

# Create a DataFrame from the collected data with appropriate column names
df = pd.DataFrame(df_data, columns=['title', 'subtitle', 'city', 'time'])

In [6]:
# Write the scraped postings out as CSV text to a file called 'data_gurugram'
# (the name is used exactly as given, without an extension).
# index=False leaves the DataFrame's row index out of the written file.
output_path = 'data_gurugram'
df.to_csv(output_path, index=False)

In [7]:
# Display the DataFrame: as the last expression in the cell, `df` is rendered
# as the cell's output (no explicit print call is involved).
df

Unnamed: 0,title,subtitle,city,time
0,Data Analyst,EXL,"Gurugram, Haryana, India",1 week ago
1,Data Analyst,Concentrix,"Gurugram, Haryana, India",3 weeks ago
2,Data Analyst,Ticketmaster,"Gurugram, Haryana, India",1 week ago
3,Analyst- Data science,American Express,"Gurugram, Haryana, India",2 days ago
4,Logistics Data Analyst,Circle K,"Gurugram, Haryana, India",1 week ago
5,Data Analytics & Strategy,Airtel Payments Bank,"Gurugram, Haryana, India",4 weeks ago
6,Data Analyst / Senior Data Analyst,Urban Company,"Gurugram, Haryana, India",1 week ago
7,Data Engineer,Nykaa,"Gurugram, Haryana, India",3 weeks ago
8,Business Intelligence Analyst,Concentrix,"Gurugram, Haryana, India",3 weeks ago
9,Analyst - Data Science,United Airlines,"Gurugram, Haryana, India",2 weeks ago
