### Scraping the short information from the job board 

In [380]:
# importing modules and libraries for further work with them
import requests
from bs4 import BeautifulSoup
import pandas as pd

# variable and list declaration
job_list = []
data_all = []

# Search endpoint; the query string is passed separately so requests can encode it
SEARCH_URL = "https://www.careerjet.com.bd/search/jobs"
# Placeholder stored whenever a job card lacks a field
MISSING = " "

print("Input job title, keywork or company name : ")
job = input().strip()
print("Input city or country to see the result : ")
country = input().strip()

# HTTP request for the raw search page. Passing the user input through `params`
# percent-encodes spaces, '&', '#', ... instead of pasting them into the URL verbatim.
# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(SEARCH_URL, params={"s": job, "l": country}, timeout=30)

# URL that was actually requested (kept under the original name `search`)
search = r.url
print(search)

# Stop on an HTTP error instead of silently parsing an error page
r.raise_for_status()

# parsing the html document using the html parser
soup = BeautifulSoup(r.text, 'html.parser')

# find each job post container
a = soup.find_all("article", class_=["job","clicky"])

# job_list keeps its original shape: one single-element list per job card
job_list = [[post] for post in a]

# extract the fields of every job card
for post in a:
    # job title: text of the first link in the card (placeholder if there is none)
    title_link = post.find('a')
    job_title = title_link.get_text().strip() if title_link is not None else MISSING

    # company name is optional
    company_tag = post.find('p', class_="company")
    company = company_tag.get_text().strip() if company_tag is not None else MISSING

    # the details list holds the location first and, when present, the salary second
    details = post.find('ul', class_="details")
    detail_items = details.find_all("li") if details is not None else []
    # stored in `location` so the user's `country` input above is not overwritten
    location = detail_items[0].get_text().strip() if len(detail_items) > 0 else MISSING
    salary = detail_items[1].get_text().strip() if len(detail_items) > 1 else MISSING

    # short description of the job
    desc_tag = post.find("div", class_="desc")
    description = desc_tag.get_text().strip() if desc_tag is not None else MISSING

    # one row per job, in the same order as the DataFrame columns below
    data_all.append([job_title, company, salary, location, description])

# store all the data in a pandas DataFrame and export it
df = pd.DataFrame(data_all, columns=["Job_Title", "Company", "Salary", "Location", "Description | Job Responsibilities"])
df.to_csv("Job_Data_From_CareerJet.csv")

Input job title, keywork or company name : 
intern
Input city or country to see the result : 
dhaka
https://www.careerjet.com.bd/search/jobs?s=intern&l=dhaka


Unnamed: 0,Job_Title,Company,Salary,Location,Description | Job Responsibilities
0,Intern,,,Dhaka,Job Context Desk Job 2 vacancies are for Ut...
1,Intern - Affiliation,Daraz,,Dhaka,Job Description: Major Responsibilities : ...
2,Software Engineer Intern,NewsCred Ltd,,Dhaka,Job Summary We are looking for Full-Time Int...
3,Research Intern,,,Dhaka,Job Context binary data lab offers internship...
4,Admission for CA Interns,,"Tk. 10,000 per month",Dhaka,Job Context We are looking for CA interns to...
5,Software Engineer Intern,,,Dhaka,Job Responsibilities Overview of the Positio...
6,Intern under the Political Party Team,,,Dhaka,Job Context The USAID-funded Strengthening P...
7,Intern - CSR & Sustainable Development,Daraz,,Dhaka,Job Description: Major Responsibilities : ...
8,React Native Interns (Unpaid),Empathy Nation,,Dhaka,Job Summary Empathy Nation (an e-learning & ...
9,Business Development (Intern),,,"Khilkhet, Dhaka",Job Responsibilities Conduct research to ide...


### Notebook instances

In [302]:
# importing modules and libraries for further work with them
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [303]:
# Fresh, empty containers: job_list for the job cards, data_all for the extracted rows
job_list, data_all = [], []

In [304]:
# URL of the search results page to scrape
URL = "https://www.careerjet.com.bd/search/jobs?s=software+developer&l=Bangladesh"

# HTTP request for the raw page; the timeout keeps the cell from hanging forever on a stalled connection
r = requests.get(URL, timeout=30)
# Last expression of the cell, so the notebook displays the HTTP status code
r.status_code

200

In [305]:
# Build a parse tree from the response body using the 'html.parser' backend
soup = BeautifulSoup(r.text, features="html.parser")

# Collect every job post container (<article>) on the page
# NOTE(review): a list given to class_ presumably matches either class ("job" or "clicky"), not both — confirm intent
a = soup.find_all(name="article", class_=["job", "clicky"])

In [306]:
# Placeholder stored whenever a job card lacks a field
MISSING = " "

# Rebuild both containers from scratch so re-running this cell does not append duplicate rows.
# job_list keeps its original shape: one single-element list per job card.
job_list = [[post] for post in a]
data_all = []

# extract the fields of every job card
for post in a:
    # job title: text of the first link in the card (placeholder if there is none)
    title_link = post.find('a')
    job_title = title_link.get_text().strip() if title_link is not None else MISSING

    # company name is optional
    company_tag = post.find('p', class_="company")
    company = company_tag.get_text().strip() if company_tag is not None else MISSING

    # the details list holds the location first and, when present, the salary second
    details = post.find('ul', class_="details")
    detail_items = details.find_all("li") if details is not None else []
    location = detail_items[0].get_text().strip() if len(detail_items) > 0 else MISSING
    salary = detail_items[1].get_text().strip() if len(detail_items) > 1 else MISSING

    # short description of the job
    desc_tag = post.find("div", class_="desc")
    description = desc_tag.get_text().strip() if desc_tag is not None else MISSING

    # one row per job: title, company, salary, location, description
    data_all.append([job_title, company, salary, location, description])

In [307]:
# Turn the collected rows into a table; the column order mirrors the order of the values in each data_all row
df = pd.DataFrame(
    data=data_all,
    columns=[
        "Job_Title",
        "Company",
        "Salary",
        "Location",
        "Description | Job Responsibilities",
    ],
)
# Write the table to disk (default options, so the row index is written as an extra first column)
df.to_csv(path_or_buf="Job_Data_From_CareerJet.csv")

### Scraping each full job posting by visiting its linked URL (Advanced)

In this section we will :

- Go through the short job cards on the search results page
- Extract the link associated with each card
- Visit each link individually and scrape all the data
- Extract the full information from each of those pages

In [369]:
# importing modules and libraries for further work with them
import requests
from bs4 import BeautifulSoup
import pandas as pd

# variable and list declaration
# (job_list, data_all and items_link are not used in this cell; kept in case later cells read them)
job_list = []
data_all = []
items_link = []
links = []
bulk_response = []

# URLs for target scraping
BASE_URL = 'https://www.careerjet.com.bd/'
URL = "https://www.careerjet.com.bd/search/jobs?s=software+developer&l=Bangladesh"
# Seconds to wait on any single request before giving up instead of hanging the kernel
REQUEST_TIMEOUT = 30

# One session for the whole crawl so the connection to the site is reused across requests
with requests.Session() as session:
    # HTTP request for the raw search results page
    r = session.get(URL, timeout=REQUEST_TIMEOUT)
    # Stop on an HTTP error instead of silently parsing an error page
    r.raise_for_status()

    # parsing the html document using the html parser
    soup = BeautifulSoup(r.text, 'html.parser')

    # find each job post container
    items = soup.find_all("article", class_=["job","clicky"])

    # extract the job-ad links (hrefs starting with "/jobad") from each job card
    for each in items:
        links.extend(e.get("href") for e in each.select('a[href^="/jobad"]'))

    # The hrefs start with "/", so drop BASE_URL's trailing slash to avoid "//jobad..." URLs
    site_root = BASE_URL.rstrip("/")

    # visit every job page; responses are stored whatever their status so they can be inspected afterwards
    for link in links:
        url = site_root + str(link)
        res = session.get(url, timeout=REQUEST_TIMEOUT)
        bulk_response.append(res)

In [370]:
# Display the collected responses to check the status of every request at a glance
bulk_response

[<Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>]

In [374]:
# Peek at the raw body of the second fetched job page
bulk_response[1].content

