-
Notifications
You must be signed in to change notification settings - Fork 0
/
remtive.py
100 lines (89 loc) · 3.83 KB
/
remtive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
# Path to the ChromeDriver executable. Written as a raw string so the Windows
# backslashes are never treated as escape sequences.
path = Service(r'C:\webdrivers\chromedriver')
# Start the Chrome session that drives the whole scrape.
driver = webdriver.Chrome(service=path)
try:
    # Open the job board.
    driver.get('https://remotive.com/')
    # Fixed one-second pauses separate the steps so the automation is less
    # likely to be detected and the page has time to react.
    time.sleep(1)
    driver.maximize_window()
    time.sleep(1)
    # Explicit wait helper: polls for up to 10 seconds before timing out.
    wait = WebDriverWait(driver, 10)
    # Dismiss the banner that covers the page; wait until it can actually be
    # clicked, not merely until it exists in the DOM.
    cban = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[aria-label="Dismiss Message"]')))
    cban.click()
    time.sleep(1)
    # Open the category drop-down in the search bar.
    cate = driver.find_element(By.XPATH, '//*[@id="categories"]/div/select')
    ActionChains(driver).move_to_element(cate).perform()
    cate.click()
    time.sleep(1)
    # Typing 'c' and pressing Enter picks the category by keyboard
    # (presumably "Customer Support" -- confirm against the live option list).
    cate.send_keys('c', Keys.ENTER)
    time.sleep(1)
    # Tick the first entry in the location filter.
    loc2 = driver.find_element(By.XPATH, '//*[@id="location-filter"]/div/ul/li[1]/div/label/span/img')
    ActionChains(driver).move_to_element(loc2).click().perform()
    time.sleep(1)
    # Click the control that sorts jobs by relevance/date.
    sort = driver.find_element(By.CSS_SELECTOR, "div[class='ais-SortBy tw-py-4 tw-px-3']")
    ActionChains(driver).move_to_element(sort).click().perform()
    time.sleep(1)
    # Move one option down in the sort select and confirm (sort by date).
    sort1 = driver.find_element(By.CSS_SELECTOR, 'select.ais-SortBy-select')
    sort1.send_keys(Keys.ARROW_DOWN, Keys.ENTER)
    time.sleep(1)
    # Wait for the unordered list that holds the job results to be present.
    ulist = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="hits"]/ul')))
    # Capture the list's markup so it can be parsed after the browser is gone.
    page = ulist.get_attribute('outerHTML')
finally:
    # Always end the WebDriver session, even when a step above fails;
    # quit() shuts down the browser and the driver process, not just the window.
    driver.quit()
# Parse the captured listing markup with BeautifulSoup using the lxml parser.
listing = BeautifulSoup(page, "lxml")
# Flat list of output lines: each job contributes title, company, link, date.
jl = []
# Each job listing is a div with this class attribute.
for tile in listing.find_all('div', class_="job-tile remotive-bg-light"):
    # The title div holds the job title, the company name and the job link.
    heading = tile.find('div', class_="job-tile-title")
    # Publication date: text of the nested span inside the date container.
    date_box = tile.find('div', class_="tw-hidden sm:tw-flex tw-items-center tw-justify-between tw-w-auto")
    # Every entry ends with '\n' so the output file gets one value per line;
    # the extra '\n' after the date leaves a blank line between jobs.
    jl.extend([
        heading.span.text + '\n',
        heading.find('span', class_="tw-block md:tw-hidden").text + '\n',
        # The href attribute is already a string, so no .text is needed.
        heading.a['href'] + '\n',
        date_box.span.span.text + '\n\n',
    ])
# Store the collected lines in jobs.txt. The context manager closes the file
# even if a write fails, and the explicit UTF-8 encoding keeps non-ASCII job
# titles from failing under a platform-specific default encoding.
# Each entry in jl already ends with its own newline, so no separator is added.
with open('jobs.txt', 'w', encoding='utf-8') as file:
    file.writelines(jl)