-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
134 lines (103 loc) · 5.71 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
from openpyxl import Workbook
class NobitexBot:
    """Scrape a price table from a web page with Selenium and optionally
    write it to an Excel workbook with openpyxl."""

    # Shared Chrome WebDriver. It is created once, when this class body is
    # executed, and is reused by every instance and every call.
    driver = webdriver.Chrome()

    def getPrices(self,
                  delay: float = 2,
                  infinityRequest: bool = True,
                  optionalDelay: float = 0,
                  link: str = None,
                  element: str = None,
                  exportInExcel: bool = False,
                  excelPath: str = "./",
                  *args, **kwargs
                  ) -> None:
        """
        Extract prices from a webpage and optionally export them to an Excel file.

        One "pass" loads `link`, waits `optionalDelay` seconds, reads the text
        of the element located by the XPath `element`, splits it into rows,
        optionally writes the rows to `prices.xlsx`, and finally waits `delay`
        seconds.

        Args:
            delay (float): Seconds to sleep at the end of every pass. Default is 2.
            infinityRequest (bool): If True, passes are repeated forever (until
                the process is stopped). If False, exactly `timePeriod` passes
                are made. Default is True.
            optionalDelay (float): Seconds to sleep between loading the page and
                locating the element, to give a slow page time to render.
                Default is 0.
            link (str): URL of the page to load. Required.
            element (str): XPath of the element whose text holds the price
                table. Required.
            exportInExcel (bool): If True, every pass writes a fresh workbook to
                `prices.xlsx` inside `excelPath`, replacing the previous file.
                Default is False.
            excelPath (str): Directory in which `prices.xlsx` is saved.
                Default is the current directory.
            *args: Accepted for compatibility; not used.
            **kwargs: `timePeriod` (int) is the number of passes to perform when
                `infinityRequest` is False. For compatibility it may also be
                supplied as `kwargs={"timePeriod": n}`. Other keys are ignored.

        Returns:
            None

        Raises:
            SystemExit: Any exception raised during validation or extraction
                (including the ValueError for a missing `link`/`element` or a
                missing/non-integer `timePeriod`) is caught, reported on
                stdout, the driver is quit, and the process exits with
                status 1.

        Example usage:
            scraper = NobitexBot()
            scraper.getPrices(delay=3,
                              infinityRequest=False,
                              optionalDelay=0.5,
                              link="https://nobitex.ir/en/prices/",
                              element="/html/body/div/div/div/div/div[2]/div/section[3]/div/main/div/div/div[2]/table/tbody",
                              exportInExcel=True,
                              excelPath="./",
                              timePeriod=60)
        """
        try:
            # Both values are needed for every pass, whether or not the
            # result is exported, so validate them unconditionally.
            if element is None or link is None:
                raise ValueError("Link or elements must be filled")

            if infinityRequest:
                while True:
                    self._runPass(link, element, optionalDelay,
                                  exportInExcel, excelPath, delay)
            else:
                # Resolve the pass count before touching the browser so a bad
                # argument fails fast.
                timePeriod = self._resolveTimePeriod(kwargs)
                for _ in range(timePeriod):
                    self._runPass(link, element, optionalDelay,
                                  exportInExcel, excelPath, delay)
        except Exception as e:
            import sys
            print(f"An error has occured during extraction process.\nmore details : {e}")
            self.driver.quit()
            sys.exit(1)

    def _runPass(self, link, element, optionalDelay, exportInExcel, excelPath, delay):
        """Perform one load / read / parse / (export) / sleep cycle."""
        self.driver.get(link)
        sleep(optionalDelay)
        rawText = self.driver.find_element(By.XPATH, element).text
        rows = self._parseRows(rawText)
        if exportInExcel:
            self._exportRows(rows, excelPath)
            print("Prices updated successfully")
        sleep(delay)

    @staticmethod
    def _resolveTimePeriod(options):
        """Return the integer `timePeriod` from the keyword options or raise ValueError."""
        timePeriod = options.get("timePeriod")
        nested = options.get("kwargs")
        # Honour the historically documented call form kwargs={"timePeriod": n}.
        if timePeriod is None and isinstance(nested, dict):
            timePeriod = nested.get("timePeriod")
        if not isinstance(timePeriod, int):
            raise ValueError("Time period must be integer")
        return timePeriod

    @staticmethod
    def _parseRows(rawText):
        """Split the element text into rows, one list of cell strings per row.

        A new row starts at every cell that is an all-digit string other
        than "1"; the cells before the first such marker form the first row.
        (Presumably these digit cells are the rank column of the table.)
        """
        cells = rawText.replace("Buy & Sell\n", '').split('\n')
        rows = []
        start = 0
        for index, cell in enumerate(cells):
            if cell.isdigit() and int(cell) != 1:
                if index > start:
                    rows.append(cells[start:index])
                start = index
        # The final row has no following marker, so it must be flushed here;
        # otherwise the last table row would be silently dropped.
        tail = cells[start:]
        if any(tail):
            rows.append(tail)
        return rows

    @staticmethod
    def _exportRows(rows, excelPath):
        """Write cells 1..4 of every row to `prices.xlsx` inside `excelPath`."""
        import os
        workbook = Workbook()
        sheet = workbook.active
        for row in rows:
            # Skip the leading cell of each row and keep the next four.
            sheet.append(row[1:5])
        # os.path.join inserts the separator when excelPath lacks a trailing one.
        workbook.save(os.path.join(excelPath, "prices.xlsx"))