Skip to content

Commit 1b51db4

Browse files
Merge pull request avinashkranjan#939 from AshuKV/LeetcodeScrapper/AshuKV
Leetcode Scrapper
2 parents 389b0a2 + c614a71 commit 1b51db4

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

LeetCode-Scrapper/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# LeetCode Scraper
2+
This Python script lets the user scrape 'n' LeetCode problems from any category/difficulty in [LeetCode](https://leetcode.com/problemset/all), as provided by the user. The script gathers each problem's information into a separate PDF.
3+
4+
## Prerequisites:
5+
Install the required packages with the following command in your terminal. (Make sure you're in the project directory.)
6+
7+
` pip3 install -r requirements.txt `
8+
9+
To run this script, you need to have Selenium installed and a Chrome WebDriver available on your $PATH. You can download ChromeDriver from the link below: https://chromedriver.chromium.org/downloads. Then just enter the ChromeDriver path when asked in the prompt.
10+
11+
## Running the script:
12+
After installing all the requirements, run this command in your terminal.
13+
14+
` python3 ques.py `
15+
16+
## Output:
17+
This script will generate 'n' number of different PDFs in the same folder to store the problem information, specifically problem title, problem statement, test cases, and the problem link.

LeetCode-Scrapper/ques.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
from selenium import webdriver
2+
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
3+
from selenium.webdriver.support.ui import WebDriverWait
4+
from selenium.webdriver.support import expected_conditions as EC
5+
from selenium.webdriver.common.by import By
6+
from selenium.common.exceptions import NoSuchElementException
7+
from selenium.common.exceptions import TimeoutException
8+
import os
9+
from fpdf import FPDF
10+
11+
# Configure a headless Chrome session so no browser window pops up.
options = webdriver.ChromeOptions()
options.add_argument("--headless")

# "none" page-load strategy: driver.get() returns immediately instead of
# blocking on the full page load; the explicit waits below handle readiness.
capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"

print("Enter Chrome Driver path: ")
input_driver_path = input()
# BUG FIX: the original never passed `options`/`capa` to the driver, so
# headless mode and the page-load strategy were silently ignored.
driver = webdriver.Chrome(input_driver_path, options=options,
                          desired_capabilities=capa)

# The base URL of the LeetCode problem-set page.
baseurl = "https://leetcode.com/problemset/all"
wait = WebDriverWait(driver, 15)

# Query strings that filter the problem list by difficulty.
# BUG FIX: "hard" was lowercase, inconsistent with "Easy"/"Medium" and the
# case-sensitive difficulty filter on the site.
problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=Hard"}
27+
28+
def get_problem(category, no_of_problems):
    """Collect the first `no_of_problems` problems of a difficulty category.

    category: query string such as "?difficulty=Easy", appended to `baseurl`.
    no_of_problems: how many rows of the problem table to read.
    Returns a dict mapping problem name -> problem URL. Exits the process
    on a page-load timeout (network issue or slow render).
    """
    prblm_info = {}
    try:
        # Checking if there is no network or any other issue.
        driver.get(baseurl + '/' + category)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
    except TimeoutException:
        print("Couldn't fetch problem. Network issue or page slow to render. Try again")
        os._exit(-1)

    for problem_index in range(1, no_of_problems + 1):
        try:
            # Problem name lives in the 3rd cell of each table row.
            problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
            # Problem URL is the anchor inside that same cell.
            problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
        except NoSuchElementException:
            # BUG FIX: fewer problems listed than requested used to crash with
            # an uncaught NoSuchElementException; return what we collected.
            print("Only {} problem(s) found; stopping early.".format(problem_index - 1))
            break
        print(problem_name, " ", problem_url)
        prblm_info[problem_name] = problem_url
    return prblm_info
47+
48+
def get_description(problem_url, problem_name):
    """Scrape title, statement and test cases for a single problem page.

    problem_url: full URL of the problem detail page.
    problem_name: display name used as the title (page title is not scraped).
    Returns a dict with keys 'title', 'statement', 'test_case' and 'url',
    or None when the page could not be scraped.
    """
    try:
        # Check if the element is found, and located in the expected layout.
        driver.get(problem_url)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
        problem_title = problem_name
        problem_statement = driver.find_element_by_xpath("//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]").text
        problem_test_cases = driver.find_element_by_xpath("//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/pre[1]").text

        if problem_test_cases.find("Output") == -1:
            # Input and output live in separate <pre> blocks on this layout;
            # stitch them together into one "Input ... Output ..." string.
            problem_test_cases = "Input\n" + problem_test_cases
            problem_test_cases += "\nOutput\n"
            problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
        else:
            # Everything needed is already rendered; stop further page loading
            # (pageLoadStrategy "none" means the page may still be loading).
            driver.execute_script("window.stop();")
        problem = {'title': problem_title, 'statement': problem_statement, 'test_case': problem_test_cases, 'url': problem_url}
        return problem

    except (NoSuchElementException, TimeoutException):
        # BUG FIX: the original assigned problem=None without returning it,
        # relying on the implicit None fall-through; return explicitly.
        print("Couldn't scrap the element, Unable to locate it")
        return None
74+
75+
def to_pdf(problem):
    """Render one scraped problem to ./LeetCode-Scrapper/<title>.pdf.

    problem: dict with keys 'title', 'statement', 'test_case', 'url'
             (as produced by get_description).
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size = 15)
    # FPDF's core fonts only support latin-1; replace unencodable characters
    # rather than raising UnicodeEncodeError on e.g. fancy quotes.
    title = problem["title"].encode('latin-1', 'replace').decode('latin-1')
    statement = problem["statement"].encode('latin-1', 'replace').decode('latin-1')
    test_case = problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
    url = problem["url"]
    pdf.cell(200, 10, txt = title, ln = 1, align = 'C')
    pdf.multi_cell(200, 10, txt = statement, align = 'L')
    pdf.multi_cell(200, 10, txt = test_case, align = 'L')
    pdf.write(5, 'Problem_Link: ')
    # Third argument makes the URL a clickable link in the PDF.
    pdf.write(5, url, url)
    title = title.rstrip()
    # BUG FIX: pdf.output raised FileNotFoundError when the output directory
    # did not exist; create it on demand.
    os.makedirs("./LeetCode-Scrapper", exist_ok=True)
    pdf.output("./LeetCode-Scrapper/" + title + ".pdf")
94+
95+
96+
def main():
    """Prompt for difficulty and count, then scrape problems and export PDFs."""
    category = input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
    # BUG FIX: an unknown difficulty used to raise a bare KeyError below;
    # fail with a clear message instead.
    if category not in problem_difficulty:
        print("Unknown difficulty '{}'. Choose Easy, Medium or Hard.".format(category))
        return
    no_of_problems = int(input("Enter the number of problems to be scrapped : "))
    info = get_problem(problem_difficulty[category], no_of_problems)
    for name, url in info.items():
        problem = get_description(url, name)
        # get_description returns None when a page failed to scrape; skip it.
        if problem is not None:
            to_pdf(problem)
106+
107+
if __name__ == '__main__':
    try:
        main()
    finally:
        # BUG FIX: close the driver even when main() raises, so the Chrome
        # process is never leaked; only do so when run as a script.
        driver.close()

LeetCode-Scrapper/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fpdf==1.7.2
2+
requests==2.24.0
3+
selenium==3.141.0
4+
urllib3==1.25.11

0 commit comments

Comments
 (0)