Skip to content

Commit 78ecb22

Browse files
committed
Added PDF creation and addition of article content to the PDF
1 parent 79306d6 commit 78ecb22

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

Dev.to Scraper/scraper.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from selenium import webdriver
44
from selenium.webdriver.common.keys import Keys
55
import time
6+
from fpdf import FPDF
67

78
# Get input for category and number of articles
89
category = input("Enter category: ")
@@ -50,14 +51,29 @@
5051
article_content_div = article_content.find('div',class_='crayons-article__main')
5152
article_content_body = article_content_div.find('div',class_='crayons-article__body')
5253
p_tags = article_content_body.find_all('p')
53-
article_content=""
54-
for p_tag in p_tags:
55-
article_content += (p_tag.text.strip()+'\n')
5654

55+
title_string = (title_content.text.strip()).encode('latin-1', 'replace').decode('latin-1')
56+
author_string = ("By - {}".format(author_name.text.strip())).encode('latin-1', 'replace').decode('latin-1')
57+
58+
# Create the PDF document and add a page
59+
pdf = FPDF()
60+
pdf.add_page()
61+
# set style and size of font
62+
pdf.set_font("Arial", size = 12)
63+
64+
# Title cell
65+
pdf.cell(200, 5, txt = title_string,ln = 1, align = 'C')
66+
# Author cell
67+
pdf.cell(200, 10, txt = author_string,ln = 2, align = 'C')
5768

58-
print("Title : " + title_content.text.strip())
59-
print("Author : "+ author_name.text.strip())
60-
print("Body : "+ article_content)
69+
for p_tag in p_tags:
70+
article_part = (p_tag.text.strip()).encode('latin-1', 'replace').decode('latin-1')
71+
# Add part of article to pdf
72+
pdf.multi_cell(0, 5, txt = article_part, align = 'L')
73+
74+
# Save the PDF, named after the article title (alphanumeric characters only) with a .pdf extension
75+
pdf_title = ''.join(e for e in title_string if e.isalnum())
76+
pdf.output("{}.pdf".format(pdf_title))
6177

6278
count = count + 1
6379
if(count == number_articles) :

0 commit comments

Comments
 (0)