-
Notifications
You must be signed in to change notification settings - Fork 0
/
content_agg.py
94 lines (67 loc) · 2.84 KB
/
content_agg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from abc import ABC, abstractmethod
import praw
import os
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
# Load Reddit API credentials from the project's .env file into the environment.
load_dotenv("/home/kali/python_projects/content_aggrigator/Content-Aggregator-1/setup.env")
CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID')
CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET')
# Warn if credentials are missing, WITHOUT echoing the secret values to stdout
# (the previous debug print leaked the client id/secret).
if not CLIENT_ID or not CLIENT_SECRET:
    print("Warning: REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET not set in environment")
class Source(ABC):
    """Abstract base class for a content source (Reddit, BBC, ...).

    Subclasses establish their own connection via ``connect`` and pull
    content via ``fetch``.
    """

    @abstractmethod
    def connect(self):
        """Open and return a connection/handle to the underlying site."""

    @abstractmethod
    def fetch(self):
        """Retrieve content from the source into instance state."""
class RedditSource(Source):
    """Shared Reddit connection logic for Reddit-backed sources."""

    def connect(self):
        """Create, cache on ``self``, and return a PRAW Reddit client.

        Credentials come from the module-level CLIENT_ID / CLIENT_SECRET
        environment values.

        NOTE(review): ``grant_type_access`` does not look like a documented
        ``praw.Reddit`` keyword argument — confirm it is intentional.
        """
        self.reddit_con = praw.Reddit(
            client_id=CLIENT_ID,
            client_secret=CLIENT_SECRET,
            grant_type_access='client_credentials',
            user_agent='script/1.0',
        )
        return self.reddit_con
class RedditHotProgramming(RedditSource):
    """Hot submissions from the r/programming subreddit."""

    def __init__(self) -> None:
        # Reuse the base-class helper to build the PRAW client.
        self.reddit_con = super().connect()
        self.hot_submissions = []

    def fetch(self, limit: int):
        """Fetch up to ``limit`` hot submissions from r/programming.

        The listing is materialized into a list: PRAW listings are one-shot
        generators, so the previous code produced output only for the FIRST
        ``repr()`` and silently returned an empty string afterwards.
        """
        self.hot_submissions = list(
            self.reddit_con.subreddit('programming').hot(limit=limit)
        )

    def __repr__(self):
        # vars() reads the already-fetched attribute dict directly, avoiding
        # any lazy attribute loading on the submission objects.
        return '\n'.join(vars(submission)['url'] for submission in self.hot_submissions)
class RedditNewPCMasterRace(RedditSource):
    """New submissions from the r/pcmasterrace subreddit."""

    def __init__(self) -> None:
        # Reuse the base-class helper to build the PRAW client.
        self.reddit_con = super().connect()
        self.new_submissions = []

    def fetch(self, limit: int):
        """Fetch up to ``limit`` new submissions from r/pcmasterrace.

        Materialized into a list: PRAW listings are one-shot generators, so
        the previous code produced output only for the FIRST ``repr()`` and
        silently returned an empty string afterwards.
        """
        self.new_submissions = list(
            self.reddit_con.subreddit('pcmasterrace').new(limit=limit)
        )

    def __repr__(self):
        # vars() reads the already-fetched attribute dict directly, avoiding
        # any lazy attribute loading on the submission objects.
        return '\n'.join(vars(submission)['url'] for submission in self.new_submissions)
class BBCSource(Source):
    """Content source backed by the BBC News front page."""

    def connect(self):
        """Download the BBC News front page and return the HTTP response.

        A timeout is set so a stalled connection cannot hang the program
        indefinitely (requests waits forever by default).
        """
        self.BBC_page = requests.get("https://www.bbc.co.uk/news", timeout=10)
        return self.BBC_page
class BBCHeadlines(BBCSource):
    """Headline scraper for the BBC News front page."""

    def __init__(self) -> None:
        # Download the page once at construction time.
        self.BBC_page = super().connect()
        self.headlines = []

    def fetch(self):
        """Parse the downloaded page and collect unique <h3> headline texts.

        Duplicates are removed while preserving first-seen document order —
        the previous ``set`` made the ``repr`` ordering nondeterministic
        between runs.
        """
        soup = BeautifulSoup(self.BBC_page.text, 'html.parser')
        self.headlines = list(dict.fromkeys(element.text for element in soup.find_all("h3")))

    def __repr__(self):
        return "\n".join(self.headlines)
if __name__ == '__main__':
    # Other available sources (Reddit ones require valid API credentials):
    #   RedditHotProgramming().fetch(limit=10)
    #   RedditNewPCMasterRace().fetch(limit=15)
    bbc_news = BBCHeadlines()
    bbc_news.fetch()
    print(bbc_news)