-
Notifications
You must be signed in to change notification settings - Fork 0
/
Parse.py
21 lines (21 loc) · 1.29 KB
/
Parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import bs4
def _first_match(page, selector):
    """Return the first element matching CSS *selector*.

    Raises IndexError (the same exception type as indexing an empty result
    list) with a message naming the selector when nothing matches.
    """
    matches = page.select(selector)
    if not matches:
        raise IndexError(
            "no element matches CSS selector {!r}".format(selector)
        )
    return matches[0]


def _attribute_value(element, name):
    """Return attribute *name* from *element* or its first descendant having it.

    Returns an empty string when the attribute does not occur anywhere in
    the element's subtree.
    """
    if element.has_attr(name):
        holder = element
    else:
        holder = element.find(attrs={name: True})
    return holder[name] if holder is not None else ""


def parse_html(value):
    """Extract poster and post details from a scraped comment.

    Args:
        value: Mapping with the keys
            'comment' - HTML markup of the comment, parsed with lxml;
            'type'    - how the post was found; the string "direct" selects
                        the single-link justification;
            'urls'    - sequence of URLs; index 0 is always used, index 1
                        is used as well when 'type' is not "direct".

    Returns:
        dict with the keys 'username', 'totalPosts', 'profilePic',
        'dtString', 'content' (the matched element itself, not text),
        'justification' (an HTML fragment), 'foundVia', 'url' and 'title'.

    Raises:
        KeyError: if *value* lacks one of the keys above.
        IndexError: if a required element (.Username, .ProfilePhotoMedium,
            time, .userContent) is missing, or a non-"direct" value has
            fewer than two URLs.
        AttributeError: if no <span class="MItem CountDiscussions"> exists.
    """
    info = {}
    page = bs4.BeautifulSoup(value['comment'], 'lxml')

    info['username'] = _first_match(page, ".Username").getText()

    # tag.get() instead of tag['class']: spans without a class attribute
    # must simply not match rather than abort the search with a KeyError.
    posts_span = page.find(
        lambda tag: tag.name == 'span'
        and tag.get('class') == ['MItem', 'CountDiscussions']
    )
    # Only the first space-separated token of the span's text is kept.
    info['totalPosts'] = posts_span.getText().partition(" ")[0]

    # Read attributes through the parser rather than slicing the serialized
    # tag, which depends on quoting style and attribute naming.
    info['profilePic'] = _attribute_value(
        _first_match(page, ".ProfilePhotoMedium"), 'src'
    )
    # Keep only what precedes the 'T' of the datetime attribute.
    info['dtString'] = _attribute_value(
        _first_match(page, "time"), 'datetime'
    ).partition('T')[0]

    info['content'] = _first_match(page, ".userContent")

    # NOTE(review): the username and URLs are interpolated into HTML without
    # escaping - confirm that the consumer sanitizes this fragment.
    username = info['username']
    stats_link = """{} posted their stats <a href="{}">here</a>"""
    decision_link = (
        """{} posted their admission decision <a href="{}">here</a><br>"""
    )
    if value['type'] == "direct":
        justification = stats_link.format(username, value['urls'][0])
    else:
        justification = decision_link.format(username, value['urls'][0])
        justification += stats_link.format(username, value['urls'][1])
    info['justification'] = justification

    info['foundVia'] = value['type']
    info['url'] = value['urls'][0]

    # Title = last path segment of the URL, minus everything up to its first
    # hyphen (presumably a numeric id - TODO confirm) and minus ".html",
    # with the remaining hyphen-separated words title-cased.
    last_segment = info['url'].rpartition('/')[2]
    slug = last_segment.partition('-')[2].replace(".html", "")
    info['title'] = ' '.join(word.title() for word in slug.split("-"))
    return info