forked from cybits/cybot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fourchan_json.py
123 lines (86 loc) · 3.06 KB
/
fourchan_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import json
import random
import sys
import urllib
import urllib.request
from html.parser import HTMLParser
class MLStripper(HTMLParser):
    """HTML parser that discards markup and keeps only the text content.

    Each run of character data reported by the parser is appended to
    ``fed``; ``get_data`` returns the runs joined into one string.
    """

    def __init__(self):
        # HTMLParser.__init__ already calls reset(), so no second reset is
        # needed.  convert_charrefs is spelled out because users of this class
        # rely on entities (e.g. ``&amp;``) arriving in handle_data decoded.
        super().__init__(convert_charrefs=True)
        self.fed = []  # text chunks in document order

    def handle_data(self, d):
        """Collect one chunk of text found outside of tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with all tags removed.

    The input and the result are also printed to standard output, as the
    original implementation did.

    Args:
        html: markup string to strip.

    Returns:
        The concatenated text found outside of tags.
    """
    s = MLStripper()
    print("html", html)
    s.feed(html)
    # feed() keeps trailing text buffered when it could be the first half of
    # an entity or tag (e.g. "AT&T" or a dangling "<b"); close() tells the
    # parser the input is complete so that text is not silently lost.
    s.close()
    text = s.get_data()
    print("get data", text)
    return text
def formattext(text):
    """Convert an HTML post body into plain text.

    ``<br>`` tags are turned into newlines, then every remaining tag is
    removed with ``strip_tags``.

    Args:
        text: HTML string.

    Returns:
        The plain-text rendering of *text*.
    """
    # The line breaks must be converted before stripping, otherwise the tag
    # would simply vanish and adjacent lines would run together.
    #
    # The earlier ">" -> ">" and "'" -> "'" replacements mapped each string
    # onto itself and have been dropped; any character references left in the
    # text are a matter for the HTML parser behind strip_tags.
    return strip_tags(text.replace("<br>", "\n"))
def write(text):
    """Emit *text* on standard output exactly as given (no newline is added)."""
    stream = sys.stdout
    stream.write(text)
def is_number(s):
    """Return True when ``float(s)`` succeeds, False otherwise.

    Args:
        s: value to test; typically a string.

    Returns:
        bool: whether *s* can be converted to a float.
    """
    try:
        float(s)
    except (ValueError, TypeError):
        # ValueError: a string that is not a number.
        # TypeError: a value float() cannot accept at all (None, list, ...);
        # previously this escaped as an exception instead of answering False.
        return False
    return True
def get_boards_json():
    """Download ``boards.json`` from a.4cdn.org and return it decoded.

    Returns:
        The parsed JSON document.

    Raises:
        urllib.error.URLError: if the request fails.
        ValueError: if the response body is not valid JSON.
    """
    # The context manager closes the HTTP response even when reading or
    # decoding fails; the previous version never closed it.
    with urllib.request.urlopen("http://a.4cdn.org/boards.json") as response:
        return json.loads(response.read().decode('utf-8'))
def get_page_json(board, pageindex):
    """Download one index page of a board from a.4cdn.org and return it decoded.

    Args:
        board: board name as used in the URL path (no slashes).
        pageindex: page number; converted with ``str`` for the URL.

    Returns:
        The parsed JSON document.

    Raises:
        urllib.error.URLError: if the request fails.
        ValueError: if the response body is not valid JSON.
    """
    url = "http://a.4cdn.org/" + board + "/" + str(pageindex) + ".json"
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))
def get_thread_json(board, threadno):
    """Download a single thread from a.4cdn.org and return it decoded.

    Args:
        board: board name as used in the URL path (no slashes).
        threadno: thread number; converted with ``str`` for the URL.

    Returns:
        The parsed JSON document.

    Raises:
        urllib.error.URLError: if the request fails.
        ValueError: if the response body is not valid JSON.
    """
    url = "http://a.4cdn.org/" + board + "/thread/" + str(threadno) + ".json"
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))
def get_op_no(pagedata, threadindex):
    """Return the ``no`` field of the first post of one thread on a page.

    Args:
        pagedata: mapping with a ``threads`` sequence, each item holding a
            ``posts`` sequence.
        threadindex: index into ``pagedata['threads']``.
    """
    thread = pagedata['threads'][threadindex]
    opening_post = thread['posts'][0]
    return opening_post['no']
def get_boardargs():
    """Fetch the board list and discard it.

    NOTE(review): this looks like an unfinished stub -- the downloaded data
    is never used and the function returns None.
    """
    get_boards_json()
def _choose_board(allboards, requested):
    """Return the board entry named by the last item of *requested*, else a random one."""
    if requested:
        # Accept "/g/" as well as "g": the ``board`` field is the bare name
        # (it is inserted between slashes when the request URLs are built).
        wanted = requested[-1].strip("/")
        for entry in allboards:
            if entry['board'] == wanted:
                return entry
    return random.choice(allboards)


def get_random_post(args, max_attempts=10):
    """Return the text of a randomly picked post, or a fallback message.

    For each attempt a board is chosen (the one named by the last element of
    ``args['args']`` when it matches a known board, otherwise a random one),
    then a random page, a random thread on that page and a random post in
    that thread.  A post is accepted when it has a ``com`` field, has no
    ``sticky`` field, its formatted text is longer than one character and
    the text after its first two characters is not purely digits
    (presumably to skip bare ``>>12345`` quote links -- TODO confirm).

    Args:
        args: mapping whose ``'args'`` entry is a (possibly empty) sequence
            of argument strings.
        max_attempts: how many random picks to try before giving up.

    Returns:
        str: the post text, ``"No shitpost found."`` when every attempt was
        rejected, or ``"No random post for you"`` when formatting a post
        failed.

    Raises:
        urllib.error.URLError: if one of the downloads fails.
    """
    # The board list does not change between attempts, so fetch it only once.
    allboards = get_boards_json()['boards']
    if not allboards:
        return "No shitpost found."

    # Retry with a plain loop.  The previous version recursed on every
    # rejected post but threw the recursive result away, multiplying the
    # number of downloads without ever using what they found.
    for _ in range(max_attempts):
        entry = _choose_board(allboards, args['args'])
        board = entry['board']

        pagedata = get_page_json(board, random.randint(1, entry['pages']))
        threads = pagedata['threads']
        if not threads:
            # Nothing to pick from on this page; try again.
            continue

        threadno = get_op_no(pagedata, random.randrange(len(threads)))
        post = random.choice(get_thread_json(board, threadno)['posts'])

        # Test for the keys themselves.  Searching the serialized JSON text
        # also matched "com"/"sticky" appearing inside any value.
        if 'com' not in post or 'sticky' in post:
            continue

        try:
            text = formattext(post['com'])
        except Exception:
            return "No random post for you"

        if len(text) > 1 and not text[2:].isdigit():
            return text

    # Reached once every attempt was rejected; the old ``iterations == 10``
    # check could never be true inside ``range(0, 10)``.
    return "No shitpost found."