-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
我读取json文件,显示乱码。 #6
Labels
Comments
import hashlib
import json
import time
import pymysql
import requests
from fake_useragent import UserAgent
from requests.exceptions import RequestException
import trip
# Shared fake_useragent generator; start_requests reads ua.random to fill its
# User-Agent header.
ua = UserAgent()
# Module-level requests session.
# NOTE(review): nothing visible in this script reads it (getASCP rebinds a
# local named `s`) — confirm it is unused before removing.
s = requests.session()
def getASCP():
    """Return the ``(AS, CP)`` string pair derived from the current Unix time.

    The uppercase hex digits of the rounded timestamp are interleaved with the
    first and last five characters of the uppercase MD5 hex digest of the
    decimal timestamp. When the hex timestamp is not exactly eight digits
    long, a fixed fallback pair is returned instead.
    """
    now = round(time.time())
    hex_ts = hex(now).upper()[2:]
    digest = hashlib.md5(str(now).encode(encoding='utf-8')).hexdigest().upper()

    # The interleaving below indexes hex digits 0..7, so any other length
    # falls back to the constant pair.
    if len(hex_ts) != 8:
        return '479BB4B7254C150', '7E0AC8874BB0985'

    digest_head, digest_tail = digest[:5], digest[-5:]
    # digest char first, then hex digits 0..4
    as_middle = ''.join(d + h for d, h in zip(digest_head, hex_ts))
    # hex digits 3..7 first, then digest char
    cp_middle = ''.join(h + d for h, d in zip(hex_ts[3:8], digest_tail))

    return 'A1' + as_middle + hex_ts[-3:], hex_ts[:3] + cp_middle + 'E1'
@trip.coroutine
def start_requests(maxtime=0):
    """Fetch one page of the Toutiao PC feed and return its decoded JSON.

    Args:
        maxtime: Value sent as the ``max_behot_time`` query parameter.

    Returns:
        The decoded JSON payload when it contains a ``data`` key; otherwise
        ``None``. ``None`` is also returned when the request raises
        ``RequestException`` (the error is printed).
    """
    AS, CP = getASCP()
    headers = {'User-Agent': ua.random}
    feed_url = 'https://www.toutiao.com/api/pc/feed/'
    payloads = {'max_behot_time': maxtime, 'category': '__all__', 'utm_source': 'toutiao', 'widen': 1,
                'tadrequire': 'false', 'as': AS, 'cp': CP}
    try:
        response = yield trip.get(feed_url, params=payloads, headers=headers)
        # Force UTF-8 so the body is not decoded with a guessed charset.
        response.encoding = "utf-8"
        print(response.encoding)
        feed = response.json()
        # The membership test must run on the decoded payload; the response
        # object itself has no keys().
        if 'data' in feed:
            return feed
    except RequestException as e:
        print('请求不成功', e)
    return None
async def parse_detail(response):
    """Store every non-ad entry of a feed page, then keep following ``next``.

    Args:
        response: Decoded feed payload (a dict with ``data`` and optionally
            ``next``), or ``None`` when the fetch produced nothing.

    Returns:
        ``None`` once a page has no ``next`` entry or a fetch yields nothing.
    """
    # Iterate instead of recursing: the feed can paginate indefinitely, and a
    # coroutine that is merely returned (not awaited) never executes.
    while response:
        for item in response.get('data') or []:
            # Only entries explicitly flagged as non-ads are stored; entries
            # without the flag are skipped.
            if item.get('is_feed_ad') == False:  # noqa: E712
                result = {
                    'title': item.get('title'),
                    'tags': item.get('chinese_tag'),
                    'comments': item.get('comments_count'),
                }
                print(result)
                insert_mysql(result)

        next_page = response.get('next')
        if not next_page:
            return None
        maxtime = next_page.get('max_behot_time')
        # Await the fetch directly rather than starting a second trip.run
        # from inside a coroutine that is itself driven by trip.run.
        # NOTE(review): assumes trip.coroutine returns a Tornado-style
        # awaitable future — confirm against the trip/Tornado versions in use.
        response = await start_requests(maxtime=maxtime)
    return None
def write_json(result):
    """Append *result* to ``tt.txt`` as a single line of JSON.

    Non-ASCII characters are written as-is (UTF-8) rather than as ``\\u``
    escapes.
    """
    with open('tt.txt', mode='a', encoding='utf-8') as sink:
        serialized = json.dumps(result, ensure_ascii=False)
        sink.write(f'{serialized}\n')
def insert_mysql(result):
    """Write one scraped record into the ``lala`` table (best effort).

    Args:
        result: Mapping with ``title``, ``tags`` and ``comments`` keys.

    Any failure is printed rather than raised. The transaction is rolled back
    only if a connection was actually established, and the connection is
    always closed so repeated calls do not accumulate open connections.
    """
    conn = None
    try:
        conn = pymysql.Connect(host="127.0.0.1", port=3306, user='root', passwd='root', db='spider', charset='utf8')
        sql_in = "replace into lala (title,tags,comments) values(%s,%s,%s)"
        with conn.cursor() as cursor:
            cursor.execute(sql_in, (result['title'], result['tags'], result['comments']))
        conn.commit()
    except Exception as e:
        print(e)
        # conn is still None when the connect call itself failed; rolling
        # back then would raise and hide the original error.
        if conn is not None:
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()
def main():
    """Fetch the first feed page and hand it to parse_detail."""
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from functools import partial

    response = trip.run(start_requests)
    if response is None:
        # start_requests yields None on a failed request or a payload
        # without 'data'; there is nothing to parse in that case.
        return
    # trip.run is handed a callable (as with start_requests above), so the
    # argument is bound with partial instead of passing an already-created
    # coroutine object.
    # NOTE(review): confirm against the installed trip version's run() API.
    trip.run(partial(parse_detail, response))
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
@kof0012 是我gzip处理的时候的问题,你更新一下版本(0.0.3)即可:`python -m pip install trip -U`
@littlecodersh 感谢回复,另外请问怎么在trip.run(fun)里写参数。实现trip.run(fun(args)),还是抓今日头条的json文件,想要递归回调(带参数),想了半天想不出来办法。。求教。
可以用 functools.partial 先绑定参数,再把得到的可调用对象传给 trip.run,例如 trip.run(partial(fun, args)):
from functools import partial
@littlecodersh 多谢提醒,已经解决。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
我抓今日头条的json,出现raise JSONDecodeError("Expecting value", s, err.value) from None
���������
出现类似这样的乱码。
The text was updated successfully, but these errors were encountered: