# 模拟Ajax爬取微博内容

In [16]:
import requests
from urllib.parse import urlencode
from pyquery import PyQuery as pq
from pymongo import MongoClient

# Highest page number requested by the script entry point (pages 1..max_page).
max_page = 10

# Ajax endpoint of the mobile site; the encoded query string is appended to it.
base_url = 'https://m.weibo.cn/api/container/getIndex?'

# Headers sent with every API request. 'X-Requested-With' labels the request
# as an XMLHttpRequest and 'Referer' points at the profile page being crawled.
headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36',
    'Referer': 'https://m.weibo.cn/u/5318584155',
    'Host': 'm.weibo.cn',
}

# MongoDB handles: default client, database 'weibo', collection 'weibo'.
client = MongoClient()
db = client.weibo
collection = db.weibo


def get_page(page, timeout=10):
    """Fetch one page of the account's timeline from the Ajax API.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(payload, page)`` tuple, where ``payload`` is the decoded JSON
        body, when the server answers with HTTP 200. ``None`` when the
        request fails, the status is not 200, or the body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '5318584155',
        'containerid': '1076035318584155',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        # Without an explicit timeout a stalled server blocks the crawl forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json(), page
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection failures and timeouts;
        # ValueError covers a body that cannot be decoded as JSON.
        print('Error', e.args)
    return None


def parse_page(json, page: int):
    """Yield a summary dict for every post card in one API response.

    Args:
        json: Decoded response body (a dict), or a falsy value such as
            ``None`` when nothing was fetched.
        page: Page number the response belongs to.

    Yields:
        Dicts with the keys ``id``, ``text`` (the post's markup reduced to
        plain text via PyQuery), ``attitudes``, ``comments`` and ``reposts``.
    """
    if not json:
        return
    # A missing or null 'data' / 'cards' entry is treated as an empty page
    # instead of raising AttributeError on None.
    data = json.get('data') or {}
    cards = data.get('cards') or []
    for index, card in enumerate(cards):
        # The second card of the first page is always skipped, as before.
        # NOTE(review): presumably that card is not a regular post - confirm.
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Cards without an 'mblog' entry carry no post to extract.
            continue
        yield {
            'id': mblog.get('id'),
            'text': pq(mblog.get('text')).text(),
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }


def save_to_mongo(result):
    """Insert one parsed post into the module-level MongoDB collection.

    Args:
        result: Dict describing a single post; it is stored as one document.
    """
    # Collection.insert() was deprecated in PyMongo 3.0 and removed in 4.0;
    # insert_one() is the supported single-document equivalent.
    outcome = collection.insert_one(result)
    if outcome.acknowledged:
        print('Saved to Mongo')


# Script entry point: fetch pages 1..max_page, print each parsed post and
# store it in MongoDB.
if __name__ == '__main__':
    for page in range(1, max_page + 1):
        fetched = get_page(page)
        if not fetched:
            # get_page() returns None when the request fails; unpacking None
            # would raise TypeError, so skip this page and carry on.
            continue
        for result in parse_page(*fetched):
            print(result)
            save_to_mongo(result)


{'id': '4274822886004699', 'text': '杨哥好样的！@孙杨\n🇨🇳🇨🇳🇨🇳🇨🇳🇨🇳🇨🇳🇨🇳\nOne more！', 'attitudes': 2755, 'comments': 186, 'reposts': 22}
Saved to Mongo
{'id': '4274199671348477', 'text': '好运！[加油][加油][加油] 印度尼西亚·雅加达', 'attitudes': 7580, 'comments': 817, 'reposts': 363}
Saved to Mongo
{'id': '4273751959175493', 'text': '我们尽力而为！哈哈', 'attitudes': 1173, 'comments': 107, 'reposts': 32}
Saved to Mongo
{'id': '4273747814820730', 'text': '满分一百，给你俩98！剩下你俩一人扣一分，怕你俩骄傲！哈哈哈哈哈哈哈。', 'attitudes': 665, 'comments': 50, 'reposts': 9}
Saved to Mongo
{'id': '4273721906120907', 'text': '哈哈哈好啊，到时候就给你来个帅气暴扣💪💪💪', 'attitudes': 865, 'comments': 85, 'reposts': 19}
Saved to Mongo
{'id': '4273286085581475', 'text': '谁干的来出来！哈哈哈 ！不过我将以此为目标！继续努力！继续加油！中国队！🇨🇳', 'attitudes': 9198, 'comments': 807, 'reposts': 221}
Saved to Mongo
{'id': '4270842244363593', 'text': '“每个人心目中都有个珠穆朗玛峰，定了多少高度的目标，就要为目标而努力。生活不仅是篮球，现在只要努力，结果已经不重要了。”—谢谢@王石 大师的智慧，我抄下来也愿与所有人共勉之。', 'attitudes': 2803, 'comments': 242, 'reposts': 72}
Saved to Mongo
{'id': '427080987



{'id': '4268280581100141', 'text': '一直以来都有军人梦，每次看军人题材的电视剧都会热血沸腾热泪盈眶。致敬！！！', 'attitudes': 1695, 'comments': 129, 'reposts': 42}
Saved to Mongo
{'id': '4265589755199268', 'text': '🇨🇳[加油][加油]', 'attitudes': 7214, 'comments': 749, 'reposts': 182}
Saved to Mongo
{'id': '4265257083465427', 'text': '剧透一小波！', 'attitudes': 4474, 'comments': 467, 'reposts': 49}
Saved to Mongo
{'id': '4264090354317263', 'text': '我来了！ 北京·北京首都国际机场(商务中心)', 'attitudes': 6340, 'comments': 720, 'reposts': 116}
Saved to Mongo
{'id': '4262191005187600', 'text': '哦哟！太厉害了！！精彩精彩！！', 'attitudes': 2236, 'comments': 169, 'reposts': 10}
Saved to Mongo
{'id': '4259852026765832', 'text': '@阿不都沙拉木14 #童年照相馆#', 'attitudes': 6732, 'comments': 648, 'reposts': 147}
Saved to Mongo
{'id': '4258759636979095', 'text': '给自己加个油！👏👏👏', 'attitudes': 5216, 'comments': 669, 'reposts': 74}
Saved to Mongo
{'id': '4257598767318668', 'text': '这玩意咬人不，想给它送回湖边！', 'attitudes': 2965, 'comments': 304, 'reposts': 30}
Saved to Mongo
{'id': '4255793492850699'

{'id': '4104207756135632', 'text': '', 'attitudes': 1232, 'comments': 77, 'reposts': 128}
Saved to Mongo
{'id': '4101133846280874', 'text': '据知情人士透露。我要走了。', 'attitudes': 4413, 'comments': 1322, 'reposts': 158}
Saved to Mongo
{'id': '4100526327088808', 'text': '又当了次少先队员。感觉真好。', 'attitudes': 3413, 'comments': 224, 'reposts': 22}
Saved to Mongo
{'id': '4095801350726774', 'text': '来来来，感受一下飞人的小白鞋🏀🏀🏀满满的活力！\n#纯白当道##PureMoney#', 'attitudes': 2321, 'comments': 219, 'reposts': 13}
Saved to Mongo
{'id': '4094944429642750', 'text': '除了CBA冠军，又收到了 @StephenCurry 送给我的这台#vivo 库里定制版Xplay6# 蓝黄经典，感受到了传奇30赛场上的爆发力，谢谢。', 'attitudes': 4922, 'comments': 596, 'reposts': 76}
Saved to Mongo
{'id': '4094272292847662', 'text': '分享视频 秒拍视频 .', 'attitudes': 2612, 'comments': 259, 'reposts': 142}
Saved to Mongo
{'id': '4094264462282583', 'text': '秒拍视频 .', 'attitudes': 4042, 'comments': 353, 'reposts': 147}
Saved to Mongo
{'id': '4094039291259663', 'text': '', 'attitudes': 2725, 'comments': 366, 'reposts': 29}
Saved to 