/
gotime.py
25 lines (19 loc) · 870 Bytes
/
gotime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# -*- coding: utf-8 -*-
import scrapy
class PracticalaiSpider(scrapy.Spider):
    """Scrape the episode listing at changelog.com/gotime.

    For every ``.news_item`` element on a listing page the spider yields a
    dict with the keys ``title``, ``url`` and ``link``. It then follows the
    "load more" link, when one is present, and parses that page the same way.

    NOTE(review): the class name does not match the spider name
    (``gotime``); it is kept unchanged so existing references keep working.
    """

    name = 'gotime'
    # NOTE: allowed_domains is left unset, as before. If it is ever enabled,
    # Scrapy expects bare domain names (e.g. 'changelog.com'), not URLs.
    start_urls = ['http://changelog.com/gotime/']

    def parse(self, response):
        """Extract all news items from a listing page and follow pagination.

        Args:
            response: The Scrapy response for a listing page.

        Yields:
            dict: One record per ``.news_item`` with ``title``, ``url`` and
                ``link``; a value is ``None`` when its selector matches
                nothing.
            scrapy.Request: A request for the next listing page, handled by
                this same callback, when a "load more" link exists.
        """
        for item in response.css('.news_item'):
            yield {
                'title': item.css(
                    '.news_item-header .news_item-title a::text'
                ).extract_first(),
                'url': item.css(
                    '.news_item-header .news_item-title a::attr(href)'
                ).extract_first(),
                'link': item.css(
                    '.news_item-toolbar .news_item-toolbar-play_button::attr(href)'
                ).extract_first(),
            }

        # Named next_page rather than `next` to avoid shadowing the builtin.
        next_page = response.css('.load_more a::attr(href)').extract_first()
        # Use the spider's logger instead of print() so the message follows
        # Scrapy's logging configuration rather than going straight to stdout.
        self.logger.debug('Next page link: %s', next_page)
        if next_page is not None:
            # response.follow resolves relative hrefs against the current URL.
            yield response.follow(next_page, callback=self.parse)