Skip to content

Commit

Permalink
fixed spiders to extract the title field
Browse files Browse the repository at this point in the history
  • Loading branch information
ansonyao committed Apr 20, 2016
1 parent 97b8a4a commit a9b544e
Show file tree
Hide file tree
Showing 3 changed files with 1,895 additions and 6 deletions.
6 changes: 3 additions & 3 deletions basicspider/craigslist_sample/spiders/test.py
Original file line number Diff line number Diff line change
@@ -12,9 +12,9 @@ def parse(self, response):
hxs = HtmlXPathSelector(response)
titles = hxs.xpath("//span[@class='pl']")
items = []
for titles in titles:
for title in titles:
item = CraigslistSampleItem()
item["title"] = titles.select("a/text()").extract()
item["link"] = titles.select("a/@href").extract()
item["title"] = title.select('a/span[@id="titletextonly"]/text()').extract()
item["link"] = title.select("a/@href").extract()
items.append(item)
return items
6 changes: 3 additions & 3 deletions crawlspider/craigslist_sample/spiders/test2.py
Original file line number Diff line number Diff line change
@@ -17,9 +17,9 @@ def parse_items(self, response):
hxs = HtmlXPathSelector(response)
titles = hxs.xpath('//span[@class="pl"]')
items = []
for titles in titles:
for title in titles:
item = CraigslistSampleItem()
item["title"] = titles.xpath("a/text()").extract()
item["link"] = titles.xpath("a/@href").extract()
item["title"] = title.select('a/span[@id="titletextonly"]/text()').extract()
item["link"] = title.xpath("a/@href").extract()
items.append(item)
return(items)
Loading

0 comments on commit a9b544e

Please sign in to comment.