From 82cd22625a3291c5ad4a25928fe64e6e135cc7da Mon Sep 17 00:00:00 2001 From: Ilya Date: Thu, 2 Feb 2012 04:51:36 +0400 Subject: [PATCH] minor changes in 6 xpath expressions --- mtqinfra/spiders/mtqinfra_spider.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mtqinfra/spiders/mtqinfra_spider.py b/mtqinfra/spiders/mtqinfra_spider.py index d232abd..8c94946 100644 --- a/mtqinfra/spiders/mtqinfra_spider.py +++ b/mtqinfra/spiders/mtqinfra_spider.py @@ -153,14 +153,14 @@ def parse_details(self, response): # Extract structure ID from URL structure_id = response.request.url.split(':')[-1] hxs = HtmlXPathSelector(response) - road_class = "".join(hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[2]/tr[7]/td//text()').extract()).strip() - municipality = "".join(hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[2]/tr[9]/td//text()').extract()).strip() - rcm = "".join(hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[2]/tr[10]/td//text()').extract()).strip() - latitude_text = hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[3]/tr[2]/td[1]/text()').extract()[0].strip() + road_class = "".join(hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[1]/tr[7]/td//text()').extract()).strip() + municipality = "".join(hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[1]/tr[9]/td//text()').extract()).strip() + rcm = "".join(hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[1]/tr[10]/td//text()').extract()).strip() + latitude_text = hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[2]/tr[2]/td[1]/text()').extract()[0].strip() latitude = float(latitude_text.replace(",", ".")) - longitude_text = hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[3]/tr[2]/td[2]/text()').extract()[0].strip() + longitude_text = hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[2]/tr[2]/td[2]/text()').extract()[0].strip() longitude = float(longitude_text.replace(",", ".")) - construction_year = hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[5]/tr[2]/td/text()').extract()[0].strip() + construction_year = hxs.select('//table[@id="R3886317546232168"]/tr[2]/td/table[4]/tr[2]/td/text()').extract()[0].strip() # Picture is not always available picture_node = hxs.select('//img[@width="300px"][@height="200px"]/@src') if picture_node: