From fa08317793e9d22435bd0fb5469299a31d73f79c Mon Sep 17 00:00:00 2001 From: khanzanicdecay Date: Wed, 18 Jun 2014 22:17:29 +0100 Subject: [PATCH] Update scraper.php Changed scrape to base64 encode image and grab chipset + real name --- scraper.php | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/scraper.php b/scraper.php index c98957f..900292b 100644 --- a/scraper.php +++ b/scraper.php @@ -64,16 +64,18 @@ function parseModelsPage($brandId,$brandName,$page){ $html_content = scraperwiki::scrape($page); $this->html = str_get_html($html_content); - - foreach ($this->html->find("div.makers a") as $el) { - $img = $el->find('img',0); - $m['name'] = $brandName . ' ' . $el->find('strong',0)->innertext; + + foreach ($this->html->find("#main") as $el) { + $img = $el->find('#specs-cp-pic img',0); + $tmp = $el->find('.brand h1',0)->innertext; + $m['name'] = str_replace(" ", "
", $tmp); $m['img'] = $img->src; - $m['link'] = 'http://www.gsmarena.com/'.$el->href; - $m['desc'] = $img->title; - $temp = explode('-',$el->href); - $m['id'] = (int) substr($temp[1], 0, -4); - $m['brand_id'] = $brandId; + $im = file_get_contents($img->src); + $m['img'] = base64_encode($im); + $tmp = explode(' ', $tmp, 2); + $m['rname'] = $tmp[1]; + $out = explode('Chipset', $el); + $m['desc'] = explode('',$out[1]); scraperwiki::save_sqlite(array("id"=>$m['id']), $m, "cell_model");