Skip to content

Commit

Permalink
Update scraper.php
Browse files Browse the repository at this point in the history
Changed the scraper to base64-encode the image and to grab the chipset and real name
  • Loading branch information
khanzanicdecay committed Jun 18, 2014
1 parent 01aec8d commit fa08317
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions scraper.php
Expand Up @@ -64,16 +64,18 @@ function parseModelsPage($brandId,$brandName,$page){

$html_content = scraperwiki::scrape($page);
$this->html = str_get_html($html_content);

foreach ($this->html->find("div.makers a") as $el) {
$img = $el->find('img',0);
$m['name'] = $brandName . ' ' . $el->find('strong',0)->innertext;

foreach ($this->html->find("#main") as $el) {
$img = $el->find('#specs-cp-pic img',0);
$tmp = $el->find('.brand h1',0)->innertext;
$m['name'] = str_replace(" ", "<br>", $tmp);
$m['img'] = $img->src;
$m['link'] = 'http://www.gsmarena.com/'.$el->href;
$m['desc'] = $img->title;
$temp = explode('-',$el->href);
$m['id'] = (int) substr($temp[1], 0, -4);
$m['brand_id'] = $brandId;
$im = file_get_contents($img->src);
$m['img'] = base64_encode($im);
$tmp = explode(' ', $tmp, 2);
$m['rname'] = $tmp[1];
$out = explode('<td class="ttl"><a href="glossary.php3?term=chipset">Chipset</a></td>', $el);
$m['desc'] = explode('</td>',$out[1]);

scraperwiki::save_sqlite(array("id"=>$m['id']), $m, "cell_model");

Expand Down

0 comments on commit fa08317

Please sign in to comment.