Skip to content

Commit

Permalink
Update scraper.php
Browse files Browse the repository at this point in the history
  • Loading branch information
mycwnet committed Sep 19, 2018
1 parent c52465a commit 2761c12
Showing 1 changed file with 4 additions and 17 deletions.
21 changes: 4 additions & 17 deletions scraper.php
Expand Up @@ -3,11 +3,8 @@
require 'scraperwiki.php';
require 'scraperwiki/simple_html_dom.php';
######################################
# Basic PHP scraper
# Credit to Yukoff: this code is forked from their scraper
# https://github.com/yukoff/imdb-toptv-250
# I couldn't use most of that data and wanted the ID (which that scraper lacked)
# and rank only
# Scrapes top 250 movies from imdb
# Outputs rank and id
######################################
# Fetch the IMDb Top 250 chart page through the scraperwiki helper and keep the result in $html.
$html = scraperwiki::scrape("http://www.imdb.com/chart/top");

Expand All @@ -19,18 +16,8 @@
# Walk every title cell of the chart and collect one ['rank', 'imdb_id'] row per movie.
foreach ($dom->find('td.titleColumn') as $movie) {
    # Group 1: the 1-3 digit rank that precedes the dot.
    # Group 2: the path segment that follows /title/ in the link (the IMDb id).
    $matched = preg_match(
        '#.*?([1-9][0-9]{0,2})\..*?<a.*?\/title\/(.+?)\/.*#',
        $movie->innertext,
        $match
    );

    if ($matched !== 1) {
        # The cell does not have the expected "rank. <a href=/title/id/...>" shape;
        # skip it rather than reading undefined offsets from $match.
        continue;
    }

    # Append a row only. Writing $ids[$rank] as well would overwrite the row
    # appended on the previous iteration and leave scalars mixed in with rows.
    $ids[] = ['rank' => $match[1], 'imdb_id' => $match[2]];
}

print_r($ids);

# 'rank' is passed as the unique key list for the saved rows.
scraperwiki::save_sqlite(['rank'], $ids);

/**
 * Reduce a scraped HTML fragment to plain, trimmed text.
 *
 * Steps, in order: replace the literal entities &nbsp; (with a space) and
 * &amp; (with &), decode any remaining HTML entities, strip tags, trim
 * surrounding whitespace, and finally pass the text through utf8_decode().
 *
 * @param string $val Raw fragment to clean.
 * @return string The cleaned text as returned by utf8_decode().
 */
function clean($val)
{
    # str_replace processes the search array left to right, so &nbsp; is
    # handled before &amp;, exactly like two consecutive calls would.
    $text = str_replace(['&nbsp;', '&amp;'], [' ', '&'], $val);

    # Entities are decoded before tags are stripped, so an encoded tag such as
    # &lt;x&gt; is removed as well.
    return utf8_decode(trim(strip_tags(html_entity_decode($text))));
}
?>

0 comments on commit 2761c12

Please sign in to comment.