-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.php
30 lines (28 loc) · 1.44 KB
/
scraper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
<?php
require 'scraperwiki.php';
######################################
# Basic PHP scraper
######################################
// Fetch the IMDb chart page and flatten it onto one line: the row pattern
// below has no "s" modifier, so "." would otherwise stop at line breaks.
$html = scraperwiki::scrape("http://www.imdb.com/chart/top");
$html = oneline($html);

// Matches one chart row. Capture groups: 1 = rank, 2 = rating,
// 3 = site-relative link, 4 = title, 5 = year.
$rowPattern = '|<tr bgcolor="#.*?" valign="top"><td align="right"><font face="Arial, Helvetica, sans-serif" size="-1"><b>(.*?)\.</b></font></td><td align="center"><font face="Arial, Helvetica, sans-serif" size="-1">(.*?)</font></td><td><font face="Arial, Helvetica, sans-serif" size="-1"><a href="(.*?)">(.*?)</a> \((.*?)\)</font></td><td align="right"><font face="Arial, Helvetica, sans-serif" size="-1">.*?</font></td></tr>|';
$rowCount = preg_match_all($rowPattern, $html, $arr);

if ($rowCount === false) {
    // A PCRE failure (for example an exhausted backtrack limit) would
    // otherwise pass unnoticed and leave the saved data incomplete.
    throw new RuntimeException('Matching the chart rows failed, PCRE error code ' . preg_last_error());
}
if ($rowCount === 0) {
    // Nothing to save; presumably the download failed or the page markup
    // no longer matches the pattern. Surface it instead of ending silently.
    trigger_error('No chart rows matched; nothing was saved', E_USER_WARNING);
}

// $arr is in pattern order: $arr[N][$key] holds capture group N of row $key.
foreach ($arr[1] as $key => $val) {
    scraperwiki::save_sqlite(
        array('rank'), // presumably the unique-key column list -- confirm against scraperwiki.php
        array(
            'rank'   => "" . clean($val),
            'rating' => clean($arr[2][$key]),
            'name'   => clean($arr[4][$key]),
            'year'   => clean($arr[5][$key]),
            // The captured href is site-relative, so prefix the host.
            'link'   => clean('http://www.imdb.com' . $arr[3][$key]),
        )
    );
}
/**
 * Normalise a fragment of scraped HTML into plain, trimmed text.
 *
 * Decodes HTML entities, turns non-breaking spaces into ordinary spaces,
 * strips tags, trims surrounding whitespace and finally converts the text
 * from UTF-8 to ISO-8859-1.
 *
 * @param string $val Raw HTML fragment.
 * @return string Plain text encoded as ISO-8859-1; characters that have no
 *                ISO-8859-1 equivalent come back as '?'.
 */
function clean($val) {
    // Flags and charset are pinned so the result does not depend on the
    // defaults of the running PHP version. This also turns "&amp;" into "&".
    $val = html_entity_decode($val, ENT_QUOTES, 'UTF-8');

    // "&nbsp;", "&#160;" and literal non-breaking spaces are all U+00A0 at
    // this point; trim() does not strip U+00A0, so map it to a plain space.
    $val = str_replace("\xC2\xA0", ' ', $val);

    $val = strip_tags($val);
    $val = trim($val);

    // utf8_decode() is deprecated as of PHP 8.2; use mbstring when it is
    // loaded and keep the old call only as a fallback.
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($val, 'ISO-8859-1', 'UTF-8');
    }
    return utf8_decode($val);
}
/**
 * Collapse text onto a single line by deleting every line-feed and
 * carriage-return character. Nothing is inserted in their place.
 *
 * @param string $code Text that may span several lines.
 * @return string The same text with all "\n" and "\r" removed.
 */
function oneline($code) {
    // Search strings are applied in array order: "\n" first, then "\r".
    return str_replace(array("\n", "\r"), '', $code);
}
?>