Skip to content

Commit

Permalink
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…
Browse files Browse the repository at this point in the history
  • Loading branch information
kreynen committed Dec 29, 2014
0 parents commit 936f0be
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
1 change: 1 addition & 0 deletions README.textile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Still under development
42 changes: 42 additions & 0 deletions scraper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php
require 'scraperwiki/simple_html_dom.php';

// Listing pages to scrape, keyed by project type. Each URL carries the
// drupal_core=103 filter, which per the original note restricts the
// listing to 7.x-compatible projects.
$uris = array(
    'modules' => "http://drupal.org/project/modules/index?project-status=0&drupal_core=103",
    'themes' => "http://drupal.org/project/themes/index?project-status=0&drupal_core=103",
    'profiles' => "http://drupal.org/project/installation%2Bprofiles/index?project-status=0&drupal_core=103",
);

// One parser instance is shared across all listing pages.
$dom = new simple_html_dom();

foreach ($uris as $uri) {
    // Progress output: terminate each URL with a newline so consecutive
    // URLs do not run together on a single line.
    print $uri . PHP_EOL;

    // Fetch the listing page, parse it, and store every project it lists.
    $content = scraperwiki::scrape($uri);
    $dom->load($content);
    saveProjects($dom);
}

/**
 * Saves one record for every project link found in a parsed listing page.
 *
 * @param object $dom Parsed listing page; must provide a find($selector) method.
 */
function saveProjects($dom) {
    // Each project sits in its own row of the View; the selector picks the
    // title link of every row.
    $projectLinks = $dom->find('.view-project-index .views-field-title a');

    foreach ($projectLinks as $projectLink) {
        // Build the record and store it. NOTE(review): 'project_name' is
        // presumably the unique key for the save; confirm against the
        // scraperwiki library.
        scraperwiki::save(array('project_name'), extractInfo($projectLink));
    }
}

/**
 * Builds the database record for a single project link.
 *
 * @param object $data Link element exposing an href property,
 *                     e.g. "/project/views".
 * @return array Record with the keys 'project_name' and 'git_url'.
 */
function extractInfo($data) {
    $href = $data->href;

    return array(
        // Machine name: the href with every "/project/" occurrence removed.
        'project_name' => str_replace('/project/', '', $href),
        // The Git URL keeps the full href path,
        // e.g. http://git.drupal.org/project/views.git
        'git_url' => "http://git.drupal.org" . $href . ".git",
    );
}

0 comments on commit 936f0be

Please sign in to comment.