Skip to content

Commit

Permalink
Add timestamp where able to be extracted
Browse files Browse the repository at this point in the history
  • Loading branch information
njenkins committed Mar 23, 2016
1 parent 249c311 commit 5812db4
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 10 deletions.
6 changes: 2 additions & 4 deletions README.md
Expand Up @@ -2,8 +2,6 @@ This is a scraper that runs on [Morph](https://morph.io). To get started [see th

* PDFs on the web suck

Currently this scraper returns the title, URL and year of publication.
Link text is inconsistent, which makes extracting a complete date tricky.
However, I'm hoping to add this in the future.

Currently this scraper returns the title, URL, year, and month/year of publication where it is available.
The link/date format is inconsistent; however, where at least a month and year can be extracted, a timestamp value is also generated.
Maybe someone can build something useful with this information.
17 changes: 11 additions & 6 deletions scraper.js
Expand Up @@ -8,7 +8,7 @@ function initDatabase(callback) {
var db = new sqlite3.Database("data.sqlite");
db.serialize(function() {
resetTable(db);
db.run("CREATE TABLE IF NOT EXISTS data (title TEXT, url TEXT, year INTEGER, daymonth STRING)");
db.run("CREATE TABLE IF NOT EXISTS data (title TEXT, url TEXT, year INTEGER, daymonth STRING, timestamp STRING)");
callback(db);
});
}
Expand All @@ -18,15 +18,15 @@ function resetTable(db){
}
/**
 * Inserts a single row into the `data` table.
 *
 * @param {object} db - Database handle; only `db.prepare` is used here.
 * @param {Array} values - Values bound to the statement's `?` placeholders,
 *   in the same order as the column list of the INSERT.
 */
function updateRow(db, values) {
// Insert some data.
// NOTE(review): the two `db.prepare` lines below appear to be the before/after
// versions of one changed line in this commit view. As written, the second
// assignment overwrites `statement`, so only the five-column INSERT (with
// `timestamp`) is the one that gets run and finalized.
var statement = db.prepare("INSERT INTO data(title, url, year, daymonth) VALUES (?, ?, ?, ?)");
var statement = db.prepare("INSERT INTO data(title, url, year, daymonth, timestamp) VALUES (?, ?, ?, ?, ?)");
// `values` must supply one entry per `?` placeholder.
statement.run(values);
// Finalize the prepared statement now that the insert has been issued.
statement.finalize();
}

/**
 * Iterates over the rows of the `data` table and logs a summary line for each.
 *
 * @param {object} db - Database handle; only `db.each` is used here.
 */
function readRows(db) {
// Read some data.
// NOTE(review): the two `db.each(...)` / `console.log(...)` pairs below look
// like the before/after versions of the changed lines in this commit view;
// the later pair additionally selects and prints `timestamp`.
// NOTE(review): `err` is never checked before `row` is read.
db.each("SELECT rowid AS id, title, url, year, daymonth FROM data", function(err, row) {
console.log(row.daymonth + ' ' + row.year);
db.each("SELECT rowid AS id, title, url, year, daymonth, timestamp FROM data", function(err, row) {
console.log(row.daymonth + ' ' + row.year + ': '+ row.timestamp);
//console.log(row.id + ": " + row.title + ': ' + row.url + ': ' + row.daymonth +' ' + row.year);
});
}
Expand Down Expand Up @@ -63,8 +63,13 @@ function run(db) {

var url = $link.attr('href');
var year = parseInt($link.closest('.related-box').find('h2').text());
var daymonth = 'test';
var values = [title, url, year, dayMonth];
//If able to parse out at least a month and a year, assign a timestamp
var timestamp;
if(dayMonth){
timestamp = new Date(dayMonth + ' ' + year);
}

var values = [title, url, year, dayMonth, timestamp];
updateRow(db, values);

});
Expand Down

0 comments on commit 5812db4

Please sign in to comment.