diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..66d464d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Ignore output of scraper
+data.sqlite
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e541894
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+This is a scraper that runs on [Morph](https://morph.io). To get started [see the documentation](https://morph.io/documentation)
\ No newline at end of file
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..8523d6d
--- /dev/null
+++ b/package.json
@@ -0,0 +1,13 @@
+{
+  "version": "0.0.1",
+  "main": "scraper.js",
+  "dependencies": {
+    "cheerio": "latest",
+    "request": "latest",
+    "sqlite3": "latest"
+  },
+  "keywords": [
+    "scraper",
+    "morph"
+  ]
+}
\ No newline at end of file
diff --git a/scraper.js b/scraper.js
new file mode 100644
index 0000000..78a22c8
--- /dev/null
+++ b/scraper.js
@@ -0,0 +1,105 @@
+// This is a template for a Node.js scraper on morph.io (https://morph.io)
+
+var cheerio = require("cheerio");
+var request = require("request");
+var sqlite3 = require("sqlite3").verbose();
+
+/**
+ * Open the data.sqlite database, create the `data` table if it does not
+ * exist yet, and pass the database handle to `callback`.
+ *
+ * @param {function} callback - called with the sqlite3 Database object.
+ */
+function initDatabase(callback) {
+  // Set up sqlite database.
+  var db = new sqlite3.Database("data.sqlite");
+  db.serialize(function () {
+    db.run("CREATE TABLE IF NOT EXISTS data (name TEXT)");
+    callback(db);
+  });
+}
+
+/**
+ * Insert one value into the `data` table using a bound parameter.
+ *
+ * @param {object} db - sqlite3 Database object.
+ * @param {string} value - value stored in the `name` column.
+ */
+function updateRow(db, value) {
+  // Insert some data.
+  var statement = db.prepare("INSERT INTO data VALUES (?)");
+  statement.run(value);
+  statement.finalize();
+}
+
+/**
+ * Log every row of the `data` table as "<rowid>: <name>".
+ *
+ * @param {object} db - sqlite3 Database object.
+ */
+function readRows(db) {
+  // Read some data.
+  db.each("SELECT rowid AS id, name FROM data", function (err, row) {
+    if (err) {
+      // No row is available when the query fails, so report and stop here.
+      console.log("Error reading row: " + err);
+      return;
+    }
+
+    console.log(row.id + ": " + row.name);
+  });
+}
+
+/**
+ * Download a page and pass its body to `callback`. Request errors and
+ * non-2xx responses are logged and `callback` is not called.
+ *
+ * @param {string} url - address of the page to fetch.
+ * @param {function} callback - called with the response body.
+ */
+function fetchPage(url, callback) {
+  // Use request to read in pages.
+  request(url, function (error, response, body) {
+    if (error) {
+      console.log("Error requesting page: " + error);
+      return;
+    }
+
+    // Do not scrape the body of an HTTP error response.
+    if (response.statusCode < 200 || response.statusCode >= 300) {
+      console.log("Error requesting page: HTTP status " + response.statusCode);
+      return;
+    }
+
+    callback(body);
+  });
+}
+
+/**
+ * Scrape https://morph.io, store the text of each matched element, log the
+ * stored rows and close the database.
+ *
+ * @param {object} db - sqlite3 Database object.
+ */
+function run(db) {
+  // Use request to read in pages.
+  fetchPage("https://morph.io", function (body) {
+    // Use cheerio to find things in the page with css selectors.
+    var $ = cheerio.load(body);
+
+    // Queue the inserts and the read-back in serialized mode so they are
+    // executed one at a time, in the order they were issued.
+    db.serialize(function () {
+      $("div.media-body span.p-name").each(function () {
+        var value = $(this).text().trim();
+        updateRow(db, value);
+      });
+
+      readRows(db);
+    });
+
+    db.close();
+  });
+}
+
+initDatabase(run);