Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
kmiyashiro committed Apr 6, 2020
1 parent eaaefa3 commit 0c08a07
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 68 deletions.
4 changes: 4 additions & 0 deletions .prettierrc
@@ -0,0 +1,4 @@
trailingComma: "es5"
tabWidth: 2
semi: false
singleQuote: true
7 changes: 5 additions & 2 deletions package.json
Expand Up @@ -2,9 +2,12 @@
"version": "0.0.1",
"main": "scraper.js",
"dependencies": {
"axios": "^0.19.2",
"axios-cookiejar-support": "^0.5.1",
"cheerio": "latest",
"request": "latest",
"sqlite3": "latest"
"prettier": "^2.0.4",
"sqlite3": "latest",
"tough-cookie": "^4.0.0"
},
"scripts": {
"start": "node scraper.js"
Expand Down
160 changes: 96 additions & 64 deletions scraper.js
@@ -1,92 +1,124 @@
// This is a template for a Node.js scraper on morph.io (https://morph.io)

var cheerio = require("cheerio");
var request = require("request");
var sqlite3 = require("sqlite3").verbose();
const cheerio = require('cheerio')
const querystring = require('querystring')
const axios = require('axios').default
const tough = require('tough-cookie')
const axiosCookieJarSupport = require('axios-cookiejar-support').default
const sqlite3 = require('sqlite3').verbose()

// `Cookie` is destructured here but is not referenced by any code visible in this file.
const { Cookie } = tough

// Add cookie-jar support to the axios module (axios-cookiejar-support).
axiosCookieJarSupport(axios)

// A single jar is installed as the axios default, so every request made
// through `axios` in this file shares the same cookies.
const cookieJar = new tough.CookieJar()
axios.defaults.jar = cookieJar
// NOTE(review): presumably required by axios-cookiejar-support for the jar to
// be used on requests -- confirm against its README.
axios.defaults.withCredentials = true

// Login endpoint: fetched with GET to read the form token, then POSTed to with
// the credentials below.
const LOGIN_URL = 'https://www.klwines.com/account/login'
// Credentials come from the environment; both are `undefined` when the
// variables are not set.
const USERNAME = process.env.MORPH_KL_USER
const PASSWORD = process.env.MORPH_KL_PASSWORD

/**
 * Fetch the login page and read the value of its
 * `__RequestVerificationToken` form field.
 *
 * @returns {Promise<string>} The token value found on the login page.
 * @throws {Error} When the page has no such field (or its value is empty), so
 *   the caller does not go on to post a login form without a token.
 */
async function getLoginRequestToken() {
  const response = await axios(LOGIN_URL)
  const $ = cheerio.load(response.data)
  const token = $('[name="__RequestVerificationToken"]').val()

  // `.val()` yields undefined when the selector matches nothing; fail here
  // with a clear message instead of posting an empty token later.
  if (!token) {
    throw new Error(
      `No __RequestVerificationToken field found on ${LOGIN_URL}`
    )
  }

  return token
}

/**
 * Submit the login form: read the request token from the login page, then
 * POST it together with the configured credentials as a url-encoded body.
 *
 * @returns {Promise} Resolves with the axios response of the login POST.
 */
async function getCookie() {
  const token = await getLoginRequestToken()

  // Field order is kept as-is; it determines the order of the encoded body.
  const body = querystring.stringify({
    __RequestVerificationToken: token,
    Email: USERNAME,
    Password: PASSWORD,
    ReturnUrl: '',
    'Login.x': '15',
    'Login.y': '5',
  })

  const headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent':
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9',
  }

  return axios(LOGIN_URL, { method: 'POST', headers, data: body })
}

/**
 * Open `data.sqlite`, make sure the `data` table exists, and hand the
 * database to `callback`.
 *
 * @param {function} callback - Called with the opened database object from
 *   inside the `serialize` callback, right after the CREATE TABLE statement
 *   has been issued.
 */
function initDatabase(callback) {
  // Set up sqlite database.
  const db = new sqlite3.Database('data.sqlite')
  db.serialize(function () {
    db.run(
      'CREATE TABLE IF NOT EXISTS data (key PRIMARY KEY, id TEXT, date TEXT, name TEXT, price INT)'
    )
    callback(db)
  })
}

/**
 * Insert today's price for one product, or update the price if a row for the
 * same product and day already exists.
 *
 * The row key is `<date>-<id>`, where the date is the local date written as
 * `year-month-day` without zero padding. That format is part of the stored
 * key, so it is deliberately left unchanged.
 *
 * @param {object} db - Database object providing `prepare(sql)`.
 * @param {string} id - Product id.
 * @param {string} name - Product name.
 * @param {number} price - Product price as parsed by the caller.
 */
function updateRow(db, id, name, price) {
  console.log('UPDATE', id, name, price)
  const date = new Date()
  // getMonth() is zero-based, hence the +1.
  const dateString = `${date.getFullYear()}-${
    date.getMonth() + 1
  }-${date.getDate()}`

  // Insert some data.
  const statement = db.prepare(`INSERT INTO data(key, id, date, name, price)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT(key) DO UPDATE SET
    price=excluded.price
  `)
  statement.run(`${dateString}-${id}`, id, dateString, name, price)
  statement.finalize()
  console.log('FINALIZE', id, name, price)
}

/**
 * Print every stored row as `<date> <name>: <price>`.
 *
 * @param {object} db - Database object providing `each(sql, rowCallback)`.
 */
function readRows(db) {
  // Read some data.
  db.each('SELECT name, date, price FROM data', function (err, row) {
    // On failure there is no row to print; report the error instead of
    // dereferencing an undefined row.
    if (err) {
      console.error(`Error reading row: ${err}`)
      return
    }
    console.log(`${row.date} ${row.name}: ${row.price}`)
  })
}

/**
 * Extract the numeric product id from a product URL of the form
 * `...i=<digits>&...`.
 *
 * @param {string} url - Product link, e.g. `/p/i?i=1234567&str=...`.
 * @returns {string} The digits that follow `i=` and precede the next `&`.
 * @throws {TypeError} When `url` is not a string or contains no `i=<digits>&`
 *   segment.
 */
function getSourceId(url) {
  const match = typeof url === 'string' ? url.match(/i=(\d+)&/) : null
  if (match === null) {
    throw new TypeError(
      `Cannot extract a product id from URL: ${String(url)}`
    )
  }
  return match[1]
}

/**
 * Log in, fetch the product listing page, and store one row per product.
 *
 * @param {object} db - Opened database; it is closed before this function
 *   settles, whether the scrape succeeded or failed.
 * @returns {Promise<void>} Rejects when the login, the page request, or the
 *   handling of a product throws.
 */
async function run(db) {
  try {
    console.log('running')
    console.log('getting cookie')
    await getCookie()

    console.log('fetching page')
    const response = await axios(
      'https://www.klwines.com/Products?&filters=sv2_206!20&limit=500&offset=0'
    )
    // Use cheerio to find things in the page with css selectors.
    const $ = cheerio.load(response.data)

    const elements = $('.tf-product')
    console.log(`Found ${elements.length} elements`)
    elements.each(function () {
      const link = $(this).find(".tf-product-header > a[href^='/p/i']").first()

      const id = getSourceId(link.attr('href'))
      const name = link.text().trim()
      const priceNode = $(this).find('.tf-price span:nth-of-type(2)')
      // Strip currency symbol, decimal point, asterisk and thousands
      // separators so "$1,299.99" becomes 129999 rather than stopping at the
      // comma and parsing as 1.
      const price = parseInt(
        priceNode.text().trim().replace(/[$.*,]/g, ''),
        10
      )
      console.log('id:', id, 'name:', name, 'price:', price)

      updateRow(db, id, name, price)
    })
  } finally {
    // Close the database even when the login, request or parsing failed.
    db.close()
  }
}

// Entry point: open the database once, then run the scraper with it.
initDatabase(run)
93 changes: 91 additions & 2 deletions yarn.lock
Expand Up @@ -7,6 +7,11 @@
resolved "https://registry.yarnpkg.com/@types/node/-/node-12.12.11.tgz#bec2961975888d964196bf0016a2f984d793d3ce"
integrity sha512-O+x6uIpa6oMNTkPuHDa9MhMMehlxLAd5QcOvKRjAFsBVpeFWTOPnXbDvILvFgFFZfQ1xh1EZi1FbXxUix+zpsQ==

"@types/tough-cookie@^2.3.3":
version "2.3.7"
resolved "https://registry.yarnpkg.com/@types/tough-cookie/-/tough-cookie-2.3.7.tgz#979434b5900f9d710f5d4e15c466cadb8e9fdc47"
integrity sha512-rMQbgMGxnLsdn8e9aPVyuN+zMQLrZ2QW8xlv7eWS1mydfGXN+tsTKffcIzd8rGCcLdmi3xvQw2MDaZI1bBNTaw==

abbrev@1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8"
Expand Down Expand Up @@ -72,6 +77,23 @@ aws4@^1.8.0:
resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.8.0.tgz#f0e003d9ca9e7f59c7a508945d7b2ef9a04a542f"
integrity sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==

axios-cookiejar-support@^0.5.1:
version "0.5.1"
resolved "https://registry.yarnpkg.com/axios-cookiejar-support/-/axios-cookiejar-support-0.5.1.tgz#0622c2849cefbaf8424a50b630283231b62fc277"
integrity sha512-mmMbNDjpkAKlyxVOYjkpvV6rDRoSjBXwHbfkWvnsplRTGYCergbHvZInRB1G3lqumllUQwo0A4uPoqEsYfzq3A==
dependencies:
"@types/tough-cookie" "^2.3.3"
is-redirect "^1.0.0"
pify "^4.0.0"
tough-cookie "^3.0.1"

axios@^0.19.2:
version "0.19.2"
resolved "https://registry.yarnpkg.com/axios/-/axios-0.19.2.tgz#3ea36c5d8818d0d5f8a8a97a6d36b86cdc00cb27"
integrity sha512-fjgm5MvRHLhx+osE2xoekY70AhARk3a6hkN+3Io1jc00jtquGvxYlKlsFUhmUET0V5te6CcZI7lcv2Ym61mjHA==
dependencies:
follow-redirects "1.5.10"

balanced-match@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
Expand Down Expand Up @@ -168,6 +190,13 @@ dashdash@^1.12.0:
dependencies:
assert-plus "^1.0.0"

debug@=3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/debug/-/debug-3.1.0.tgz#5bb5a0672628b64149566ba16819e61518c67261"
integrity sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==
dependencies:
ms "2.0.0"

debug@^3.2.6:
version "3.2.6"
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.6.tgz#e83d17de16d8a7efb7717edbe5fb10135eee629b"
Expand Down Expand Up @@ -287,6 +316,13 @@ fast-json-stable-stringify@^2.0.0:
resolved "https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz#d5142c0caee6b1189f87d3a76111064f86c8bbf2"
integrity sha1-1RQsDK7msRifh9OnYREGT4bIu/I=

follow-redirects@1.5.10:
version "1.5.10"
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.5.10.tgz#7b7a9f9aea2fdff36786a94ff643ed07f4ff5e2a"
integrity sha512-0V5l4Cizzvqt5D44aTXbFZz+FtyXV1vrDN6qrelxtfYQKW0KO0W2T/hkE8xvGa/540LkZlkaUjO4ailYTFtHVQ==
dependencies:
debug "=3.1.0"

forever-agent@~0.6.1:
version "0.6.1"
resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91"
Expand Down Expand Up @@ -417,6 +453,11 @@ ini@~1.3.0:
resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.5.tgz#eee25f56db1c9ec6085e0c22778083f596abf927"
integrity sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==

ip-regex@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
integrity sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=

is-fullwidth-code-point@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz#ef9e31386f031a7f0d643af82fde50c457ef00cb"
Expand All @@ -429,6 +470,11 @@ is-fullwidth-code-point@^2.0.0:
resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz#a3b30a5c4f199183167aaab93beefae3ddfb654f"
integrity sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=

is-redirect@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/is-redirect/-/is-redirect-1.0.0.tgz#1d03dded53bd8db0f30c26e4f95d36fc7c87dc24"
integrity sha1-HQPd7VO9jbDzDCbk+V02/HyH3CQ=

is-typedarray@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/is-typedarray/-/is-typedarray-1.0.0.tgz#e479c80858df0c1b11ddda6940f96011fcda4a9a"
Expand Down Expand Up @@ -530,6 +576,11 @@ mkdirp@^0.5.0, mkdirp@^0.5.1:
dependencies:
minimist "0.0.8"

ms@2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
integrity sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=

ms@^2.1.1:
version "2.1.2"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
Expand Down Expand Up @@ -660,6 +711,16 @@ performance-now@^2.1.0:
resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=

pify@^4.0.0:
version "4.0.1"
resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231"
integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==

prettier@^2.0.4:
version "2.0.4"
resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.0.4.tgz#2d1bae173e355996ee355ec9830a7a1ee05457ef"
integrity sha512-SVJIQ51spzFDvh4fIbCLvciiDMCrRhlN3mbZvv/+ycjvmF5E73bKdGfU8QDLNmjYJf+lsGnDBC4UUnvTe5OO0w==

process-nextick-args@~2.0.0:
version "2.0.1"
resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
Expand All @@ -670,12 +731,17 @@ psl@^1.1.24:
resolved "https://registry.yarnpkg.com/psl/-/psl-1.4.0.tgz#5dd26156cdb69fa1fdb8ab1991667d3f80ced7c2"
integrity sha512-HZzqCGPecFLyoRj5HLfuDSKYTJkAfB5thKBIkRHtGjWwY7p1dAyveIbXIq4tO0KYfDF2tHqPUgY9SDnGm00uFw==

psl@^1.1.28, psl@^1.1.33:
version "1.8.0"
resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==

punycode@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
integrity sha1-wNWmOycYgArY4esPpSachN1BhF4=

punycode@^2.1.0:
punycode@^2.1.0, punycode@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
Expand Down Expand Up @@ -717,7 +783,7 @@ readable-stream@^3.1.1:
string_decoder "^1.1.1"
util-deprecate "^1.0.1"

request@^2.87.0, request@latest:
request@^2.87.0:
version "2.88.0"
resolved "https://registry.yarnpkg.com/request/-/request-2.88.0.tgz#9c2fca4f7d35b592efe57c7f0a55e81052124fef"
integrity sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==
Expand Down Expand Up @@ -872,6 +938,24 @@ tar@^4:
safe-buffer "^5.1.2"
yallist "^3.0.3"

tough-cookie@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-3.0.1.tgz#9df4f57e739c26930a018184887f4adb7dca73b2"
integrity sha512-yQyJ0u4pZsv9D4clxO69OEjLWYw+jbgspjTue4lTQZLfV0c5l1VmK2y1JK8E9ahdpltPOaAThPcp5nKPUgSnsg==
dependencies:
ip-regex "^2.1.0"
psl "^1.1.28"
punycode "^2.1.1"

tough-cookie@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-4.0.0.tgz#d822234eeca882f991f0f908824ad2622ddbece4"
integrity sha512-tHdtEpQCMrc1YLrMaqXXcj6AxhYi/xgit6mZu1+EDWUn+qhUf8wMQoFIy9NXuq23zAwtcB0t/MjACGR18pcRbg==
dependencies:
psl "^1.1.33"
punycode "^2.1.1"
universalify "^0.1.2"

tough-cookie@~2.4.3:
version "2.4.3"
resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.4.3.tgz#53f36da3f47783b0925afa06ff9f3b165280f781"
Expand All @@ -892,6 +976,11 @@ tweetnacl@^0.14.3, tweetnacl@~0.14.0:
resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64"
integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=

universalify@^0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66"
integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==

uri-js@^4.2.2:
version "4.2.2"
resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.2.2.tgz#94c540e1ff772956e2299507c010aea6c8838eb0"
Expand Down

0 comments on commit 0c08a07

Please sign in to comment.