Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: joshua703/wireink
base: 3126359149
...
head fork: joshua703/wireink
compare: 3fd049497d
  • 2 commits
  • 1 file changed
  • 0 commit comments
  • 1 contributor
Showing with 6 additions and 3 deletions.
  1. +6 −3 indexer/crawler.js
View
9 indexer/crawler.js
@@ -81,6 +81,7 @@ function parseDate(site, $dateEl) {
//A date that looks like "Tue, Mar 25, 2014 -- 9:00 AM" (from KQED as of April 2, 2014)
date = new Date($dateEl.text().replace('--', ''));
} else if (site.pubDate.target === 'dotdash') {
+ //A date that looks like 12.05.2014 should be 12-05-2014
date = new Date($dateEl.text().replace(/\./g, '/').replace('-', ''));
}
if(date !== null) {
@@ -117,8 +118,9 @@ function generateImageFileName(articleImageUrl) {
}
function fetchArticle(articleEl, site) {
+ var articleUrl = site.prependUrl + articleEl.attr('href');
request({
- uri: site.prependUrl + articleEl.attr('href'),
+ uri: articleUrl,
qs: null,
headers: {}
}, function(error, response, articlePage) {
@@ -160,7 +162,7 @@ function fetchArticle(articleEl, site) {
body: body.trim(),
title: title.trim(),
byline: null,
- articleUrl: response.request.href,
+ articleUrl: articleUrl,
articleImage: articleImageUrl ? generateImageFileName(articleImageUrl) : null,
publicationName: site.publicationName,
clicks: 0,
@@ -208,8 +210,9 @@ function loadSites() {
//Be nice to the sites, space out each request by several milliseconds
setTimeout(function() {
Article.count({
- articleUrl: element.attribs.href
+ articleUrl: site.prependUrl + element.attribs.href
}, function(err, count) {
+
//Ensures that article is not present before fetching
if (count > 0) {
console.log("Article is already in DB");

No commit comments for this range

Something went wrong with that request. Please try again.