Skip to content

Commit

Permalink
[fix] scrape more lines, keep new lines
Browse files: browse the repository at this point in the history
  • Loading branch information
Swaagie committed Mar 14, 2013
1 parent 981176f commit 52e689f
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions index.js
Expand Up @@ -6,11 +6,10 @@ var path = require('path'),
lunr = require('lunr'), lunr = require('lunr'),
tokenizer = new natural.WordTokenizer(), tokenizer = new natural.WordTokenizer(),
loc = path.resolve(__dirname, 'content'), loc = path.resolve(__dirname, 'content'),
html = /(<[^>]*>)|(&[^;]+;)/g,
scraper = { scraper = {
title: /\[meta:title\]:\s<>\s\((.+?)\)(?!\))/, title: /\[meta:title\]:\s<>\s\((.+?)\)(?!\))/,
description: /\[meta:description\]:\s<>\s\((.+?)\)(?!\))/, description: /\[meta:description\]:\s<>\s\((.+?)\)(?!\))/,
firstline: /([\-a-zA-Z0-9&;,]*\s+){5,}\w*/ firstlines: /^((.*\n){2}){1,3}/
}; };


// //
Expand All @@ -23,7 +22,7 @@ var path = require('path'),
function scrape(content, key, n) { function scrape(content, key, n) {
if (!content) return ''; if (!content) return '';


var match = content.replace(/\n/g, ' ').match(scraper[key]); var match = content.match(scraper[key]);


// Only return scraped content if there is a meta:[key]. // Only return scraped content if there is a meta:[key].
return match && match[n] ? match[n].trim() : ''; return match && match[n] ? match[n].trim() : '';
Expand All @@ -48,7 +47,7 @@ function normalize(file) {
function fileContent(content) { function fileContent(content) {
return { return {
content: content || '', content: content || '',
description: scrape(content, 'description', 1) || scrape(content, 'firstline', 0), description: scrape(content, 'description', 1) || scrape(content, 'firstlines', 0),
title: scrape(content, 'title', 1), title: scrape(content, 'title', 1),
tags: tags(content, 10) tags: tags(content, 10)
}; };
Expand Down

0 comments on commit 52e689f

Please sign in to comment.