From 52e689f817029ea18edede52c88d1f0bf9983fd5 Mon Sep 17 00:00:00 2001 From: Swaagie Date: Thu, 14 Mar 2013 17:00:30 +0100 Subject: [PATCH] [fix] scrape more lines, keep new lines --- index.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/index.js b/index.js index 8e3419a..5785196 100644 --- a/index.js +++ b/index.js @@ -6,11 +6,10 @@ var path = require('path'), lunr = require('lunr'), tokenizer = new natural.WordTokenizer(), loc = path.resolve(__dirname, 'content'), - html = /(<[^>]*>)|(&[^;]+;)/g, scraper = { title: /\[meta:title\]:\s<>\s\((.+?)\)(?!\))/, description: /\[meta:description\]:\s<>\s\((.+?)\)(?!\))/, - firstline: /([\-a-zA-Z0-9&;,]*\s+){5,}\w*/ + firstlines: /^((.*\n){2}){1,3}/ }; // @@ -23,7 +22,7 @@ var path = require('path'), function scrape(content, key, n) { if (!content) return ''; - var match = content.replace(/\n/g, ' ').match(scraper[key]); + var match = content.match(scraper[key]); // Only return scraped content if there is a meta:[key]. return match && match[n] ? match[n].trim() : ''; @@ -48,7 +47,7 @@ function normalize(file) { function fileContent(content) { return { content: content || '', - description: scrape(content, 'description', 1) || scrape(content, 'firstline', 0), + description: scrape(content, 'description', 1) || scrape(content, 'firstlines', 0), title: scrape(content, 'title', 1), tags: tags(content, 10) };