Skip to content

Commit

Permalink
Handle source dates with days of the week
Browse files Browse the repository at this point in the history
  • Loading branch information
kremio committed Oct 26, 2018
1 parent a37d3fc commit c926443
Show file tree
Hide file tree
Showing 4 changed files with 270 additions and 1 deletion.
18 changes: 18 additions & 0 deletions lib/german-days.js
@@ -0,0 +1,18 @@
const assert = require('assert')
// German weekday names in lowercase, ordered from Monday to Sunday.
const days = [
  'montag',
  'dienstag',
  'mittwoch',
  'donnerstag',
  'freitag',
  'samstag',
  'sonntag'
]

// Guard against an accidental edit of the list above: a week has seven days.
assert.strictEqual( days.length, 7 )

module.exports = {
days,
nameToNumber: (day, offset = 1) => days.findIndex( (d) => d == day.toLowerCase() ) + offset,
removeThem: (str, suffix = false) => {
const asRegex = days.map((d) => {
if(!suffix){
return d
}
return d+"("+suffix+")?"
}).join('|')
return str.replace( new RegExp( asRegex, 'gi' ), '' )
}
}
3 changes: 2 additions & 1 deletion lib/report.js
Expand Up @@ -5,6 +5,7 @@ const url = require('url')
const preset = require('./preset')
const {ReportParserError} = require('./errors')
const germanMonths = require('./german-months')
const germanDays = require('./german-days')
const boroughs = require('./boroughs')
const motives = require('./motives')
const factums = require('./factums')
Expand Down Expand Up @@ -152,7 +153,7 @@ const parseClassNames = (classNames) => {
*/

const parseSources = (sources) => {
const sourcesArr = sources.replace('Quelle:','')
const sourcesArr = germanDays.removeThem( sources.replace('Quelle:',''), ',')
.replace(/(\(.+),(.+\))/, '$1%$2') //replace commas inside parens by %
.replace(/\sund\s/,',') //turn " und "s into commas
.split(',')
Expand Down
11 changes: 11 additions & 0 deletions tests/lib/report.test.js
Expand Up @@ -28,6 +28,7 @@ const sourceWithMultipleDates = fs.readFileSync('./tests/samples/sourceWithMulti
// HTML sample pages, read synchronously as UTF-8 strings from ./tests/samples
const anonymousSource =fs.readFileSync('./tests/samples/anonymousSource.html', 'utf8')
const monthOnly = fs.readFileSync('./tests/samples/monthOnly.html', 'utf8')
const yearOnly = fs.readFileSync('./tests/samples/yearOnly.html', 'utf8')
const sourceDateWithDayOfWeek = fs.readFileSync('./tests/samples/sourceDateWithDayOfWeek.html', 'utf8')


request.mockImplementation((...args) => {
Expand Down Expand Up @@ -132,5 +133,15 @@ test( 'If only the year is provided record start date as 1st january of the year
expect( validateSchema(result) ).toBeTruthy()
expect( result.startDate ).toEqual( '2015-01-01T00:00:00.000Z' )
expect( result.endDate ).toEqual( '2015-12-31T00:00:00.000Z' )
})

// A source date prefixed with a weekday name must still produce the expected publication date.
test( 'Parse date starting with day of week', async() => {
  // Answer the next request (and only that one) with the weekday-prefixed sample page
  request.mockImplementationOnce((...params) => {
    // The callback is the last argument passed to request
    const callback = params[params.length - 1]
    callback( null, {statusCode: 200}, sourceDateWithDayOfWeek )
  })

  const report = await scrapeReport( 'https://domain.tld/path/to/page.html' )

  expect( validateSchema(report) ).toBeTruthy()
  // The second source of the sample is the one checked for its date
  expect( report.sources[1].publishedDate ).toEqual( '2014-12-11T00:00:00.000Z' )
})

0 comments on commit c926443

Please sign in to comment.