Skip to content

Commit

Permalink
werelate: facts
Browse files Browse the repository at this point in the history
  • Loading branch information
justincy committed Jul 29, 2016
1 parent 16bcdf3 commit 0f4ce0a
Show file tree
Hide file tree
Showing 6 changed files with 299 additions and 72 deletions.
2 changes: 1 addition & 1 deletion src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,5 @@ require('./scrapers/findmypast-record')(register);
require('./scrapers/findmypast-tree')(register);
require('./scrapers/genealogieonline')(register);
require('./scrapers/openarch')(register);
//require('./scrapers/werelate')(register);
require('./scrapers/werelate')(register);
require('./scrapers/wikitree')(register);
197 changes: 159 additions & 38 deletions src/scrapers/werelate.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
var debug = require('debug')('werelate'),
var debug = require('debug')('genscrape:scrapers:werela'),
utils = require('../utils'),
_ = require('lodash');
GedcomX = require('gedcomx-js');

var urls = [
utils.urlPatternToRegex("http://www.werelate.org/wiki/Person:*")
Expand All @@ -10,49 +10,169 @@ module.exports = function(register){
register(urls, run);
};

/**
 * A mapping of WeRelate fact types to GedcomX fact types.
 *
 * Keys are the fact labels exactly as WeRelate displays them; values are the
 * type URIs placed on the generated facts. Labels with no GedcomX equivalent
 * map to URIs under http://werelate.org/.
 *
 * The table is built on a prototype-less object because it is indexed with
 * text scraped from the page. With a plain object literal, a label such as
 * "constructor" or "toString" would resolve to an inherited Object.prototype
 * member (a truthy function) rather than undefined, and would be mistaken
 * for a known fact type.
 *
 * NOTE(review): several of the werelate.org values below contain spaces or an
 * apostrophe, so they are not strictly valid URIs. They are left untouched
 * here because changing them would change the emitted data.
 */
var factTypes = Object.assign(Object.create(null), {
  "Unknown": "http://werelate.org/Unknown",

  // Vital events; the "Alt" variants share the type of their primary fact
  "Birth": "http://gedcomx.org/Birth",
  "Alt Birth": "http://gedcomx.org/Birth",
  "Burial": "http://gedcomx.org/Burial",
  "Alt Burial": "http://gedcomx.org/Burial",
  "Christening": "http://gedcomx.org/Christening",
  "Alt Christening": "http://gedcomx.org/Christening",
  "Death": "http://gedcomx.org/Death",
  "Alt Death": "http://gedcomx.org/Death",

  // Other facts and events
  "Adoption": "http://gedcomx.org/Adoption",
  "Ancestral File Number": "http://werelate.org/AncestralFileNumber",
  "Baptism": "http://gedcomx.org/Baptism",
  "Bar Mitzvah": "http://gedcomx.org/BarMitzvah",
  "Bat Mitzvah": "http://gedcomx.org/BatMitzvah",
  "Blessing": "http://gedcomx.org/Blessing",
  "Caste": "http://gedcomx.org/Caste",
  "Cause of Death": "http://werelate.org/CauseOfDeath",
  "Census": "http://gedcomx.org/Census",
  "Citizenship": "http://werelate.org/Citizenship",
  "Confirmation": "http://gedcomx.org/Confirmation",
  "Cremation": "http://gedcomx.org/Cremation",
  "Degree": "http://gedcomx.org/Education",
  "DNA": "http://werelate.org/DNA",
  "Education": "http://gedcomx.org/Education",
  "Emigration": "http://gedcomx.org/Emigration",
  "Employment": "http://gedcomx.org/Occupation",
  "Excommunication": "http://gedcomx.org/Excommunication",
  "First Communion": "http://gedcomx.org/FirstCommunion",
  "Funeral": "http://gedcomx.org/Funeral",
  "Graduation": "http://gedcomx.org/Education",
  "Illness": "http://werelate.org/Illness",
  "Immigration": "http://gedcomx.org/Immigration",
  "Living": "http://gedcomx.org/Living",
  "Medical": "http://gedcomx.org/Medical",
  "Military": "http://gedcomx.org/MilitaryService",
  "Mission": "http://gedcomx.org/Mission",
  "Namesake": "http://gedcomx.org/Namesake",
  "Nationality": "http://gedcomx.org/Nationality",
  "Naturalization": "http://gedcomx.org/Naturalization",
  "Obituary": "http://werelate.org/Obituary",
  "Occupation": "http://gedcomx.org/Occupation",
  "Ordination": "http://gedcomx.org/Ordination",
  "Pension": "http://werelate.org/Pension",
  "Physical Description": "http://gedcomx.org/PhysicalDescription",
  "Probate": "http://gedcomx.org/Probate",
  "Property": "http://gedcomx.org/Property",
  "Reference Number": "http://werelate.org/ReferenceNumber",
  "Religion": "http://gedcomx.org/Religion",
  "Residence": "http://gedcomx.org/Residence",
  "Retirement": "http://gedcomx.org/Retirement",
  "Soc Sec No": "http://gedcomx.org/NationalId",
  "Stillborn": "http://gedcomx.org/Stillbirth",
  "Title (nobility)": "http://werelate.org/TitleOfNobility",
  "Will": "http://gedcomx.org/Will",

  // Fact types mapped under the werelate.org/AfricanAmerican/ namespace
  "Distribution List": "http://werelate.org/AfricanAmerican/Distribution List",
  "Emancipation": "http://werelate.org/AfricanAmerican/Emancipation",
  "Escape or Runaway": "http://werelate.org/AfricanAmerican/Escape or Runaway",
  "Estate Inventory": "http://werelate.org/AfricanAmerican/Estate Inventory",
  "Estate Settlement": "http://werelate.org/AfricanAmerican/Estate Settlement",
  "First Appearance": "http://werelate.org/AfricanAmerican/First Appearance",
  "Freedmen's Bureau": "http://werelate.org/AfricanAmerican/Freedmen's Bureau",
  "Hired Away": "http://werelate.org/AfricanAmerican/Hired Away",
  "Homestead": "http://werelate.org/AfricanAmerican/Homestead",
  "Household List": "http://werelate.org/AfricanAmerican/Household List",
  "Plantation Journal": "http://werelate.org/AfricanAmerican/Plantation Journal",
  "Purchase": "http://werelate.org/AfricanAmerican/Purchase",
  "Recapture": "http://werelate.org/AfricanAmerican/Recapture",
  "Relocation": "http://werelate.org/AfricanAmerican/Relocation",
  "Sale": "http://werelate.org/AfricanAmerican/Sale",
  "Slave List": "http://werelate.org/AfricanAmerican/Slave List",

  "Other": "http://werelate.org/Other"
});

function run(emitter){

var personData = {};
var recordData = {};
$('.wr-infotable-factsevents tr').each(function(){
var row = $(this);
var label = $.trim( $('span.wr-infotable-type', row).text() ).toLowerCase();
if( !recordData[label] ) {
recordData[label] = row;
}
});
debug('run');

// Process the name
if( recordData.name ) {
var nameParts = utils.splitName( $.trim( recordData.name.children().eq(1).children('span').text() ) );
personData.givenName = nameParts[0];
personData.familyName = nameParts[1];
}
var gedx = GedcomX(),
primaryPerson = GedcomX.Person();

gedx.addPerson(primaryPerson);

// Process birth info
if( recordData.birth ) {
var birthDate = $.trim( $('span.wr-infotable-date', recordData.birth).text() );
if( birthDate ) {
personData.birthDate = birthDate;
}
var birthPlace = $.trim( $('span.wr-infotable-place', recordData.birth).text() );
if( birthPlace ) {
personData.birthPlace = birthPlace;
}
}
//
// Facts
//

// Process death info
if( recordData.death ) {
var deathDate = $.trim( $('span.wr-infotable-date', recordData.death).text() );
if( deathDate ) {
personData.deathDate = deathDate;
// Gather the fact data
var facts = [];
Array.from(document.querySelectorAll('.wr-infotable-factsevents tr')).forEach(function(row){
var label = utils.maybe(row.querySelector('span.wr-infotable-type')).textContent;
if(label){
facts.push({
label: label.trim(),
row: row
});
}
var deathPlace = $.trim( $('span.wr-infotable-place', recordData.death).text() );
if( deathPlace ) {
personData.deathPlace = deathPlace;
});

// Process the fact data
facts.forEach(function(factInfo){

var row = factInfo.row,
label = factInfo.label,
dateCell = row.children[1],
placeCell = row.children[2];

switch(label){

case 'Name':
primaryPerson.addSimpleName(dateCell.textContent.trim());
break;

case 'Gender':
switch(dateCell.textContent.trim()){
case 'Male':
primaryPerson.setGender({
type: 'http://gedcomx.org/Male'
});
break;
case 'Female':
primaryPerson.setGender({
type: 'http://gedcomx.org/Female'
});
break;
}
break;

// Most facts will have a date and a place
default:
var type = factTypes[label],
date = dateCell.textContent,
place = utils.maybe(placeCell.querySelector('span.wr-infotable-place')).textContent,
value = utils.maybe(placeCell.querySelector('span.wr-infotable-desc')).textContent;
if(type){
var fact = GedcomX.Fact({
type: type
});
if(date){
fact.setDate({
original: date
});
}
if(place){
fact.setPlace({
original: place
});
}
if(value){
fact.setValue(value);
}
primaryPerson.addFact(fact);
}
}
}

});

//
// Relationships
//

/*
// Process spouse's name
if( recordData.marriage ) {
var spouseNameParts = utils.splitName( $.trim( $('.wr-infotable-placedesc .wr-infotable-desc', recordData.marriage).text().substring(3) ) );
Expand All @@ -74,7 +194,8 @@ function run(emitter){
}
});
}
*/

emitter.emit('data', personData);
emitter.emit('data', gedx);

}
108 changes: 108 additions & 0 deletions test/data/werelate/output/washington.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{
"persons": [
{
"id": "1",
"gender": {
"type": "http://gedcomx.org/Male"
},
"names": [
{
"nameForms": [
{
"fullText": "President George Washington",
"parts": [
{
"type": "http://gedcomx.org/Given",
"value": "President George"
},
{
"type": "http://gedcomx.org/Surname",
"value": "Washington"
}
]
}
]
}
],
"facts": [
{
"type": "http://gedcomx.org/Birth",
"date": {
"original": "22 Feb 1732"
},
"place": {
"original": "Wakefield, Westmoreland, Virginia, United States"
},
"value": "Bet Bridge's Creek and Pope's Creek"
},
{
"type": "http://gedcomx.org/MilitaryService",
"date": {
"original": "6 Nov 1752"
},
"place": {
"original": "Virginia, United States"
},
"value": "Major in the forces for Fredericksburg County"
},
{
"type": "http://gedcomx.org/Occupation",
"date": {
"original": "from 24 Jul 1758 to 1769"
},
"place": {
"original": "Williamsburg, Virginia, United States"
},
"value": "House of Burgesses of Virginia"
},
{
"type": "http://gedcomx.org/Property",
"date": {
"original": "14 Mar 1768"
},
"value": "Inherited Mount Vernon"
},
{
"type": "http://gedcomx.org/Occupation",
"date": {
"original": "5 Aug 1774"
},
"place": {
"original": "Philadelphia, Philadelphia, Pennsylvania, United States"
},
"value": "One of seven elected delegates from Virginia, to the Continental Congress."
},
{
"type": "http://gedcomx.org/Occupation",
"date": {
"original": "4 Feb 1789"
},
"place": {
"original": "United States"
},
"value": "Elected first \nPresident of the United States."
},
{
"type": "http://gedcomx.org/Death",
"date": {
"original": "14 Dec 1799"
},
"place": {
"original": "Fairfax (independent city), Virginia, United States"
},
"value": "Mount Vernon."
},
{
"type": "http://gedcomx.org/Burial",
"date": {
"original": "18 Dec 1799"
},
"place": {
"original": "Fairfax (independent city), Virginia, United States"
},
"value": "Private crypt at Mount Vernon"
}
]
}
]
}
File renamed without changes.
39 changes: 6 additions & 33 deletions test/src/scrapers/werelate.js
Original file line number Diff line number Diff line change
@@ -1,37 +1,10 @@
var path = require('path'),
expect = require('chai').expect,
helpers = require('../../testHelpers'),
genscrape = require('../../../');
var setupTest = require('../../testHelpers').createTestRunner('werelate');

describe.skip('werelate', function(){
describe.only('werelate', function(){

it('works', function(done){

// Loading from a file because it often errored silently
// in jsdom when loading over http
var url = 'http://www.werelate.org/wiki/Person:George_Washington_(6)',
filePath = path.join(__dirname, '..', 'responses', 'werelate', 'washington.html');
helpers.mockDom(url, filePath, function(){
genscrape()
.on('data', function(data){
expect(data).to.deep.equal({
givenName: 'President George',
familyName: 'Washington',
birthDate: '22 Feb 1732',
birthPlace: 'Wakefield, Westmoreland, Virginia, United States',
deathDate: '14 Dec 1799',
deathPlace: 'Fairfax (independent city), Virginia, United States',
spouseGivenName: 'Martha',
spouseFamilyName: 'Dandridge',
fatherGivenName: 'Captain Augustine',
fatherFamilyName: 'Washington',
motherGivenName: 'Mary',
motherFamilyName: 'Ball'
});
done();
})
})

})
it('simple', setupTest(
'washington',
'http://www.werelate.org/wiki/Person:George_Washington_(6)'
));

});

0 comments on commit 0f4ce0a

Please sign in to comment.