Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds in an analysis script to parse the contact-congress YAML files to understand the data used by various reps
- Loading branch information
Leah Jones
committed
Apr 30, 2015
1 parent
eaf1d68
commit 3fb1837
Showing
5 changed files
with
293 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/** | ||
* Analysis script to parse the contents of the Member YAML files from: | ||
* https://github.com/unitedstates/contact-congress | ||
*/ | ||
|
||
var lodash = require('lodash'); | ||
var path = require('path'); | ||
|
||
var makeContactCongressStats = require('./contact-congress-stats').makeContactCongressStats; | ||
var readMemberFiles = require('./parse-contact-congress').readMemberFiles; | ||
|
||
|
||
/**
 * Node-style callback that reports the analysis of the parsed member files.
 *
 * On success it builds the aggregate statistics for `results` and prints the
 * statistics of the `select` step type to stdout.
 *
 * @param err Error reported by the reader; rethrown as-is when truthy.
 * @param results Parsed member data handed to makeContactCongressStats.
 */
var logAnalysisData = function(err, results) {
  if (!err) {
    console.log(makeContactCongressStats(results).stepData.select);
    return;
  }

  throw err;
};
|
||
|
||
// The first CLI argument is the directory that contains the `members` folder
// of member YAML files. Fail with a usage message when it is missing, rather
// than letting path.join() throw an opaque TypeError on `undefined`.
var contactCongressDir = process.argv[2];

if (!contactCongressDir) {
  console.error('Usage: node <script> <contact-congress-dir>');
  process.exit(1);
}

var membersDir = path.join(contactCongressDir, 'members');
readMemberFiles(membersDir, logAnalysisData);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
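/** | ||
* Functions to compute summary statistics (distinct values, counts and percentages) | ||
* for the fill_in and select steps of parsed contact-congress member data. | ||
*/ | ||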
|
||
var lodash = require('lodash'); | ||
var numeral = require('numeral'); | ||
|
||
|
||
/**
 * Summarises the field values used by `fill_in` steps across all members.
 *
 * @param results Collection of parsed member objects. Each one must carry
 *     `stepData.fill_in`; it is assumed to hold one array of field values per
 *     fill_in step (nested arrays are flattened to their leaf values).
 * @returns {{distinct: Array, stats: Object}} `distinct` lists every field
 *     value once. `stats` maps each field value to `count` (number of
 *     occurrences) and `percentage` (count divided by the total number of
 *     fill_in steps, formatted as '0.000%').
 */
var makeFillInStats = function(results) {
  var fieldsPerMember = [];
  var stepCount = 0;

  lodash.forEach(results, function(res) {
    var fillInSteps = res.stepData['fill_in'];
    stepCount += fillInSteps.length;
    fieldsPerMember.push(fillInSteps);
  });

  // Flatten once, and explicitly deep: the meaning of the second argument of
  // lodash.flatten differs between lodash major versions, whereas flattenDeep
  // always yields the leaf values. A single pass also avoids re-flattening
  // the growing accumulator for every member.
  var allFields = lodash.flattenDeep(fieldsPerMember);

  var fieldStats = lodash.mapValues(lodash.countBy(allFields), function(count) {
    return {
      percentage: numeral(count / stepCount).format('0.000%'),
      count: count
    };
  });

  return {
    distinct: lodash.uniq(allFields),
    stats: fieldStats
  };
};
|
||
|
||
/**
 * Summarises the values used by `select` steps across all members.
 *
 * @param results Collection of parsed member objects, each carrying
 *     `stepData.select`: a list of steps, each step a list of entries with a
 *     `value` property.
 * @returns {{distinct: Array, stats: Object}} `distinct` lists every select
 *     value once. `stats` maps each value to its `count` and to `percentage`
 *     (count divided by the total number of select steps, '0.000%' format).
 */
var makeSelectStats = function(results) {
  var chosenValues = [];
  var totalSteps = 0;

  lodash.forEach(results, function(member) {
    var selectSteps = member.stepData['select'];
    totalSteps += selectSteps.length;

    lodash.forEach(selectSteps, function(stepEntries) {
      lodash.forEach(stepEntries, function(entry) {
        chosenValues.push(entry.value);
      });
    });
  });

  var tally = lodash.countBy(chosenValues);
  var valueStats = lodash.mapValues(tally, function(count) {
    return {
      percentage: numeral(count / totalSteps).format('0.000%'),
      count: count
    };
  });

  return {
    distinct: lodash.union(chosenValues),
    stats: valueStats
  };
};
|
||
|
||
// Dispatch table: step type name -> function that builds the stats for it.
var stepStatFns = {};
stepStatFns['fill_in'] = makeFillInStats;
stepStatFns['select'] = makeSelectStats;
|
||
|
||
/**
 * Builds the aggregate statistics for a set of parsed member objects.
 *
 * @param results Collection of parsed member objects, each with a `stepTypes`
 *     array and the `stepData` consumed by the per-type stats functions.
 * @returns {{stepTypes: Array, stepData: Object}} `stepTypes` is the union of
 *     all step types seen; `stepData` holds, for every seen step type that has
 *     an entry in stepStatFns, the stats computed over all `results`.
 */
var makeCCStats = function(results) {
  var typesPerMember = lodash.map(results, function(member) {
    return member.stepTypes;
  });
  var allStepTypes = lodash.union.apply(this, typesPerMember);

  var stepData = {};
  lodash.forEach(allStepTypes, function(stepType) {
    var statsFn = stepStatFns[stepType];

    // Step types without a registered stats function are skipped.
    if (lodash.isUndefined(statsFn)) {
      return;
    }

    stepData[stepType] = statsFn(results);
  });

  return {
    stepTypes: allStepTypes,
    stepData: stepData
  };
};
|
||
|
||
module.exports.makeContactCongressStats = makeCCStats; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
/** | ||
* Functions to parse the contact-congress YAML files and return a data structure with useful info. | ||
*/ | ||
|
||
var async = require('async'); | ||
var fs = require('fs'); | ||
var lodash = require('lodash'); | ||
var path = require('path'); | ||
var yaml = require('js-yaml'); | ||
|
||
|
||
/**
 * Collects the `value` of every field of a fill_in step.
 *
 * Fields whose value is the empty string are dropped. Such fields exist for
 * some reps (e.g. C000127, C001071) and appear to be a mix of oddly coded
 * values (mphone) and identifier-like names, possibly session or user ids,
 * e.g. field_dbe73130-2c88-447b-bb88-8a097e611511.
 *
 * @param fields List of field objects of one fill_in step.
 * @returns {Array} The non-empty-string values, in input order.
 */
var parseFillInData = function(fields) {
  var values = [];
  var idx = 0;

  while (idx < fields.length) {
    var value = fields[idx].value;
    idx += 1;

    if (value === '') {
      continue;
    }

    values.push(value);
  }

  return values;
};
|
||
|
||
/**
 * Reduces every entry of a select step to its `value` and `options`.
 *
 * @param selects List of select objects of one select step.
 * @returns {Array} One `{value, options}` object per input entry, in order.
 */
var parseSelectData = function(selects) {
  var parsed = [];
  var idx = 0;

  while (idx < selects.length) {
    var entry = selects[idx];
    parsed.push({
      value: entry.value,
      options: entry.options
    });
    idx += 1;
  }

  return parsed;
};
|
||
|
||
/**
 * Dispatch table: step type name -> function that parses that step's data.
 *
 * The step types that can occur are:
 *   visit, fill_in, click_on, find, select, choose, check, wait, uncheck
 *
 * Only fill_in and select are of interest here, so only they have a parser.
 *
 * @type {{}}
 */
var stepParserFns = {};
stepParserFns['fill_in'] = parseFillInData;
stepParserFns['select'] = parseSelectData;
|
||
|
||
/**
 * Extracts the step information from one parsed member YAML document.
 *
 * @param memberData Parsed member document; `bioguide` and
 *     `contact_form.steps` are read from it. Each step is an object whose
 *     first key names the step type.
 * @returns {{bioguideId: *, stepTypes: Array, stepData: Object}} The member's
 *     bioguide id, the type of every step in order, and, for step types that
 *     have a parser in stepParserFns, the parsed data of each such step.
 */
var parseMemberData = function(memberData) {
  var bioguideId = memberData.bioguide;
  var steps = memberData['contact_form'].steps;

  var res = {
    bioguideId: bioguideId,
    stepTypes: [],
    stepData: {
      'fill_in': [],
      select: []
    }
  };

  for (var idx = 0; idx < steps.length; idx += 1) {
    var step = steps[idx];
    var stepType = lodash.keys(step)[0];
    res.stepTypes.push(stepType);

    var parser = stepParserFns[stepType];

    // Step types without a parser are recorded in stepTypes only.
    if (lodash.isUndefined(parser)) {
      continue;
    }

    var parsed = parser(step[stepType]);
    res.stepData[stepType].push(parsed);
  }

  return res;
};
|
||
|
||
/**
 * Reads one member YAML file, parses it and reports the outcome via callback.
 *
 * Every failure (reading the file, loading the YAML, parsing the member data)
 * is passed to `cb` rather than thrown, so callers can handle it.
 *
 * @param memberFile Path of the member YAML file to read.
 * @param cb Node-style callback: `cb(err, null)` on failure,
 *     `cb(null, memberData)` on success. Called exactly once.
 */
var readMemberFile = function(memberFile, cb) {
  fs.readFile(memberFile, 'utf8', function(err, data) {
    if (err) {
      // Throwing here would surface as an uncaught exception, because this
      // callback runs outside the caller's stack.
      cb(err, null);
      return;
    }

    var memberData;
    try {
      memberData = parseMemberData(yaml.safeLoad(data));
    } catch (parseErr) {
      cb(parseErr, null);
      return;
    }

    // Kept outside the try block so that an exception thrown by cb itself is
    // not caught above and answered with a second cb call.
    cb(null, memberData);
  });
};
|
||
|
||
/**
 * Reads and parses every member file in a directory, in parallel.
 *
 * On success `cb` receives an array of objects structured like:
 *  {
 *    bioguideId: 'A1001',
 *    stepTypes: ['visit', 'fill_in', ...],
 *    stepData: {
 *      'fill_in': [...],
 *      ...
 *    }
 *  }
 *
 * Directories and entries whose name begins with '.' are ignored.
 *
 * @param membersDir Directory containing the member YAML files.
 * @param cb Node-style callback: `cb(err)` when the directory cannot be listed
 *     or an entry cannot be inspected; otherwise it is handed to
 *     async.parallel, which reports the per-file results or the first error.
 */
var readMemberFiles = function(membersDir, cb) {
  fs.readdir(membersDir, function(err, files) {
    if (err) {
      // Report through the callback; a throw from this async callback could
      // not be caught by the caller.
      cb(err);
      return;
    }

    var yamlFiles;
    try {
      yamlFiles = lodash.filter(files, function(filename) {
        // Exclude files beginning with . (checked first, so they are never
        // stat'ed) and directories.
        if (filename.indexOf('.') === 0) {
          return false;
        }

        return !fs.statSync(path.join(membersDir, filename)).isDirectory();
      });
    } catch (statErr) {
      cb(statErr);
      return;
    }

    var tasks = lodash.map(yamlFiles, function(yamlFile) {
      return lodash.partial(readMemberFile, path.join(membersDir, yamlFile));
    });

    async.parallel(tasks, cb);
  });
};
|
||
|
||
module.exports.readMemberFiles = readMemberFiles; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters