Skip to content

Commit

Permalink
Adds in an analysis script to parse the contact-congress YAML files t…
Browse files Browse the repository at this point in the history
…o understand the data used by various reps
  • Loading branch information
Leah Jones committed Apr 30, 2015
1 parent eaf1d68 commit 3fb1837
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 0 deletions.
Empty file added analysis/README.md
Empty file.
25 changes: 25 additions & 0 deletions analysis/contact-congress-analysis.js
@@ -0,0 +1,25 @@
/**
* Analysis script to parse the contents of the Member YAML files from:
* https://github.com/unitedstates/contact-congress
*/

var lodash = require('lodash');
var path = require('path');

var makeContactCongressStats = require('./contact-congress-stats').makeContactCongressStats;
var readMemberFiles = require('./parse-contact-congress').readMemberFiles;


/**
 * Callback handed to readMemberFiles: builds the aggregate stats for the
 * parsed member data and prints the stats for `select` steps.
 *
 * @param err      error reported by the reader, if any; rethrown unchanged
 * @param results  parsed member data passed on to makeContactCongressStats
 */
var logAnalysisData = function(err, results) {
  if (err) {
    throw err;
  }

  var selectStats = makeContactCongressStats(results).stepData.select;
  console.log(selectStats);
};


// The first CLI argument is the directory that contains the `members`
// subdirectory holding the member YAML files.
var repoDir = process.argv[2];

if (typeof repoDir === 'undefined') {
  // Fail with a usage hint rather than the opaque TypeError that path.join
  // raises when it is handed undefined.
  throw new Error('Usage: node contact-congress-analysis.js <path-to-contact-congress>');
}

var membersDir = path.join(repoDir, 'members');
readMemberFiles(membersDir, logAnalysisData);
101 changes: 101 additions & 0 deletions analysis/contact-congress-stats.js
@@ -0,0 +1,101 @@
/**
* Functions to compute summary statistics over parsed contact-congress member data.
*/

var lodash = require('lodash');
var numeral = require('numeral');


/**
 * Summarises the values entered by `fill_in` steps across all members.
 *
 * @param results  collection of parsed member objects; each must carry
 *                 `stepData.fill_in`, an array holding one array of field
 *                 values per fill_in step
 * @returns {{distinct: Array, stats: Object}} `distinct` lists each value
 *          once; `stats` maps each value to its occurrence `count` and to
 *          `percentage`, the count divided by the total number of fill_in
 *          steps, formatted with numeral as '0.000%'
 */
var makeFillInStats = function(results) {
  var stepCount = 0;

  // Collect every member's fill_in steps and flatten once at the end;
  // re-flattening the accumulated list for each member is quadratic.
  var fieldsPerMember = lodash.map(results, function(res) {
    var fillInSteps = res.stepData['fill_in'];
    stepCount += fillInSteps.length;
    return fillInSteps;
  });
  var allFields = lodash.flattenDeep(fieldsPerMember);

  var fieldCount = lodash.countBy(allFields);
  var fieldStats = lodash.mapValues(fieldCount, function(count) {
    return {
      percentage: numeral(count / stepCount).format('0.000%'),
      count: count
    };
  });

  return {
    distinct: lodash.uniq(allFields),
    stats: fieldStats
  };
};


/**
 * Summarises the values chosen by `select` steps across all members.
 *
 * @param results  collection of parsed member objects; each must carry
 *                 `stepData.select`, an array holding one array of
 *                 {value, options} entries per select step
 * @returns {{distinct: Array, stats: Object}} `distinct` lists each select
 *          value once; `stats` maps each value to its occurrence `count` and
 *          to `percentage`, the count divided by the total number of select
 *          steps, formatted with numeral as '0.000%'
 */
var makeSelectStats = function(results) {
  var values = [];
  var totalSteps = 0;

  var collectValue = function(select) {
    values.push(select.value);
  };
  var collectStep = function(selectVals) {
    lodash.forEach(selectVals, collectValue);
  };

  lodash.forEach(results, function(member) {
    var selectSteps = member.stepData.select;
    totalSteps += selectSteps.length;
    lodash.forEach(selectSteps, collectStep);
  });

  var toStat = function(count) {
    var share = count / totalSteps;
    return {percentage: numeral(share).format('0.000%'), count: count};
  };

  return {
    distinct: lodash.union(values),
    stats: lodash.mapValues(lodash.countBy(values), toStat)
  };
};


/**
 * Stats builders keyed by the step type they summarise.
 */
var stepStatFns = {
  fill_in: makeFillInStats,
  select: makeSelectStats
};


/**
 * Builds aggregate statistics for a set of parsed member files.
 *
 * @param results  collection of parsed member objects, each with `stepTypes`
 *                 and `stepData`
 * @returns {{stepTypes: Array, stepData: Object}} the distinct step types
 *          seen across all members, plus the stats for every one of those
 *          step types that has a builder in stepStatFns
 */
var makeCCStats = function(results) {
  var typesPerMember = lodash.map(results, function(member) {
    return member.stepTypes;
  });
  // union takes the arrays as separate arguments, hence apply
  var distinctTypes = lodash.union.apply(lodash, typesPerMember);

  var statsByType = {};
  lodash.forEach(distinctTypes, function(stepType) {
    var buildStats = stepStatFns[stepType];

    // Step types without a registered builder are listed but get no stats
    if (lodash.isUndefined(buildStats)) {
      return;
    }

    statsByType[stepType] = buildStats(results);
  });

  return {
    stepTypes: distinctTypes,
    stepData: statsByType
  };
};


module.exports.makeContactCongressStats = makeCCStats;
163 changes: 163 additions & 0 deletions analysis/parse-contact-congress.js
@@ -0,0 +1,163 @@
/**
* Functions to parse the contact-congress YAML files and return a data structure with useful info.
*/

var async = require('async');
var fs = require('fs');
var lodash = require('lodash');
var path = require('path');
var yaml = require('js-yaml');


/**
 * Collects the values of the fields in a single fill_in step.
 *
 * @param fields  array of field objects from a fill_in step
 * @returns {Array} each field's `value`, in order, leaving out empty strings
 */
var parseFillInData = function(fields) {
  // Some members (e.g. reps C000127, C001071) have fields whose value is the
  // empty string. They look like a mix of oddly coded values (mphone) and some
  // kind of identifiers, possibly session or user ids, e.g.
  // field_dbe73130-2c88-447b-bb88-8a097e611511. Fields without a value are dropped.
  return fields
    .map(function(field) {
      return field.value;
    })
    .filter(function(value) {
      return value !== '';
    });
};


/**
 * Reduces the entries of a single select step to their value and options.
 *
 * @param selects  array of select objects from a select step
 * @returns {Array} one {value, options} object per entry, in order
 */
var parseSelectData = function(selects) {
  return selects.map(function(select) {
    return {
      value: select.value,
      options: select.options
    };
  });
};


/**
 * Parsers keyed by the step type whose data they extract.
 *
 * A contact form step can be any of:
 * - visit
 * - fill_in
 * - click_on
 * - find
 * - select
 * - choose
 * - check
 * - wait
 * - uncheck
 *
 * Only fill_in and select are of interest here, so only those have a parser.
 *
 * @type {Object}
 */
var stepParserFns = {
  fill_in: parseFillInData,
  select: parseSelectData
};


/**
 * Extracts the step information of one member from its loaded YAML data.
 *
 * @param memberData  object loaded from a member YAML file; must provide
 *                    `bioguide` and `contact_form.steps`
 * @returns {{bioguideId: *, stepTypes: Array, stepData: Object}} the member's
 *          bioguide id, the type of every step in order (repeats included),
 *          and, per step type with a parser in stepParserFns, one parsed
 *          entry for each step of that type
 */
var parseMemberData = function(memberData) {
  var parsed = {
    bioguideId: memberData.bioguide,
    stepTypes: [],
    stepData: {
      'fill_in': [],
      select: []
    }
  };

  memberData['contact_form'].steps.forEach(function(step) {
    // The step type is taken from the step object's first key
    var stepType = lodash.keys(step)[0];
    parsed.stepTypes.push(stepType);

    var parse = stepParserFns[stepType];
    if (lodash.isUndefined(parse)) {
      return;
    }

    parsed.stepData[stepType].push(parse(step[stepType]));
  });

  return parsed;
};


/**
 * Reads one member YAML file and hands its parsed contents to the callback.
 *
 * @param memberFile  path of the member YAML file
 * @param cb          node-style callback: cb(err, null) when reading or
 *                    parsing fails, otherwise cb(null, memberData) with the
 *                    result of parseMemberData
 */
var readMemberFile = function(memberFile, cb) {
  fs.readFile(memberFile, 'utf8', function(err, data) {
    if (err) {
      // Report read failures through the callback, like parse failures,
      // instead of throwing from inside the fs callback where no caller
      // can catch the exception.
      cb(err, null);
      return;
    }

    var memberData;
    try {
      memberData = parseMemberData(yaml.safeLoad(data));
    } catch (parseErr) {
      cb(parseErr, null);
      return;
    }

    // Called outside the try block so an exception thrown by cb itself is not
    // mistaken for a parse error and cb is never invoked a second time.
    cb(null, memberData);
  });
};


/**
 * Reads every member file in a directory and passes the parsed results to
 * the callback as an array of objects describing the file contents.
 *
 * Objects are structured like:
 * {
 *   bioguideId: 'A1001',
 *   stepTypes: ['visit', 'fill_in', ...],
 *   stepData: {
 *     'fill_in': [...],
 *     ...
 *   }
 * }
 *
 * Directories and entries whose name begins with '.' are skipped; every other
 * entry is read with readMemberFile.
 *
 * @param membersDir  directory containing the member YAML files
 * @param cb          node-style callback: cb(err) when the directory cannot
 *                    be listed or an entry cannot be stat'ed; otherwise it is
 *                    handed to async.parallel and receives (err, results)
 */
var readMemberFiles = function(membersDir, cb) {
  fs.readdir(membersDir, function(err, files) {
    if (err) {
      // Report through the callback; throwing inside the fs callback cannot
      // be caught by the caller.
      cb(err);
      return;
    }

    var tasks;
    try {
      var memberFiles = lodash.filter(files, function(filename) {
        var isHidden = filename.indexOf('.') === 0;
        // statSync throws when an entry cannot be stat'ed
        return !isHidden && !fs.statSync(path.join(membersDir, filename)).isDirectory();
      });

      tasks = lodash.map(memberFiles, function(memberFile) {
        return lodash.partial(readMemberFile, path.join(membersDir, memberFile));
      });
    } catch (statErr) {
      cb(statErr);
      return;
    }

    // Kept outside the try block so an exception raised while the tasks run
    // is not reported to cb as a stat failure.
    // NOTE(review): all files are read concurrently; consider
    // async.parallelLimit if the directory is large enough to exhaust file
    // descriptors.
    async.parallel(tasks, cb);
  });
};


module.exports.readMemberFiles = readMemberFiles;
4 changes: 4 additions & 0 deletions package.json
Expand Up @@ -39,10 +39,12 @@
"gulp-tap": "^0.1.3",
"gulp-uglify": "^1.0.1",
"gulp-util": "^3.0.4",
"js-yaml": "^3.3.0",
"karma": "^0.12.31",
"karma-mocha": "^0.1.10",
"mocha": "^2.2.1",
"nested-describe": "^1.0.1",
"numeral": "^1.5.3",
"pretty-hrtime": "^1.0.0",
"require-dir": "^0.3.0",
"run-sequence": "^1.0.2",
Expand All @@ -56,6 +58,7 @@
"angular-cookies": "^1.3.15",
"angular-route": "^1.3.14",
"angular-sanitize": "^1.3.15",
"async": "^0.9.0",
"bootstrap": "^3.3.2",
"bootstrap-sass": "3.3.2",
"change-case-keys": "0.0.4",
Expand All @@ -70,6 +73,7 @@
"lodash.filter": "^3.1.0",
"lodash.findwhere": "^3.0.1",
"lodash.foreach": "^3.0.1",
"lodash.includes": "^3.1.1",
"lodash.isarray": "^3.0.1",
"lodash.isempty": "^3.0.1",
"lodash.isnumber": "^3.0.1",
Expand Down

0 comments on commit 3fb1837

Please sign in to comment.