Skip to content
This repository has been archived by the owner on May 13, 2019. It is now read-only.

Commit

Permalink
[#1002] Replacing all ad hoc name standardization with Name Cleaver
Browse files Browse the repository at this point in the history
  • Loading branch information
Alison Rowland committed Nov 23, 2011
1 parent 810b856 commit 89321c2
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 165 deletions.
2 changes: 1 addition & 1 deletion fec/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Create your views here.
import json, csv, os
from django.http import HttpResponse
from influence.names import standardize_name
from influence.helpers import standardize_name
from django.template.defaultfilters import slugify

from django.contrib.localflavor.us.us_states import US_STATES
Expand Down
15 changes: 13 additions & 2 deletions influence/helpers.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
from django.http import Http404
from django.template.defaultfilters import slugify
from influence import external_sites
from influence.names import standardize_name
from influenceexplorer import DEFAULT_CYCLE
from settings import api, LATEST_CYCLE
import datetime
import googleanalytics
import re
from django.utils.datastructures import SortedDict
from name_cleaver import PoliticianNameCleaver, OrganizationNameCleaver, \
IndividualNameCleaver


# Dispatch table: maps an entity-type string to a one-argument callable that
# feeds the raw name to the matching name_cleaver class and returns the
# result of its parse() call. Used by standardize_name() below.
_standardizers = {
'politician': lambda n: PoliticianNameCleaver(n).parse(),
'individual': lambda n: IndividualNameCleaver(n).parse(),
# Industries are parsed with the same cleaver as organizations.
'industry': lambda n: OrganizationNameCleaver(n).parse(),
'organization': lambda n: OrganizationNameCleaver(n).parse(),
}

def standardize_name(name, type):
    """Standardize ``name`` using the cleaner registered for ``type``.

    ``type`` must be one of the keys of ``_standardizers``; any other value
    raises ``KeyError``. The return value is whatever the selected cleaner
    returns for ``name``.
    """
    cleaner = _standardizers[type]
    return cleaner(name)

def bar_validate(data):
''' take a dict formatted for submission to the barchart
Expand Down
90 changes: 0 additions & 90 deletions influence/names.py

This file was deleted.

4 changes: 2 additions & 2 deletions influence/sitemaps.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.contrib.sitemaps import Sitemap
from django.contrib.sitemaps.views import index, sitemap
from django.conf import settings
from influence import names
from influence import helpers
from django.template.defaultfilters import slugify
from django.http import HttpResponse
import os
Expand Down Expand Up @@ -40,7 +40,7 @@ class EntitySitemap(Sitemap):

def __init__(self):
entity_type = self.entity_type or 'individual'
self.cleaner = names._standardizers[entity_type]
self.cleaner = helpers._standardizers[entity_type]

def clean(self, string):
    """Return the slug of ``string`` after running it through ``self.cleaner``."""
    standardized = self.cleaner(string)
    return slugify(standardized)
Expand Down
13 changes: 6 additions & 7 deletions influence/templatetags/influence_extras.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from django import template
from django.template.defaultfilters import stringfilter
from influence.names import standardize_individual_name, standardize_organization_name, \
standardize_industry_name
from name_cleaver import PoliticianNameCleaver
from name_cleaver import PoliticianNameCleaver, IndividualNameCleaver, \
OrganizationNameCleaver

register = template.Library()

Expand All @@ -16,17 +15,17 @@ def standardize_politician_name_filter(name):
@register.filter(name='standardize_individual_name')
@stringfilter
def standardize_individual_name_filter(name):
return standardize_individual_name(name)
return str(IndividualNameCleaver(name).parse())

@register.filter(name='standardize_organization_name')
@stringfilter
def standardize_organization_name_filter(name):
return standardize_organization_name(name)
return str(OrganizationNameCleaver(name).parse())

@register.filter(name='standardize_industry_name')
@stringfilter
def standardize_industry_name_filter(name):
return standardize_industry_name(name)
return str(OrganizationNameCleaver(name).parse())


seat_labels = {'federal:senate': 'US Senate',
Expand Down Expand Up @@ -109,4 +108,4 @@ def sunlight_author_uri(value):
shortname = "%s%s"%(value[0][:1],value[-1])
else:
shortname = value[0]
return "%s%s"%(SUNLIGHT_STAFF_BASE_URI, shortname)
return "%s%s"%(SUNLIGHT_STAFF_BASE_URI, shortname)
51 changes: 0 additions & 51 deletions influence/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from django.test import TestCase

from influence import names
from settings import api

CYCLE = 2008
Expand Down Expand Up @@ -110,52 +108,3 @@ def test_reg_issues(self):
self.assertLength(10, api.org.registrant_issues(self.NICKLES, CYCLE))


class IndividualNameStandardizationTests(TestCase):

def test_all_kinds_of_crazy(self):
self.assertEqual('Stanford Z Rothschild', names.standardize_individual_name('ROTHSCHILD 212, STANFORD Z MR'))

def test_jr_and_the_like_end_up_at_the_end(self):
self.assertEqual('Frederick A "Tripp" Baird III', names.standardize_individual_name('Baird, Frederick A "Tripp" III'))

def test_throw_out_mr(self):
self.assertEqual('T Boone Pickens', names.standardize_individual_name('Mr T Boone Pickens'))
self.assertEqual('T Boone Pickens', names.standardize_individual_name('Mr. T Boone Pickens'))
self.assertEqual('T Boone Pickens', names.standardize_individual_name('Pickens, T Boone Mr'))
self.assertEqual('John L Nau', names.standardize_individual_name(' MR JOHN L NAU,'))

def test_keep_the_mrs(self):
self.assertEqual('Mrs T Boone Pickens', names.standardize_individual_name('Mrs T Boone Pickens'))
self.assertEqual('Mrs. T Boone Pickens', names.standardize_individual_name('Mrs. T Boone Pickens'))
self.assertEqual('Mrs Stanford Z Rothschild', names.standardize_individual_name('ROTHSCHILD 212, STANFORD Z MRS'))

def test_capitalize_roman_numeral_suffixes(self):
self.assertEqual('Ken Cuccinelli II', names.standardize_individual_name('KEN CUCCINELLI II'))
self.assertEqual('Ken Cuccinelli II', names.standardize_individual_name('CUCCINELLI II, KEN'))
self.assertEqual('Ken Cuccinelli IV', names.standardize_individual_name('CUCCINELLI IV, KEN'))
self.assertEqual('Ken Cuccinelli IX', names.standardize_individual_name('CUCCINELLI IX, KEN'))

def test_capitalize_scottish_last_names(self):
self.assertEqual('Ronald McDonald', names.standardize_individual_name('RONALD MCDONALD'))
self.assertEqual('Old MacDonald', names.standardize_individual_name('OLD MACDONALD'))


class OrganizationNameStandardizationTests(TestCase):

def test_capitalize_pac(self):
self.assertEqual('Nancy Pelosi Leadership PAC', names.standardize_organization_name('NANCY PELOSI LEADERSHIP PAC'))

def test_make_single_word_names_ending_in_pac_all_uppercase(self):
self.assertEqual('ECEPAC', names.standardize_organization_name('ECEPAC'))

def test_names_starting_with_PAC(self):
self.assertEqual('PAC For Engineers', names.standardize_organization_name('PAC FOR ENGINEERS'))
self.assertEqual('PAC 102', names.standardize_organization_name('PAC 102'))

def test_doesnt_bother_names_containing_string_pac(self):
self.assertEqual('Pacific Trust', names.standardize_organization_name('PACIFIC TRUST'))

def test_capitalize_scottish_names(self):
self.assertEqual('McDonnell Douglas', names.standardize_individual_name('MCDONNELL DOUGLAS'))
self.assertEqual('MacDonnell Douglas', names.standardize_individual_name('MACDONNELL DOUGLAS'))

14 changes: 6 additions & 8 deletions influence/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@
from influence.helpers import prepare_entity_view, generate_label, barchart_href, \
bar_validate, pie_validate, months_into_cycle_for_date, \
filter_bad_spending_descriptions, make_bill_link, get_top_pages
from influence.names import standardize_organization_name, \
standardize_industry_name
from influenceexplorer import DEFAULT_CYCLE
from name_cleaver import PoliticianNameCleaver
from name_cleaver import PoliticianNameCleaver, OrganizationNameCleaver
from settings import LATEST_CYCLE, TOP_LISTS_CYCLE, api
from urllib2 import URLError
import datetime
Expand Down Expand Up @@ -207,7 +205,7 @@ def org_contribution_section(entity_id, standardized_name, cycle, amount, type,
if type == 'industry':
section['top_orgs'] = json.dumps([
{
'key': generate_label(standardize_organization_name(org['name'])),
'key': generate_label(str(OrganizationNameCleaver(org['name']).parse())),
'value': org['total_amount'],
'value_employee': org['employee_amount'],
'value_pac': org['direct_amount'],
Expand All @@ -233,7 +231,7 @@ def org_contribution_section(entity_id, standardized_name, cycle, amount, type,
pacs_barchart_data = []
for record in recipient_pacs:
pacs_barchart_data.append({
'key': generate_label(standardize_organization_name(record['name'])),
'key': generate_label(str(OrganizationNameCleaver(record['name']).parse())),
'value' : record['total_amount'],
'value_employee' : record['employee_amount'],
'value_pac' : record['direct_amount'],
Expand Down Expand Up @@ -473,7 +471,7 @@ def pol_contribution_section(entity_id, standardized_name, cycle, amount, extern
contributors_barchart_data = []
for record in top_contributors:
contributors_barchart_data.append({
'key': generate_label(standardize_organization_name(record['name'])),
'key': generate_label(str(OrganizationNameCleaver(record['name']).parse())),
'value' : record['total_amount'],
'value_employee' : record['employee_amount'],
'value_pac' : record['direct_amount'],
Expand All @@ -485,7 +483,7 @@ def pol_contribution_section(entity_id, standardized_name, cycle, amount, extern
industries_barchart_data = []
for record in top_industries:
industries_barchart_data.append({
'key': generate_label(standardize_industry_name(record['name'])),
'key': generate_label(str(OrganizationNameCleaver(record['name']).parse())),
'href': barchart_href(record, cycle, 'industry'),
'value' : record['amount'],
})
Expand Down Expand Up @@ -607,7 +605,7 @@ def indiv_contribution_section(entity_id, standardized_name, cycle, amount, exte
orgs_barchart_data = []
for record in recipient_orgs:
orgs_barchart_data.append({
'key': generate_label(standardize_organization_name(record['recipient_name'])),
'key': generate_label(str(OrganizationNameCleaver(record['recipient_name']).parse())),
'value' : record['amount'],
'href' : barchart_href(record, cycle, entity_type="organization"),
})
Expand Down
7 changes: 3 additions & 4 deletions postcards/management/commands/makepostcards.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
from gevent import monkey
import gevent
from django.utils.datastructures import SortedDict
from influence.helpers import standardize_organization_name, standardize_industry_name
from name_cleaver import PoliticianNameCleaver
from name_cleaver import PoliticianNameCleaver, OrganizationNameCleaver
from django.template.defaultfilters import slugify
import urllib2
import re
Expand Down Expand Up @@ -171,13 +170,13 @@ def fetch_photos(candidate):
contributions = SortedDict()
for cont in candidate['contributions'][:5]:
if float(cont['total_amount']) >= 0:
contributions[standardize_organization_name(cont['name'])] = cont['total_amount']
contributions[str(OrganizationNameCleaver(cont['name']).parse())] = cont['total_amount']

# industry data
industries = SortedDict()
for cont in candidate['industries'][:5]:
if float(cont['amount']) >= 0:
industries[standardize_industry_name(cont['name'])] = cont['amount']
industries[str(OrganizationNameCleaver(cont['name']).parse())] = cont['amount']

#deal with the name
name_obj = PoliticianNameCleaver(candidate['name']).parse()
Expand Down

0 comments on commit 89321c2

Please sign in to comment.