Skip to content
This repository has been archived by the owner on Feb 1, 2024. It is now read-only.

Commit

Permalink
Merge pull request #1602 from open-apparel-registry/feature/jcw/paren…
Browse files Browse the repository at this point in the history
…t-company
  • Loading branch information
jwalgran committed Jan 27, 2022
2 parents a9598a0 + 1f7a7b0 commit 4c30cf8
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

- Add extended field handling to facility details [#1593](https://github.com/open-apparel-registry/open-apparel-registry/pull/1593)
- Add searchability to embed config [#1598](https://github.com/open-apparel-registry/open-apparel-registry/pull/1598)
- Create parent company extended fields [#1602](https://github.com/open-apparel-registry/open-apparel-registry/pull/1602)

### Changed

Expand Down
2 changes: 1 addition & 1 deletion src/app/src/util/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ export const EXTENDED_FIELD_TYPES = [
{
label: 'Parent Company',
fieldName: 'parent_company',
formatValue: v => v,
formatValue: v => v.contributor_name || v.name || v.raw_value,
},
{
label: 'Facility Type',
Expand Down
18 changes: 16 additions & 2 deletions src/django/api/extended_fields.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from api.models import ExtendedField
from api.models import Contributor, ExtendedField


def extract_range_value(value):
Expand All @@ -12,6 +12,19 @@ def create_extendedfield(field, field_value, item, contributor):
if field_value is not None and field_value != "":
if field == ExtendedField.NUMBER_OF_WORKERS:
field_value = extract_range_value(field_value)
elif field == ExtendedField.PARENT_COMPANY:
matches = Contributor.objects.filter_by_name(field_value)
if matches.exists():
field_value = {
'raw_value': field_value,
'contributor_name': matches[0].name,
'contributor_id': matches[0].id
}
else:
field_value = {
'raw_value': field_value,
'name': field_value
}
ExtendedField.objects.create(
contributor=contributor,
facility_list_item=item,
Expand All @@ -21,7 +34,8 @@ def create_extendedfield(field, field_value, item, contributor):


RAW_DATA_FIELDS = (ExtendedField.NUMBER_OF_WORKERS,
ExtendedField.NATIVE_LANGUAGE_NAME)
ExtendedField.NATIVE_LANGUAGE_NAME,
ExtendedField.PARENT_COMPANY)


def create_extendedfields_for_single_item(item, raw_data):
Expand Down
3 changes: 0 additions & 3 deletions src/django/api/migrations/0076_fill_clean_name_and_address.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@


def populate_cleaned_fields(apps, schema_editor):
print('')
print('Started filling clean name and address')
count = 0
FacilityListItem = apps.get_model('api', 'FacilityListItem')
for list_item in FacilityListItem.objects.exclude(name='', address='').iterator():
Expand All @@ -14,7 +12,6 @@ def populate_cleaned_fields(apps, schema_editor):
count += 1
if count % 1000 == 0:
print('Filled ' + str(count))
print('Finished filling clean name and address')


def do_nothing_on_reverse(apps, schema_editor):
Expand Down
29 changes: 29 additions & 0 deletions src/django/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.contrib.postgres import fields as postgres
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.aggregates.general import ArrayAgg
from django.contrib.postgres.search import TrigramSimilarity
from django.db import models, transaction
from django.db.models import (F, Q, ExpressionWrapper)
from django.db.models.signals import post_save
Expand Down Expand Up @@ -85,6 +86,33 @@ def create_superuser(self, email, password, **extra_fields):
return self._create_user(email, password, **extra_fields)


class ContributorManager(models.Manager):
    """Model manager that adds fuzzy, ranked lookup of contributors by name."""

    # Minimum (inclusive) trigram similarity for a name to count as a match.
    TRIGRAM_SIMILARITY_THRESHOLD = 0.5

    def filter_by_name(self, name):
        """
        Perform a fuzzy match on contributor name where the similarity meets
        or exceeds a confidence threshold. The results are ordered by
        similarity, then by whether the Contributor is verified, then by
        whether the contributor has active sources.
        False is less than True so we order_by boolean fields in descending
        order.

        Arguments:
        name -- The name to compare against each contributor's name.

        Returns:
        A queryset of matching contributors annotated with `similarity`,
        `active_source_count` and `has_active_sources`.
        """
        threshold = self.TRIGRAM_SIMILARITY_THRESHOLD
        return (
            self
            # Count only the related sources that are active. Passing the Q
            # object as the counted expression would also count inactive
            # sources, because COUNT() includes every non-NULL value and a
            # FALSE comparison result is not NULL.
            .annotate(active_source_count=models.Count(
                'source', filter=Q(source__is_active=True)))
            .annotate(
                has_active_sources=ExpressionWrapper(
                    Q(active_source_count__gt=0),
                    models.BooleanField()))
            .annotate(similarity=TrigramSimilarity('name', name))
            .filter(similarity__gte=threshold)
            .order_by('-similarity', '-is_verified', '-has_active_sources')
        )


class Contributor(models.Model):
"""
A participant in or observer of the supply chain that will
Expand Down Expand Up @@ -188,6 +216,7 @@ class Contributor(models.Model):
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)

objects = ContributorManager()
history = HistoricalRecords()

@staticmethod
Expand Down
155 changes: 154 additions & 1 deletion src/django/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
RequestLog, DownloadLog, FacilityLocation, Source,
ApiLimit, ApiBlock, ContributorNotifications,
EmbedConfig, EmbedField, NonstandardField,
FacilityActivityReport, ExtendedField)
FacilityActivityReport, ExtendedField,
ContributorManager)
from api.oar_id import make_oar_id, validate_oar_id
from api.matching import match_facility_list_items, GazetteerCache
from api.processing import (parse_facility_list_item,
Expand Down Expand Up @@ -7273,3 +7274,155 @@ def test_inactive_match(self):
self.assertEquals(None, field['value'])
self.assertEquals('ExtraTwo', field_two['label'])
self.assertEquals(None, field_two['value'])


class ContributorManagerTest(TestCase):
    """Tests for the matching and ordering of Contributor.objects.filter_by_name."""

    fixtures = ['users', 'contributors']

    def _create_testing_contributors(self):
        """
        Create two users and, for each, an unverified contributor named
        'TESTING' with no sources. Returns the contributors in creation
        order as a (c1, c2) tuple.
        """
        users = [
            User.objects.create(email=email)
            for email in ('test1@test.com', 'test2@test.com')
        ]
        return tuple(
            Contributor.objects.create(
                admin=user,
                name='TESTING',
                contrib_type=Contributor.OTHER_CONTRIB_TYPE)
            for user in users
        )

    def _assert_second_sorts_first_by_default(self, c2):
        """
        Assert the baseline ordering of the two identically named
        contributors before any tie-breaking attribute is set.
        """
        matches = Contributor.objects.filter_by_name('TESTING')
        self.assertEqual(2, matches.count())
        # When the names are the same and neither is verified then the second
        # contributor happens to sort first
        self.assertEqual(c2, matches[0])

    def test_filter_by_name(self):
        # The assertion below relies on the fixtures providing a contributor
        # whose name is a perfect trigram match for 'factory a'.
        matches = Contributor.objects.filter_by_name('factory a')
        self.assertGreater(matches.count(), 0)
        self.assertEqual(1.0, matches[0].similarity)

        # A partial name should still match, but with a similarity strictly
        # between the threshold and a perfect score.
        matches = Contributor.objects.filter_by_name('factory')
        self.assertGreater(matches.count(), 0)
        self.assertLess(matches[0].similarity, 1.0)
        self.assertGreater(matches[0].similarity,
                           ContributorManager.TRIGRAM_SIMILARITY_THRESHOLD)

    def test_filter_by_name_verified(self):
        c1, c2 = self._create_testing_contributors()
        self._assert_second_sorts_first_by_default(c2)

        c1.is_verified = True
        c1.save()
        matches = Contributor.objects.filter_by_name('TESTING')
        self.assertEqual(2, matches.count())
        # Marking c1 as verified forces it to sort first
        self.assertEqual(c1, matches[0])

    def test_filter_by_name_source(self):
        c1, c2 = self._create_testing_contributors()
        self._assert_second_sorts_first_by_default(c2)

        Source.objects.create(
            source_type=Source.SINGLE,
            is_active=True,
            is_public=True,
            contributor=c1)

        matches = Contributor.objects.filter_by_name('TESTING')
        self.assertEqual(2, matches.count())
        # An active source forces it to sort first
        self.assertEqual(c1, matches[0])

    def test_filter_by_name_verified_and_source(self):
        c1, c2 = self._create_testing_contributors()

        Source.objects.create(
            source_type=Source.SINGLE,
            is_active=True,
            is_public=True,
            contributor=c1)

        c2.is_verified = True
        c2.save()

        matches = Contributor.objects.filter_by_name('TESTING')
        self.assertEqual(2, matches.count())
        # A verified contributor sorts before one with a source
        self.assertEqual(c2, matches[0])


class ParentCompanyTestCase(FacilityAPITestCaseBase):
    """
    Submits single facilities with a parent_company value and checks the
    ExtendedField that gets stored for it.
    """

    def setUp(self):
        super().setUp()
        self.url = reverse('facility-list')

    def join_group_and_login(self):
        """Put the test user in the facility submission group and log in."""
        self.client.logout()
        submit_group = auth.models.Group.objects.get(
            name=FeatureGroups.CAN_SUBMIT_FACILITY,
        )
        self.user.groups.set([submit_group.id])
        self.user.save()
        self.client.login(email=self.user_email,
                          password=self.user_password)

    def test_submit_parent_company_no_match(self):
        self.join_group_and_login()
        payload = {
            'country': "US",
            'name': "Azavea",
            'address': "990 Spring Garden St., Philadelphia PA 19123",
            'parent_company': 'A random value'
        }
        self.client.post(self.url, payload)

        # Exactly one extended field should be stored, holding the submitted
        # text as both the raw value and the name.
        self.assertEqual(1, ExtendedField.objects.count())
        stored = ExtendedField.objects.first()
        self.assertEqual(ExtendedField.PARENT_COMPANY, stored.field_name)
        expected_value = {
            'raw_value': 'A random value',
            'name': 'A random value'
        }
        self.assertEqual(expected_value, stored.value)

    def test_submit_parent_company_fuzzy_match(self):
        self.join_group_and_login()
        payload = {
            'country': "US",
            'name': "Azavea",
            'address': "990 Spring Garden St., Philadelphia PA 19123",
            'parent_company': 'TEST CNTRIBUTOR'
        }
        self.client.post(self.url, payload)

        # The misspelled name is expected to resolve to self.contributor,
        # while the submitted text is kept as the raw value.
        self.assertEqual(1, ExtendedField.objects.count())
        stored = ExtendedField.objects.first()
        self.assertEqual(ExtendedField.PARENT_COMPANY, stored.field_name)
        expected_value = {
            'raw_value': 'TEST CNTRIBUTOR',
            'contributor_name': self.contributor.name,
            'contributor_id': self.contributor.id
        }
        self.assertEqual(expected_value, stored.value)

0 comments on commit 4c30cf8

Please sign in to comment.