Skip to content
This repository has been archived by the owner on Feb 1, 2024. It is now read-only.

Add extended field reports #1826

Merged
merged 2 commits into from
May 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Added

- Add new taxonomy processing types [#1827](https://github.com/open-apparel-registry/open-apparel-registry/pull/1827)
- Add extended field reports [#1826](https://github.com/open-apparel-registry/open-apparel-registry/pull/1826)

### Changed

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 2.2.24 on 2022-05-05 11:52

from django.db import migrations, models


# Names accepted by the ``field_name`` column; each is used as both the
# stored value and the display label.
_FIELD_NAMES = (
    'name',
    'address',
    'number_of_workers',
    'native_language_name',
    'facility_type',
    'processing_type',
    'product_type',
    'parent_company',
)


def _field_name_field():
    """Build the ``field_name`` CharField shared by both altered models."""
    return models.CharField(
        choices=[(field_name, field_name) for field_name in _FIELD_NAMES],
        help_text='The name of the field, chosen from a strict list.',
        max_length=200,
    )


class Migration(migrations.Migration):
    """Alter ``field_name`` on ``extendedfield`` and its historical model.

    Both fields receive the same choices, help text and maximum length.
    """

    dependencies = [
        ('api', '0089_reprocess_parent_company'),
    ]

    operations = [
        migrations.AlterField(
            model_name='extendedfield',
            name='field_name',
            field=_field_name_field(),
        ),
        migrations.AlterField(
            model_name='historicalextendedfield',
            name='field_name',
            field=_field_name_field(),
        ),
    ]
6 changes: 1 addition & 5 deletions src/django/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2372,7 +2372,6 @@ class ExtendedField(models.Model):
Fields will be related to either a claim or list item; they must reference
one, but not both.
"""
COUNTRY = 'country'
NAME = 'name'
ADDRESS = 'address'
NUMBER_OF_WORKERS = 'number_of_workers'
Expand All @@ -2385,17 +2384,14 @@ class ExtendedField(models.Model):
PROCESSING_TYPE = 'processing_type'

FIELD_CHOICES = (
(COUNTRY, COUNTRY),
(NAME, NAME),
(ADDRESS, ADDRESS),
(NUMBER_OF_WORKERS, NUMBER_OF_WORKERS),
(NATIVE_LANGUAGE_NAME, NATIVE_LANGUAGE_NAME),
(FACILITY_TYPE, FACILITY_TYPE),
(PROCESSING_TYPE, PROCESSING_TYPE),
(PRODUCT_TYPE, PRODUCT_TYPE),
(PARENT_COMPANY, PARENT_COMPANY),
(FACILITY_TYPE, FACILITY_TYPE),
(PROCESSING_TYPE, PROCESSING_TYPE))
(PARENT_COMPANY, PARENT_COMPANY))

contributor = models.ForeignKey(
'Contributor',
Expand Down
177 changes: 172 additions & 5 deletions src/django/api/reports.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,32 @@
import json
import os
import pytz
from collections import defaultdict

from datetime import datetime
from dateutil.relativedelta import relativedelta
from django.conf import settings
from django.utils import timezone
from glob import glob
from urllib.parse import quote

from django.db import connection
from django.db.models import Func, F
from api.models import HistoricalFacility, FacilityListItem
from django.db.models import Func, F, Q
from api.models import HistoricalFacility, FacilityListItem, ExtendedField
from api.constants import ProcessingAction, DateFormats

# Absolute path of the directory that contains this module.
_root = os.path.abspath(os.path.dirname(__file__))


def sort_by_first_column(array):
    """Return the rows of ``array`` as a new list ordered by their first item."""
    ordered = list(array)
    ordered.sort(key=lambda row: row[0])
    return ordered
def _report_error_to_rollbar(message, extra_data=None):
    """Send ``message`` to Rollbar at error level when Rollbar is configured.

    Does nothing when ``settings.ROLLBAR`` is missing or falsy.

    message -- the text to report.
    extra_data -- optional extra payload forwarded to Rollbar unchanged.
    """
    rollbar_settings = getattr(settings, 'ROLLBAR', {})
    if not rollbar_settings:
        return

    # Imported here so rollbar is only loaded on the configured path.
    import rollbar
    rollbar.report_message(message, level='error', extra_data=extra_data)


def sort_by_first_column(array, reverse=False):
    """Return the rows of ``array`` as a new list ordered by their first item.

    array -- any iterable of indexable rows.
    reverse -- when True, order the rows descending instead of ascending.
    """
    ordered = list(array)
    ordered.sort(key=lambda row: row[0], reverse=reverse)
    return ordered


def try_parsing_date(text):
Expand Down Expand Up @@ -120,6 +130,153 @@ def weekly_geocoding_time_with_queue():
return [['week', 'average_geocoding_time_in_seconds'], rows]


def submitted_product_type_values():
    """Count how often each product type value occurs in extended fields.

    Returns a two-item list: the column headers, then the rows of
    ``(product_type_value, count)`` ordered by value. Values are stripped
    of surrounding whitespace before being counted.
    """
    counts = defaultdict(int)

    raw_value_lists = ExtendedField.objects.filter(
        field_name='product_type'
    ).values_list('value__raw_values', flat=True)

    for raw_values in raw_value_lists:
        for raw_value in raw_values:
            counts[raw_value.strip()] += 1

    return [
        ['product_type_values', 'times_submitted'],
        sort_by_first_column(counts.items()),
    ]


def value_is_valid(value):
    """Return True when ``value`` should be kept.

    None and blank strings are rejected, as are the values "other",
    "denim services" and "boarding" (compared case-insensitively, without
    stripping whitespace).
    """
    if value is None or value.strip() == "":
        return False

    return value.lower() not in ("other", "denim services", "boarding")


def process_raw_values(raw_values):
    """Turn submitted raw values into a de-duplicated, filtered list.

    raw_values -- either a string, which is split on ``|``, or an iterable
        of values that is used as given.

    Returns the values in first-seen order with duplicates removed and
    every entry rejected by ``value_is_valid`` dropped.
    """
    if isinstance(raw_values, str):
        # split() already yields a one-element list when no '|' is present.
        candidates = raw_values.split('|')
    else:
        candidates = raw_values

    # dict keys keep insertion order, which de-duplicates while preserving
    # the order in which values were first seen.
    unique_values = dict.fromkeys(candidates)

    return [value for value in unique_values if value_is_valid(value)]


def processing_type_facility_type_matched():
    """Map each matched taxonomy value to the raw values that produced it.

    Walks every distinct (raw_values, matched_values) pair stored on
    ``processing_type`` and ``facility_type`` extended fields. When the
    processed raw values line up one-to-one with the matched values, each
    raw value whose match has a non-None entry at position 3 is recorded
    under the match's entries at positions 3 and 2.
    NOTE(review): positions 2 and 3 are presumably the facility type and
    processing type taxonomy labels -- confirm against the code that writes
    ``matched_values``.

    Pairs whose lengths differ are reported to Rollbar and skipped.

    Returns a two-item list: the column headers, then rows of
    ``(taxonomy_value, raw values joined by ' | ')`` ordered by taxonomy
    value. The joined raw values are sorted so the output does not vary
    between runs.
    """
    raw_values_by_taxonomy = defaultdict(set)

    pairs = ExtendedField.objects.filter(
        Q(field_name='processing_type') | Q(field_name='facility_type')
    ).distinct(
        'value__raw_values', 'value__matched_values'
    ).values_list('value__raw_values', 'value__matched_values').iterator()

    for raw_values, matched_values in pairs:
        values = process_raw_values(raw_values)

        if len(values) != len(matched_values):
            _report_error_to_rollbar((
                'processing_type_facility_type_matched encountered '
                'mismatched processing type value count'
            ), extra_data={
                'deduped_values': json.dumps(values),
                'matched_values': json.dumps(matched_values),
            })
            continue

        for raw_value, match in zip(values, matched_values):
            if match[3] is not None:
                cleaned = raw_value.strip()
                raw_values_by_taxonomy[match[3]].add(cleaned)
                raw_values_by_taxonomy[match[2]].add(cleaned)

    # Sets have no defined iteration order, so sort before joining to keep
    # the report stable from one run to the next.
    data = {
        taxonomy_value: ' | '.join(sorted(matched_raw_values))
        for taxonomy_value, matched_raw_values
        in raw_values_by_taxonomy.items()
    }

    rows = sort_by_first_column(data.items())
    return [['taxonomy_value', 'raw_values'], rows]


def processing_type_facility_type_unmatched():
    """Count raw processing/facility type values that have no match.

    Walks every distinct (raw_values, matched_values) pair stored on
    ``processing_type`` and ``facility_type`` extended fields. When the
    processed raw values line up one-to-one with the matched values, each
    raw value whose match has None at position 3 is counted once per pair.

    Pairs whose lengths differ are reported to Rollbar and skipped.

    Returns a two-item list: the column headers, then rows of
    ``(raw_value, count)`` ordered by raw value.
    """
    counts = defaultdict(int)

    pairs = ExtendedField.objects.filter(
        Q(field_name='processing_type') | Q(field_name='facility_type')
    ).distinct(
        'value__raw_values', 'value__matched_values'
    ).values_list('value__raw_values', 'value__matched_values').iterator()

    for raw_values, matched_values in pairs:
        values = process_raw_values(raw_values)

        if len(values) != len(matched_values):
            _report_error_to_rollbar((
                'processing_type_facility_type_unmatched encountered '
                'mismatched processing type value count'
            ), extra_data={
                'deduped_values': json.dumps(values),
                'matched_values': json.dumps(matched_values),
            })
            continue

        for raw_value, match in zip(values, matched_values):
            if match[3] is None:
                counts[raw_value.strip()] += 1

    return [
        ['processing_type_facility_type', 'times_submitted'],
        sort_by_first_column(counts.items()),
    ]


def get_extended_field_columns():
    """Return the header row for the cumulative extended field reports.

    The first column is ``month``; one column follows per entry in
    ``ExtendedField.FIELD_CHOICES``. The ``name`` and ``address`` columns
    are labelled with a " Claimed" suffix.
    """
    claimed_fields = ('name', 'address')

    field_columns = [
        '{} Claimed'.format(field_name)
        if field_name in claimed_fields
        else field_name
        for field_name, _ in ExtendedField.FIELD_CHOICES
    ]
    return ['month'] + field_columns


def contributors_with_extended_fields_cumulative():
    """Report the cumulative number of contributors per extended field.

    For every month in which an extended field was created, counts the
    distinct contributors that have submitted each field name up to and
    including that month.

    Returns a two-item list: the column headers from
    ``get_extended_field_columns`` and one row per month, ordered by the
    formatted month in descending order.
    """
    data = defaultdict(list)
    months = ExtendedField.objects.dates('created_at', 'month')
    for month in months:
        m = month.strftime(DateFormats.MONTH)
        data[m] = [m]
        # Cumulative cut-off: all earlier years, plus this year up to and
        # including this month. Bounding year and month independently would
        # wrongly drop earlier-year records from later calendar months
        # (e.g. a December record when building the following February).
        fields = ExtendedField.objects.filter(
            Q(created_at__year__lt=month.year)
            | Q(created_at__year=month.year,
                created_at__month__lte=month.month))

        # One count per FIELD_CHOICES entry, in the same order as the
        # header row. A duplicated entry in FIELD_CHOICES would therefore
        # show up as a repeated column.
        for field_name, _ in ExtendedField.FIELD_CHOICES:
            field_count = fields.filter(field_name=field_name).distinct(
                'contributor_id').count()
            data[m].append(field_count)

    columns = get_extended_field_columns()
    rows = sort_by_first_column(data.values(), reverse=True)
    return [columns, rows]


def facilities_with_extended_fields_cumulative():
    """Report the cumulative number of facilities per extended field.

    For every month in which an extended field was created, counts the
    distinct facilities that have each field name attached up to and
    including that month.

    Returns a two-item list: the column headers from
    ``get_extended_field_columns`` and one row per month, ordered by the
    formatted month in descending order.
    """
    data = defaultdict(list)
    months = ExtendedField.objects.dates('created_at', 'month')
    for month in months:
        m = month.strftime(DateFormats.MONTH)
        data[m] = [m]
        # Cumulative cut-off: all earlier years, plus this year up to and
        # including this month. Bounding year and month independently would
        # wrongly drop earlier-year records from later calendar months
        # (e.g. a December record when building the following February).
        fields = ExtendedField.objects.filter(
            Q(created_at__year__lt=month.year)
            | Q(created_at__year=month.year,
                created_at__month__lte=month.month))

        # One count per FIELD_CHOICES entry, in the same order as the
        # header row.
        for field_name, _ in ExtendedField.FIELD_CHOICES:
            field_count = fields.filter(field_name=field_name).distinct(
                'facility_id').count()
            data[m].append(field_count)

    columns = get_extended_field_columns()
    rows = sort_by_first_column(data.values(), reverse=True)
    return [columns, rows]


NON_SQL_REPORTS = {
'monthly_promoted_name_and_address': monthly_promoted_name_and_address,
'recent_monthly_geocoding_time_without_queue':
Expand All @@ -128,7 +285,17 @@ def weekly_geocoding_time_with_queue():
weekly_geocoding_time_without_queue,
'recent_monthly_geocoding_time_with_queue':
monthly_geocoding_time_with_queue,
'recent_weekly_geocoding_time_with_queue': weekly_geocoding_time_with_queue
'recent_weekly_geocoding_time_with_queue':
weekly_geocoding_time_with_queue,
'submitted_product_type_values': submitted_product_type_values,
'processing_type_facility_type_matched_values':
processing_type_facility_type_matched,
'processing_type_facility_type_unmatched_values':
processing_type_facility_type_unmatched,
'contributors_with_extended_fields_cumulative':
contributors_with_extended_fields_cumulative,
'facilities_with_extended_fields_cumulative':
facilities_with_extended_fields_cumulative
}


Expand Down