In [1]:
# Root folder of the logos checkout; it is passed to set_up_django_project() below as the project
# path and is also used to locate the project's .env file. Change it to match your environment.
PROJECT_FOLDER = "/apps/prod/logos"

In [2]:
# # For future reference, this is how we can set up the test database in a notebook (e.g. for examples)

# import sys, os
# DJANGO_LOCATION = "/Users/pvankessel/.pyenv/versions/3.6.5/envs/python3/lib/python3.6/site-packages"
# sys.path.append(DJANGO_LOCATION)
# import django
# os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# os.environ['DJANGO_SETTINGS_MODULE'] = 'testapp.settings'
# django.setup()

# from django import test
# from django.db import connection
# test.utils.setup_test_environment() # Setup the environment
# db = connection.creation.create_test_db() # Create the test db

# from testapp.tests.abstract_models import AbstractModelTests
# AbstractModelTests().setUp()

In [3]:
# Something we should probably add to django_pewtils
def set_up_django_project(project_name, project_path, env_file=None):
    """
    Configure the current Python process (e.g. a notebook kernel) to use a Django project and then
    call ``django.setup()``.

    Steps, in order:

    1. Set ``DJANGO_ALLOW_ASYNC_UNSAFE``, ``DJANGO_SETTINGS_MODULE`` and (when rasterio can locate it)
       ``GDAL_DATA`` in ``os.environ``.
    2. If ``env_file`` is given, copy its ``KEY=VALUE`` lines into ``os.environ``. Blank lines, lines
       starting with ``#`` and lines without an ``=`` are skipped, and ``PYTHONPATH`` is never
       overwritten. Surrounding double quotes are removed from values.
    3. Put the project root at ``sys.path[0]`` and its ``src`` folder at ``sys.path[1]``, then insert
       every entry found inside ``src`` at the front of ``sys.path``.
    4. If ``project_path`` does not contain ``/apps/prod``, remove every ``sys.path`` entry that does.
    5. Call ``django.setup()``.

    :param project_name: Name of the Django project package; ``<project_name>.settings`` is used as the \
    settings module
    :param project_path: Path to the project root; it must contain a ``src`` folder
    :param env_file: Optional path to a file of ``KEY=VALUE`` lines to load into the environment
    :return: None
    :raises OSError: If ``env_file`` cannot be opened or ``<project_path>/src`` cannot be listed
    """

    import os
    import sys

    import django
    from rasterio.env import GDALDataFinder

    os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
    os.environ["DJANGO_SETTINGS_MODULE"] = "{}.settings".format(project_name)
    # os.environ only accepts strings, so only export GDAL_DATA when the finder actually returns a path
    gdal_data = GDALDataFinder().search()
    if gdal_data:
        os.environ["GDAL_DATA"] = gdal_data

    if env_file:
        with open(env_file, "r") as infile:
            for raw_line in infile:
                # Strip the line ending BEFORE stripping quotes; doing it the other way around
                # leaves a dangling '"' on every quoted value that is followed by a newline
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                # Split on the first "=" only, so values may themselves contain "="
                key, _, value = line.partition("=")
                key = key.strip()
                if not key or key == "PYTHONPATH":
                    continue
                os.environ[key] = value.strip().strip('"')

    src_path = "{}/src".format(project_path)
    # An ordered list (not a set) guarantees the project root always lands ahead of its src folder
    for i, path in enumerate([project_path, src_path]):
        if path in sys.path:
            sys.path.remove(path)
        sys.path.insert(i, path)
    for folder in os.listdir(src_path):
        sys.path.insert(0, os.path.join(project_path, "src", folder))
    if "/apps/prod" not in project_path:
        # Working from a non-production checkout: make sure production code can't be imported instead
        sys.path[:] = [path for path in sys.path if "/apps/prod" not in path]

    django.setup()
    
# Point PROJECT_FOLDER at wherever you have the logos project checked out
set_up_django_project(
    project_name="logos",
    project_path=PROJECT_FOLDER,
    env_file=PROJECT_FOLDER + "/deploy/prod/master/conf/logos.env",
)

In [None]:
from logos.models import *
import pandas as pd

# Django Verifications

Django Verifications is designed to make it easier to check database data for accuracy and make corrections where necessary. It also prevents any accidental modifications to data once it's been checked. Let's take a look at Logos, where we use Django Verifications to check our list of politicians' Facebook pages. We'll take a look at Bernie Sanders' personal Facebook page, which is an instance of the FacebookPage model in Logos.

In [76]:
bernie = FacebookPage.objects.get(facebook_id="124955570892789")
bernie

<FacebookPage: berniesanders (Bernard 'Bernie' Sanders)>

In [77]:
type(bernie)

logos.models.facebook.FacebookPage

### Configuration

To configure Django Verifications, you need to tell it a few things:

1) Which tables you want to verify. You do this by simply having those models inherit from `django_verifications.models.VerifiedModel`

In [78]:
from django_verifications.models import VerifiedModel
isinstance(bernie, VerifiedModel)

True

2) Which fields on the table you want to verify. You do this by defining a `fields_to_verify` list as one of your model's `Meta` attributes

In [79]:
FacebookPage._meta.fields_to_verify

['politician', 'is_official', 'account_type']

3) Which rows in the table you care about. You do this by defining Django filters in a `verification_filters` list in your model's `Meta` attributes

In [80]:
FacebookPage._meta.verification_filters

[{'politician__isnull': False}]

4) What information you want to use when you're coding. You do this by defining a custom `get_verification_metadata()` method on your model - it just needs to return a dictionary. If you don't define this method, Django Verifications will just pull everything it can from the table.

In [81]:
bernie.get_verification_metadata()

{'name': 'Bernie Sanders',
 'username': 'berniesanders',
 'city': 'Burlington',
 'state': <State: Vermont>,
 'link': 'https://www.facebook.com/berniesanders/',
 'category': 'Public Figure',
 'websites': ['https://berniesanders.com'],
 'about': 'This is the official page for Bernie Sanders. Join our political revolution!',
 'bio': None,
 'politician': "Bernard 'Bernie' Sanders --- latest term: Bernard 'Bernie' Sanders term as Senator of Vermont, U.S. Senate (Class 1), 2019 - 2025",
 'other_accounts': ['senatorsanders'],
 'facebook_id': '124955570892789'}

### Verification objects

Django Verifications provides an interface for verifying and correcting data, but behind the scenes, what it's actually doing is creating associations between your app's `VerifiedModel` models and its own `Verification` model. Let's see everything we've verified in Logos:

In [82]:
from django_verifications.models import Verification

Verification.objects.all().count()

22738

The Verification model actually has its own `VerificationManager` with some handy filtering functions to help sort through everything.

In [83]:
Verification.objects.available_model_names()

['politician', 'twitter_profile', 'facebook_page']

In [84]:
Verification.objects.filter_by_model_name("facebook_page").count()

10672

In [85]:
Verification.objects.flagged_for_verification("facebook_page").count()

3058

You usually don't need to worry about this though - not only is everything taken care of through the interface, but your own `VerifiedModel` models also get their own `VerifiedModelManager` that provides all this functionality in the opposite direction, so there's no need to import anything from Django Verifications.

In [86]:
FacebookPage.objects.flagged_for_verification().count()

3058

# Making corrections

Let's take a look at the verifications attached to Bernie's page:

In [87]:
bernie.verifications.all()

<VerificationManager [<Verification: Verification object (459121)>, <Verification: Verification object (459122)>, <Verification: Verification object (459123)>]>

In [88]:
pd.DataFrame.from_records(bernie.verifications.values())

Unnamed: 0,id,field,user_id,timestamp,is_good,notes,corrected,content_type_id,object_id
0,459121,politician,2,2021-05-25 10:34:25.656757,True,,False,14,1316
1,459122,is_official,2,2021-05-25 10:34:25.676816,True,,False,14,1316
2,459123,account_type,2,2021-05-25 10:39:04.878026,True,,True,14,1316


We've already verified everything, so if we try to change any of those fields, we'll get an error:

In [89]:
bernie.account_type = None
bernie.save()

VerifiedFieldLock: Cannot modify field account_type on object berniesanders (Bernard 'Bernie' Sanders) due to existing verification (currently 'pol_personal', attempted to replace with 'None')

As a demonstration, though - let's get rid of those verifications and get Bernie back in the queue so we can check out the interface.

In [90]:
# Order matters: the existing verifications have to be deleted first, otherwise the save() below is
# rejected with VerifiedFieldLock (as shown in the previous cell).
# NOTE(review): this deletes real Verification rows from whichever database the project settings
# point at - confirm you are not connected to production data before running this cell.
bernie.verifications.all().delete()
bernie.account_type = None
bernie.save()

In [91]:
# Print how many Facebook pages fall into each verification status, one count per line
for status_filter in (
    FacebookPage.objects.flagged_for_verification,
    FacebookPage.objects.has_unexamined_fields,
    FacebookPage.objects.all_fields_good_or_corrected,
    FacebookPage.objects.any_field_incorrect,
    FacebookPage.objects.all_fields_examined,
):
    print(status_filter().count())

3058
1
3057
0
3057


# ***Let's go into the interface and make corrections***
https://logos.pewresearch.tech/verifications

In [92]:
# Print how many Facebook pages fall into each verification status, one count per line
for status_filter in (
    FacebookPage.objects.flagged_for_verification,
    FacebookPage.objects.has_unexamined_fields,
    FacebookPage.objects.all_fields_good_or_corrected,
    FacebookPage.objects.any_field_incorrect,
    FacebookPage.objects.all_fields_examined,
):
    print(status_filter().count())

3058
0
3057
1
3058


In [93]:
# Print how many Facebook pages fall into each verification status, one count per line
for status_filter in (
    FacebookPage.objects.flagged_for_verification,
    FacebookPage.objects.has_unexamined_fields,
    FacebookPage.objects.all_fields_good_or_corrected,
    FacebookPage.objects.any_field_incorrect,
    FacebookPage.objects.all_fields_examined,
):
    print(status_filter().count())

3058
0
3058
0
3058


In [94]:
pd.DataFrame.from_records(bernie.verifications.values())

Unnamed: 0,id,field,user_id,timestamp,is_good,notes,corrected,content_type_id,object_id
0,459124,politician,2,2021-05-25 11:26:08.845798,True,,False,14,1316
1,459125,is_official,2,2021-05-25 11:26:47.201979,True,,True,14,1316
2,459126,account_type,2,2021-05-25 11:26:47.207914,True,,True,14,1316


In [95]:
bernie.account_type = None
bernie.save()

VerifiedFieldLock: Cannot modify field account_type on object berniesanders (Bernard 'Bernie' Sanders) due to existing verification (currently 'pol_personal', attempted to replace with 'None')