## Test cases for data integrity / geospatial ORM models

In [54]:
import math

from django.db.models import Sum

from main.models import Listing, Amenity, Neighborhood, Tract

def test_listings_have_location(listings):
    passed = 0
    failed = 0
    for l in listings:
        try:
            assert l.point is not None
            passed += 1
        except AssertionError as e:
            failed += 1
    print("Testing that listings have non-null location attribute:")
    print("--> %d listings passed, %d listings failed.\n" % (passed, failed))


def test_listings_in_correct_tract(listings):
    passed = 0
    failed = 0
    for l in listings:
        try:
            if l.tract:
                assert l.tract.mpoly.contains(l.point)
            passed += 1
        except AssertionError as e:
            failed += 1
    print("Testing that listings are contained within the polygon of the assigned census tracts:")
    print("--> %d listings passed, %d listings failed.\n" % (passed, failed))

def test_amenity_assignments(amenities):
    passed = 0
    failed = 0
    for a in amenities:
        try:
            assert a.listing_set.count() > 0
            passed += 1
        except AssertionError as e:
            failed += 1
    print("Testing that every amenity observed in the data set has been assigned to at least one listing:")
    print("--> %d amenities passed, %d amenities failed.\n" % (passed, failed))

def test_neighborhood_age_distributions(neighborhoods):
    passed = 0
    failed = 0
    for n in neighborhoods:
        try:
            # Make sure that all age brackets add up to 1.0 (or approximately)
            # unless there is no age data for this neighborhood
            data = n.fixed_data
            age_brackets = ['percent_age_0_17', 'percent_age_18_34', 'percent_age_35_49',
                            'percent_age_50_64', 'percent_age_65_up']
            bracket_values = [data[bracket] if data[bracket] is not None else 0
                             for bracket in age_brackets]
            assert sum(bracket_values) == 0 or sum(bracket_values) > .999
            passed += 1
        except AssertionError as e:
            failed += 1
    print("Testing that neighborhood age bracket fractions sum to 1.0:")
    print("--> %d neighborhoods passed, %d neighborhoods failed.\n" % (passed, failed))

def test_neighborhood_tract_geospatial_aggregations(neighborhoods):
    passed = 0
    failed = 0
    for n in neighborhoods:
        try:
            combined_tract_area = sum(t.mpoly.area for t in n.tract_set.all())
            neighborhood_area = n.mpoly.area
            # Ignore neighborhoods with no tracts. This is expected, because
            # the census doesn't cover lakes, etc.
            if combined_tract_area > 0:
                log_difference = math.log(neighborhood_area, 10) - math.log(combined_tract_area, 10)
                assert abs(log_difference) <= 1.0 # within same order of magnitude
            passed += 1
        except AssertionError as e:
            failed += 1
    print("Testing that tract-neighborhood aggregations are geometrically sane (i.e., neighborhood and associated tract areas within same order of magnitude):")
    print("--> %d neighborhoods passed, %d neighborhoods failed.\n" % (passed, failed))

    


## Test results

In [56]:
# Run every integrity check, in order, against all rows of its model.
# Each check is handed its own ``objects.all()`` queryset.
for run_check, model in (
    (test_listings_have_location, Listing),
    (test_listings_in_correct_tract, Listing),
    (test_amenity_assignments, Amenity),
    (test_neighborhood_age_distributions, Neighborhood),
    (test_neighborhood_tract_geospatial_aggregations, Neighborhood),
):
    run_check(model.objects.all())

Testing that listings have non-null location attribute:
--> 26070 listings passed, 0 listings failed.

Testing that listings are contained within the polygon of the assigned census tracts:
--> 26070 listings passed, 0 listings failed.

Testing that every amenity observed in the data set has been assigned to at least one listing:
--> 41 amenities passed, 0 amenities failed.

Testing that neighborhood age bracket fractions sum to 1.0:
--> 272 neighborhoods passed, 0 neighborhoods failed.

Testing that tract-neighborhood aggregations are geometrically sane (i.e., neighborhood and associated tract areas within same order of magnitude):
--> 272 neighborhoods passed, 0 neighborhoods failed.

