In [None]:
# Fetch every raw listing row from the database with a single query,
# skipping listings whose tract does not appear in the census tract dataframe.
all_tract_ids_in_df = list(TRACT_DATA.tract_id)

# Difference between the scrape date and the date the host joined,
# computed by the database and returned as a duration.
host_experience = ExpressionWrapper(
    F('last_scraped') - F('host_since'),
    output_field=DurationField(),
)

# Columns (model fields and annotations) to carry into the raw dataset.
listing_columns = [
    'id',
    'host_experience_days',
    'point_geojson',
    'neighborhood_id',
    'tract_id',
    'zipcode_id',
    'block_group_id',
    'host_is_superhost',
    'host_identity_verified',
    'property_type',
    'room_type',
    'accommodates',
    'bathrooms',
    'bedrooms',
    'bed_type',
    'minimum_nights',
    'price',
    'availability_365',
    'estimated_revenue_per_month',
]

listings_in_known_tracts = Listing.objects.filter(
    tract_id__in=all_tract_ids_in_df,
)
annotated_listings = listings_in_known_tracts.annotate(
    point_geojson=AsGeoJSON('point'),
    host_experience_days=host_experience,
)

# Evaluate the QuerySet now and keep the rows as a plain list of dicts.
raw_data = list(annotated_listings.values(*listing_columns))

# Post-process each row in place: split the GeoJSON point into separate
# longitude / latitude entries and convert the host-experience timedelta
# into a whole number of days.
import json  # imported here because this cell is the only user of the parser

for item in raw_data:
    # geojson --> lat, lon
    # AsGeoJSON produces a JSON document, so use the JSON parser rather than
    # ast.literal_eval (JSON tokens such as null/true/false are not Python
    # literals and would make literal_eval fail).
    geojson = json.loads(item['point_geojson'])
    # The code reads coordinates[0] as longitude and coordinates[1] as latitude.
    item['longitude'] = geojson['coordinates'][0]
    item['latitude'] = geojson['coordinates'][1]
    item.pop('point_geojson')  # we don't need this anymore

    # timedelta --> int
    # The annotation comes back as None when the database subtraction is NULL
    # (i.e. one of the two dates is missing); keep None instead of crashing
    # on `.days`.
    experience = item['host_experience_days']
    item['host_experience_days'] = (
        experience.days if experience is not None else None
    )

# Add amenity fields: one boolean 'amenity_<id>' entry per known amenity on
# every row, True when the listing has that amenity.
all_amenity_ids = Amenity.objects.order_by('id').values_list('id', flat=True)

# Fetch all (listing id, amenity id) pairs in ONE query instead of issuing
# two queries per listing. Traversing the many-to-many field in values_list
# yields one row per related amenity, and a single row with None for a
# listing that has no amenities. The filter mirrors the one used to build
# raw_data so only the relevant listings are fetched.
listing_amenity_pairs = (
    Listing.objects
    .filter(tract_id__in=all_tract_ids_in_df)
    .values_list('id', 'amenities')
)

# Group amenity ids per listing into sets for O(1) membership tests below.
amenity_ids_by_listing = {}
for listing_id, amenity_id in listing_amenity_pairs:
    if amenity_id is not None:
        amenity_ids_by_listing.setdefault(listing_id, set()).add(amenity_id)

for item in raw_data:
    # Listings without any amenity have no entry; treat them as an empty set.
    its_amenities = amenity_ids_by_listing.get(item['id'], ())
    amenity_data = {
        'amenity_%d' % amenity_id: amenity_id in its_amenities
        for amenity_id in all_amenity_ids
    }
    item.update(amenity_data)