# Transportation Emissions Exposure Analysis
Sum of daily exposure to emissions from transportation sources using ABM outputs

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

## Use trip records to create activity patterns for each simulated person in the region

In [9]:
# Read the tab-delimited trip records (one row per trip)
df = pd.read_csv('_trip.tsv', sep='\t')

# Build a person identifier by joining the household number and the person number with '_'
household_label = df['hhno'].astype('str')
person_label = df['pno'].astype('str')
df['person_id'] = household_label.str.cat(person_label, sep='_')

In [53]:
# Attach the total number of trips per person, computed from the trip records.
# Every row of df is one trip, so the per-person row count is the number of trips. Counting rows
# (rather than summing the trexpfac expansion factor) keeps total_trips an integer trip count even
# when a factor differs from 1, which is what the trip-ordinal logic further down relies on.
tot_trips = df.groupby('person_id').size().reset_index()
tot_trips.columns = ['person_id','total_trips']

# Drop a total_trips column left over from a previous run of this cell so that re-running it
# does not produce total_trips_x / total_trips_y
df = pd.merge(df.drop('total_trips', axis=1, errors='ignore'), tot_trips, on='person_id', how='left')

In [162]:
# Build one activity record for every stay at a location:
#   - the stay before each trip: located at the trip's origin parcel (opcl), it ends when the trip
#     departs (deptm) and begins when the person's previous trip arrived (arrtm), or at time 0 for
#     the first trip of the day
#   - the stay after each person's last trip: located at that trip's destination parcel (dpcl),
#     it begins at the trip's arrival and runs to the end of the day
# This covers people with a single trip as well, and is computed in one vectorized pass instead of
# appending to the dataframe once per trip ordinal.
# NOTE(review): rows are assumed to be in chronological order within each person, because the row
# order is what pairs a trip with the one before it -- confirm against the trip file.
DAY_END_MINUTES = 24*60    # end of the last activity is 24 hours, expressed in minutes
activity_columns = ['person_id','parcel','begin_time','end_time','activity_index']

trips = df[['person_id','opcl','dpcl','arrtm','deptm']].reset_index(drop=True)
trips_by_person = trips.groupby('person_id', sort=False)
trip_index = trips_by_person.cumcount()    # 0-based position of each trip within the person's rows
previous_arrival = trips_by_person['arrtm'].shift()    # arrival time of the person's previous trip
is_last_trip = ~trips['person_id'].duplicated(keep='last')

# Activity that precedes each trip; the first activity of the day starts at time 0
activity_before_trip = pd.DataFrame({
    'person_id': trips['person_id'],
    'parcel': trips['opcl'],
    'begin_time': previous_arrival.where(trip_index > 0, 0).astype(trips['arrtm'].dtype),
    'end_time': trips['deptm'],
    'activity_index': trip_index,
})[activity_columns]

# Activity that follows the final trip of each person
final_trips = trips[is_last_trip]
activity_after_last_trip = pd.DataFrame({
    'person_id': final_trips['person_id'],
    'parcel': final_trips['dpcl'],
    'begin_time': final_trips['arrtm'],
    'end_time': DAY_END_MINUTES,
    'activity_index': trip_index[is_last_trip] + 1,
})[activity_columns]

activity = pd.concat([activity_before_trip, activity_after_last_trip], ignore_index=True)

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


In [175]:
# Locate the begin and end of every activity on an hourly grid so that hourly air quality
# estimates can be attached later. Times are divided by 60 and floored to get the hour bin.
for edge in ('begin', 'end'):
    activity[edge + '_hour'] = np.floor(activity[edge + '_time']/60.0).astype('int')

# Fraction of the hour bin that has already elapsed at the begin / end instant
# (e.g. a time 45 minutes into its hour gives 0.75). For the end instant this is the part of
# end_hour spent in the activity; for the begin instant the part of begin_hour spent in the
# activity is the remainder, 1 - begin_hour_fraction.
for edge in ('begin', 'end'):
    minutes_into_hour = activity[edge + '_time']-(activity[edge + '_hour']*60)
    activity[edge + '_hour_fraction'] = minutes_into_hour/60

In [177]:
# Persist the activity table (without the row index) so it can be reloaded later
activity_csv_path = r'C:/users/brice/activity.csv'
activity.to_csv(activity_csv_path, index=False)

In [531]:
# Reload the saved activity table from disk
saved_activity_path = r'C:/users/brice/activity.csv'
activity = pd.read_csv(saved_activity_path)

## Join Network Results to Activities
At the Census block level, calculate total hourly emissions exposure for each block by hour

In [178]:
# Activities are recorded by parcel; for now they are aggregated to census blocks
block_parcel_lookup = pd.read_csv(r'R:\aq\new_parcel_block_lookup.txt')

# Tag each activity with its census block (GEOID10) by matching the activity parcel to parcelid.
# The block id is used later to drop blocks that never appear in the activity table.
parcel_to_block = block_parcel_lookup[['parcelid','GEOID10']]
activity = activity.merge(parcel_to_block, how='left', left_on='parcel', right_on='parcelid')

In [179]:
# Intersection of census blocks and network components (to be replaced with a parcel intersect in future).
# It was produced in GIS by intersecting edges_0 with a layer of block centroids buffered at 500 ft;
# ideally this step would be done in code with geopandas.
block_network_path = r'R:\aq\block_network_intersect.txt'
block_network = pd.read_csv(block_network_path)

In [187]:
# Keep only the link length, block id and link end nodes, and only the blocks that
# show up in the activity table. The result is held in the temporary name df.
network_columns = ['Shape_Length','GEOID10','NewINode','NewJNode']
activity_blocks = pd.unique(activity['GEOID10'])
in_activity_block = block_network['GEOID10'].isin(activity_blocks)
df = block_network.loc[in_activity_block, network_columns]

### Load emissions rates and join with network volumes

In [188]:
# Emission rates written by the Soundcast scripts\summarize\standard\air_quality script
aq_rates = pd.read_csv(r'L:\T2040\soundcast_2014\outputs\aq_2014_july.csv')

# Pollutant IDs to carry along; it is not yet known which ones will be needed
pollutant_list = [1,2,3,5,6,79,87,90,91,98,100,106,107,110,112,115,116,117,118,119]

# Trim to the link nodes, volume, hour and one column per pollutant (named by the ID as a string)
pollutant_columns = list(map(str, pollutant_list))
aq_rates = aq_rates[['inode_x','jnode_x','total_volume','hourId'] + pollutant_columns]

In [189]:
# Attach the hourly rates to every block/link pair by matching the link end nodes.
# A left join keeps block/link pairs that have no matching rate record.
block_rates = df.merge(aq_rates, how='left',
                       left_on=['NewINode','NewJNode'],
                       right_on=['inode_x','jnode_x'])

In [190]:
# Total grams emitted on each link within the time frame:
# total volume x link length in miles (Shape_Length/5280) x the pollutant's rate column
# NOTE(review): presumably the rate columns are grams per vehicle-mile -- confirm with the air_quality script
link_volume_miles = block_rates['total_volume']*(block_rates['Shape_Length']/5280)
for pollutant_id in map(str, pollutant_list):
    block_rates[pollutant_id+'_total_grams'] = link_volume_miles*block_rates[pollutant_id]

In [191]:
# Add up the link emissions inside each census block for every hourId,
# keeping only pollutants 100 and 1, and return GEOID10 / hourId as ordinary columns
summed_columns = ['100_total_grams','1_total_grams']
total_hourly_block_grams = (
    block_rates
    .groupby(['GEOID10','hourId'])[summed_columns]
    .sum()
    .reset_index()
)

# Pollution totals by assignment period
Rates are given at soundcast assignment periods
Take the average for each hour to calculate hourly exposure

In [192]:
def average_emissions_to_hours(df, hour_list, value_columns=('100_total_grams','1_total_grams')):
    """Spread a multi-hour period total evenly over the hours of that period.

    Rows whose hourId equals ``hour_list[0]`` are treated as holding the total for the
    whole period. They are replaced by one copy per hour in ``hour_list``, each copy
    carrying ``1/len(hour_list)`` of the period total, so the sum over the period is
    unchanged. Rows for any other hourId are passed through untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an 'hourId' column and the columns named in ``value_columns``.
        It is not modified.
    hour_list : list of int
        Hours covered by the period; the first entry is the hourId under which the
        period total is stored in ``df``.
    value_columns : sequence of str, optional
        Columns holding the totals to divide. Defaults to the two gram columns this
        notebook carries forward.

    Returns
    -------
    pandas.DataFrame
        New table with a fresh 0..n-1 index.
    """
    # Nothing to spread: hand back the rows unchanged (with the same fresh index as below)
    if len(hour_list) == 0:
        return df.reset_index(drop=True)

    value_columns = list(value_columns)

    is_period_row = df['hourId'] == hour_list[0]

    # One hour's share of the period total
    hourly_share = df[is_period_row].copy()
    hourly_share[value_columns] = hourly_share[value_columns]/float(len(hour_list))

    # Start from everything outside the period, then add one copy of the share per hour
    pieces = [df[~is_period_row]]
    for hour in hour_list:
        hour_rows = hourly_share.copy()
        hour_rows['hourId'] = hour
        pieces.append(hour_rows)

    return pd.concat(pieces, ignore_index=True)

In [193]:
# Spread each multi-hour period over its hours, one period at a time.
# The first hour of every list is the hourId under which the period is stored.
multi_hour_periods = (
    [10,11,12,13],
    [18,19],
    [20,21,22,23,0,1,2,3,4],
)
hourly_emissions_total = total_hourly_block_grams
for period_hours in multi_hour_periods:
    hourly_emissions_total = average_emissions_to_hours(hourly_emissions_total, hour_list=period_hours)

In [194]:
# Save the hourly block emissions (without the row index) for later reuse
hourly_emissions_path = r'R:/aq/hourly_emissions_total.csv'
hourly_emissions_total.to_csv(hourly_emissions_path, index=False)

## Calculate hourly totals by activity
hourly_emissions_total is the sum of emissions (grams) released during each hour by block
We need to join this to the activity list to get the total per person per day


In [259]:
# Start with a single person's activity
# activity_sample  = activity[activity['person_id'] == '9_1']

In [195]:
# Calculate the total grams for a given block and time period

def total_activity_emissions(df, zone_num, emissions_type, begin_hour, begin_hour_share, end_hour, end_hour_share,
                            geography_field='GEOID10'):
    """Calculate the total grams released in a zone over the duration of one activity.

    Parameters
    ----------
    df : pandas.DataFrame
        Hourly emissions table with the columns ``geography_field``, 'hourId' and
        ``emissions_type``. For backward compatibility, when ``df`` does not have
        those columns the module-level ``hourly_emissions_total`` table is used instead.
    zone_num :
        Value of ``geography_field`` identifying the zone of the activity.
    emissions_type : str
        Name of the column holding the grams to accumulate (e.g. '1_total_grams').
    begin_hour, end_hour : int
        Hour bins in which the activity begins and ends.
    begin_hour_share : float
        Fraction of ``begin_hour`` that had already elapsed when the activity began
        (0.75 for an activity starting 45 minutes into the hour).
    end_hour_share : float
        Fraction of ``end_hour`` that had elapsed when the activity ended.
    geography_field : str, optional
        Column holding the zone identifier.

    Returns
    -------
    float
        Grams for the zone summed over the activity: the remaining part of the
        begin hour, every full hour in between, and the elapsed part of the end hour.
        A zone/hour with no row in the table contributes 0.
    """
    # Use the table that was passed in when it really is an emissions table; otherwise
    # fall back to the notebook-level table, as callers that pass something else expect
    required_columns = (geography_field, 'hourId', emissions_type)
    if not all(column in getattr(df, 'columns', ()) for column in required_columns):
        df = hourly_emissions_total

    # Filter to the zone once instead of re-scanning the whole table for every hour
    zone_rows = df[df[geography_field] == zone_num]

    def _grams_in_hour(hour):
        """Grams for this zone in one hour; 0 when the table has no row for it."""
        matches = zone_rows.loc[zone_rows['hourId'] == hour, emissions_type].values
        return matches[0] if len(matches) > 0 else 0.0

    # Row values can arrive as numpy or float scalars; range() needs plain integers
    begin_hour = int(begin_hour)
    end_hour = int(end_hour)

    # Activity contained in a single hour: only the time between begin and end counts
    if begin_hour == end_hour:
        return _grams_in_hour(begin_hour)*(end_hour_share - begin_hour_share)

    # Begin hour: the person is present for the part of the hour that has not yet elapsed
    first_hour_total = _grams_in_hour(begin_hour)*(1 - begin_hour_share)

    # End hour: the person is present for the part of the hour that has elapsed
    last_hour_total = _grams_in_hour(end_hour)*end_hour_share

    # Full hours strictly between the begin and end hours
    interim_total = sum(_grams_in_hour(hour) for hour in range(begin_hour+1, end_hour))

    return first_hour_total + interim_total + last_hour_total

In [196]:
# Compute the exposure of every activity by calling total_activity_emissions once per row.
# NOTE: this is still one Python-level call per activity, so it is slow on the full table.

# Only include activities that occur within areas that have pollution.
# Copy the filtered rows so the new column is added to an independent frame, not a slice of activity.
blocks_with_emissions = pd.unique(hourly_emissions_total['GEOID10'])
df = activity[activity['GEOID10'].isin(blocks_with_emissions)].copy()

# Iterate over the needed columns directly (no per-row iloc lookup, no per-row printing)
# and hand the hourly emissions table to the function explicitly
df['total_exposure'] = [
    total_activity_emissions(hourly_emissions_total, zone_num=geoid, emissions_type='1_total_grams',
                             begin_hour=begin_hour, begin_hour_share=begin_fraction,
                             end_hour=end_hour, end_hour_share=end_fraction)
    for geoid, begin_hour, begin_fraction, end_hour, end_fraction in zip(
        df['GEOID10'], df['begin_hour'], df['begin_hour_fraction'],
        df['end_hour'], df['end_hour_fraction'])
]

Exception KeyboardInterrupt in 'zmq.backend.cython.message.Frame.__dealloc__' ignored


KeyboardInterrupt: 

In [None]:
# Total daily exposure per person: add up the exposure of all of the person's activities.
# Only total_exposure is summed; sums of parcel ids, hours or activity indexes carry no meaning.
person_exposure = df.groupby('person_id')[['total_exposure']].sum()
person_exposure.head()

In [619]:
# WOOHOO - total daily emissions exposure by person!

# For Analysis:
- attach activity type to each location
- other things