In [45]:
import pandas as pd
import matplotlib.pyplot as plt

# Folders. Each one ends with a path separator because file names are
# appended to them by plain string concatenation (here and in later cells).
direc = 'D:\\004_ReferenceMaterial\\02 - SA National\\03 - National Household Travel Survey\\csv\\'
spath = 'D:\\001_Projects\\01 - GIPTN\\04 - Macro Model\\NHTS George Summary\\'

# File names inside those folders.
file = 'nhts-2013-person-v1-20140718.csv'
PGFile = 'Mode_Share_Link.csv'
TGFile = 'Trip Gen George.csv'

# Full paths: the survey file lives under direc, the two summary inputs under spath.
path = direc + file
PGPath = spath + PGFile
TGPath = spath + TGFile

# df   - the NHTS person file
# dfPG - link table merged on 'Q33_41_42' in calculate_percentage
# dfTG - table that is later grouped by 'TAZ Number'
df = pd.read_csv(path)
dfPG = pd.read_csv(PGPath)
dfTG = pd.read_csv(TGPath)

In [46]:
def plot(col):
    '''
    Draw a bar chart of how many rows of df fall under each value of col.

    The bar height is the count of non-null UQNO values per distinct value
    of col. df is the module-level NHTS frame, not an argument.

    The figure is written to direc + 'graph.png'; the same file name is
    used on every call.
    '''
    counts = df.groupby([col]).UQNO.count()

    fig, ax = plt.subplots(figsize=(15, 7))
    ax.bar(counts.index, counts)

    ax.set_xlabel('Response')
    ax.set_ylabel('Count of ' + col)
    ax.set_title(col)

    fig.savefig(direc + 'graph.png')

def replace(df):
    '''
    Recode numeric answer codes in six NHTS columns into coarse text labels.

    Q33EDUII, Q41WORK and Q42YNOTWRKK get person-status labels; Mainmode_q312,
    Mainmode_q421 and Q63MAINM get a main mode (PuT, PrT, Walk, Cycl, Other
    or None). Values that are not listed for a column are left as they are.

    The columns are overwritten on the frame that is passed in, and that same
    frame is returned.
    '''
    # Mainmode_q312 and Mainmode_q421 share one code list and one label list.
    main_mode_codes = [1, 2, 3, 4, 5, 6, 7, 88, 99]
    main_mode_labels = ['PuT', 'PuT', 'PuT', 'PrT', 'PrT', 'Walk',
                        'Other', 'None', 'None']

    # (column, answer codes, labels) - codes and labels pair up by position.
    recodes = [
        ('Q33EDUII',
         [1, 2, 3, 4, 5, 6, 7, 8, 9, 88, 99],
         ['School', 'School', 'Student', 'None',
          'Student', 'Student', 'Student',
          'None', 'None', 'None', 'None']),
        ('Q41WORK',
         [1, 2, 3, 8],
         ['EmployedF', 'EmployedIF', 'None', 'None']),
        ('Q42YNOTWRKK',
         [1, 2, 3, 4, 5, 6, 7, 8, 9, 88, 99],
         ['None', 'Other', 'Retired', 'Other', 'Unemployed', 'Unemployed',
          'Other', 'EmployedF', 'Other', 'None', 'None']),
        ('Mainmode_q312', main_mode_codes, main_mode_labels),
        ('Mainmode_q421', main_mode_codes, main_mode_labels),
        ('Q63MAINM',
         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
          12, 13, 14, 15, 16, 17, 18, 19, 20,
          21, 22, 88, 99],
         ['PuT', 'PuT', 'PuT', 'PuT', 'PuT', 'PuT',
          'PuT', 'PrT', 'PrT', 'PrT', 'PrT', 'PrT',
          'PrT', 'PrT', 'PrT', 'Cycl', 'PrT',
          'PrT', 'PrT', 'PuT', 'Walk', 'None',
          'None', 'None']),
    ]

    for column, codes, labels in recodes:
        df[column] = df[column].replace(codes, labels)

    return df

def mode(PERSON_GROUP, Q312MODE1, Q412CYC, Mainmode_q312, Mainmode_q421, Q63MAINM):
    '''
    Pick the single mode value to report for one respondent row.

    Order of precedence:
      1. 'Cycl' when Q312MODE1 is 16 or Q412CYC is 1
         (presumably the cycling answer codes - confirm against the questionnaire)
      2. Mainmode_q421 for the EmployedF / EmployedIF person groups
      3. Mainmode_q312 for the School / Student person groups
      4. Q63MAINM for every other person group
    '''
    if Q312MODE1 == 16 or Q412CYC == 1:
        return 'Cycl'

    if PERSON_GROUP in ("EmployedF", "EmployedIF"):
        return Mainmode_q421

    if PERSON_GROUP in ("School", "Student"):
        return Mainmode_q312

    return Q63MAINM

def calculate_percentage(df, TAZCODE = 0):
    '''
    Count respondents per person group, Quintile and mode, and derive mode shares.

    Parameters
    ----------
    df : DataFrame
        Respondent rows. Q33EDUII, Q41WORK and Q42YNOTWRKK are concatenated as
        strings below, so they are expected to hold text labels already
        (e.g. after replace(df)).
    TAZCODE : int, optional
        Zone to keep. The default 0 means "no filter": every row of df is used.

    Returns
    -------
    DataFrame
        Indexed by PERSON_GROUP + Quintile, with one count column per MODE
        value, a TOTAL column (Cycl + PrT + PuT + Walk) and the share columns
        CyclP, PrTP, PuTP and WalkP. A share is NaN where TOTAL is 0.

    Notes
    -----
    Uses the module-level dfPG link table and the mode() helper.
    '''
    if TAZCODE == 0:
        # str() is required here: '+' between a str and an int raises TypeError.
        print("TAZCODE = " + str(TAZCODE))
    else:
        df = df[df['TAZCODE'] == TAZCODE]

    answer_columns = ['Quintile', 'Q33EDUII', 'Q41WORK', 'Q42YNOTWRKK',
                      'Q312MODE1', 'Q312MODE2', 'Q312MODE3', 'Q411WLK',
                      'Q412CYC', 'Q413DRIV', 'Q414TYPVEH',
                      'Mainmode_q312', 'Mainmode_q421', 'Q63MAINM']

    # Respondents per distinct answer combination. With groupby's defaults,
    # rows with a missing value in any of these columns are left out.
    dfs = df.groupby(answer_columns).UQNO.count().reset_index()

    dfs['Q33_41_42'] = dfs['Q33EDUII'] +  dfs['Q41WORK'] + dfs['Q42YNOTWRKK']

    # Default (inner) merge: combinations that have no row in dfPG are dropped.
    dfs = dfs.merge(dfPG, left_on='Q33_41_42', right_on='Q33_41_42')

    # One mode() call per row, fed straight from the columns it needs.
    dfs['MODE'] = [
        mode(*answers)
        for answers in zip(dfs['PERSON_GROUP'], dfs['Q312MODE1'], dfs['Q412CYC'],
                           dfs['Mainmode_q312'], dfs['Mainmode_q421'], dfs['Q63MAINM'])
    ]

    dfs = dfs.groupby(['Quintile', 'PERSON_GROUP', 'MODE']).UQNO.sum().reset_index()
    dfs['PERSON_GROUP'] = dfs['PERSON_GROUP'] + dfs['Quintile'].astype(str)
    dfs = dfs.groupby(['PERSON_GROUP', 'MODE']).UQNO.sum().reset_index()
    dfs = dfs.pivot(index='PERSON_GROUP', columns='MODE', values='UQNO')

    dfs = dfs.fillna(0)

    # The pivot only has columns for modes that actually occur in the selected
    # rows. Add the missing ones as zero counts so the totals and shares below
    # do not raise KeyError for a zone without, say, any cyclists.
    share_modes = ('Cycl', 'PrT', 'PuT', 'Walk')
    for mode_name in share_modes:
        if mode_name not in dfs.columns:
            dfs[mode_name] = 0

    dfs['TOTAL'] = dfs['Cycl'] + dfs['PrT'] + dfs['PuT'] + dfs['Walk']

    for mode_name in share_modes:
        dfs[mode_name + 'P'] = dfs[mode_name] / dfs['TOTAL']

    return dfs

def save(df, p):
    '''
    Write df to p as CSV by calling df.to_csv(p).
    '''
    df.to_csv(p)

def print_columns(df):
    '''
    Print every column label of df, one per line.
    '''
    for label in df.columns:
        print(label)

In [41]:
TAZCODE = 9017

# replace() overwrites the coded columns of df in place, so its return value
# is not needed here.
replace(df)

modeshare = calculate_percentage(df, TAZCODE = TAZCODE)

try:
    save(modeshare, spath + 'Mode_Share%s.csv' % (TAZCODE))
except PermissionError:
    # Only the "target file is locked / open in another program" case gets the
    # friendly hint. Any other failure (bad folder, bad data, ...) propagates
    # with its real traceback instead of being reported as a locked file.
    print('Please close the mode share file and try again')
else:
    print('completed')
    
#print_columns(df)

completed


In [49]:
# Sum dfTG per 'TAZ Number' and write the result to CSV under spath.
# NOTE(review): the '%s' in the file name is never formatted, so the output
# file is literally named 'Mode_Share%s.csv'. It also reuses the mode-share
# file stem for trip-generation totals - confirm the intended file name.
dfTG.groupby('TAZ Number').sum().reset_index().to_csv(spath + 'Mode_Share%s.csv')