In [1]:
import configparser
import datetime,os,re,sys,subprocess
from socket import gethostname

import geopandas as gpd
import h5py
import matplotlib
import numpy as np
import pandas as pd
from simpledbf import Dbf5 as dbf
from tables import open_file
from tabulate import tabulate

In [2]:
def readEqvFile(eqvfile):
    """Parse a district equivalency (.eqv) file.

    Only lines containing ``DIST <distnum>=<taznum>[ <name>]`` are used;
    every other line is ignored.

    Parameters
    ----------
    eqvfile : str
        Path of the equivalency file.

    Returns
    -------
    tuple
        ``(distnames, distToTaz, tazToDist, numdists)`` where

        * distnames: distnum -> district name (only for lines that carry a name)
        * distToTaz: distnum -> list of taznums
        * tazToDist: taznum  -> list of distnums
        * numdists:  number of *named* districts, i.e. ``len(distnames)``
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    distline_re = re.compile(r'DIST (\d+)=(\d+)( .+)?')
    distnames = {}
    distToTaz = {}
    tazToDist = {}

    # The context manager closes the file even if parsing raises.
    with open(eqvfile, 'r') as f:
        for line in f:
            m = distline_re.search(line)
            if m is None:
                continue
            dist = int(m.group(1))
            taz = int(m.group(2))
            distToTaz.setdefault(dist, []).append(taz)
            tazToDist.setdefault(taz, []).append(dist)
            # The optional third group is the district name, preceded by a space.
            if m.group(3) is not None:
                distnames[dist] = m.group(3).strip(' ')

    numdists = len(distnames)
    return (distnames, distToTaz, tazToDist, numdists)
  
#newly added
def readCtlFile(ctlfile, runsummit=False):
    """Load an INI-style control file.

    Parameters
    ----------
    ctlfile : str
        Path of the control file. ``ConfigParser.read`` silently skips files
        it cannot open, so a missing file yields an empty configuration.
    runsummit : bool, optional
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    configparser.ConfigParser
        The parsed configuration.
    """
    # Relies on the module-level ``import configparser``.
    config = configparser.ConfigParser()
    config.read(ctlfile)
    return config

In [3]:
class ResidentPurposes:
    """Counts resident trips by destination purpose, time period and home district.

    A trip is attributed to the first district that the equivalency file
    lists for the household's home TAZ (``hhtaz``).
    """
    # Tab-separated input tables, relative to the current working directory.
    RP_HH           = "data/_household_2.dat"
    RP_PERSON       = "data/_person_2.dat"
    RP_TOUR         = "data/_tour_2.dat"
    RP_DISAG_TRIPS  = "data/_trip_2.dat"
    # Purpose labels; 'Total' is a presentation-only row and never gets counts.
    RP_ROWS         = [ 'Work', 'School','Escort','Personal Business & Medical','Shopping','Meals',
                       'Social & Recreational','Change Mode','Return Home','Total' ]
    RP_PURPOSES     = { 1:RP_ROWS[0], 2:RP_ROWS[1], 3:RP_ROWS[2], 4:RP_ROWS[3], 5:RP_ROWS[4], 6:RP_ROWS[5] }
    # Trip destination purpose code (dpurp) -> position in RP_ROWS.
    _DPURP_TO_ROW   = { 1:0, 2:1, 3:2, 4:3, 5:4, 6:5, 7:6, 10:7, 0:8 }
    # (first minute, last minute, label); everything else numeric is 'EV'.
    _TIME_WINDOWS   = ( (180, 359, 'EA'), (360, 539, 'AM'), (540, 929, 'MD'), (930, 1109, 'PM') )

    def __init__(self, eqvfile):
        """Read the district equivalency file; counts are computed lazily."""
        (self.distToName, self.distToTaz, self.tazToDist, _numdists ) = readEqvFile(eqvfile)
        # False until getResidentPurposes has finished a full tabulation,
        # afterwards {time period: {purpose: [count per district]}}.
        self.purposesByTimeperiod = False

    def getChampPurpose(self, row, col_dict):
        """Return the RP_ROWS label for a trip, or None for an unknown code.

        *row* is an indexable record and *col_dict* maps column names to
        positions in it; only the ``dpurp`` column is read.
        """
        xfer_purp = row[col_dict['dpurp']]
        row_idx = ResidentPurposes._DPURP_TO_ROW.get(xfer_purp)
        if row_idx is None:
            return None
        return ResidentPurposes.RP_ROWS[row_idx]

    def getTimePeriod(self, row, col_dict):
        '''Return the time-of-day period of a trip.

        EA = early morning (3am till 6am), AM = a.m. peak (6am till 9am),
        MD = midday (9am till 3.30pm), PM = p.m. peak (3.30pm till 6.30pm)
        and EV = evening (6.30pm till 3am).
         ---- Time is given in minutes (180 corresponds to 3am).

        The arrival time is used when ``half == 1``, otherwise the departure
        time. Returns None when the time matches no window (e.g. NaN).
        '''
        seg_dir = row[col_dict['half']]
        dep_time = row[col_dict['deptm']]
        arr_time = row[col_dict['arrtm']]
        use_time = arr_time if seg_dir == 1 else dep_time
        for first, last, label in ResidentPurposes._TIME_WINDOWS:
            if first <= use_time <= last:
                return label
        # The evening period wraps around midnight.
        if use_time >= 1110 or use_time <= 179:
            return 'EV'
        return None

    def getResidentPurposes(self, rundir, timePeriod):
        """
        Tabulate trips by purpose and home district for every time period.

        *rundir* is currently unused: the input tables are read from the fixed
        RP_* paths relative to the working directory.
        *timePeriod* is one of "Daily", "AM", "PM", "EV", "MD" or "EA".

        Returns dictionary of purpose (Work,School,etc) ->
           list of trips for that purpose for each district
           (position i holds district number i+1).

        The merged trip table is also written to ``trips.h5`` in the working
        directory. The result for all periods is cached on the instance, so
        the files are read only once.

        Raises KeyError if a household TAZ is missing from the equivalency
        file or *timePeriod* is not a known period.
        """
        # if it's done already, return it
        if self.purposesByTimeperiod:
            return self.purposesByTimeperiod[timePeriod]

        # otherwise get it
        trip_df = pd.read_csv(ResidentPurposes.RP_DISAG_TRIPS, sep='\t',
                              usecols = ['hhno','pno','tour_id','dpurp','half','deptm','arrtm'])
        tour_df = pd.read_csv(ResidentPurposes.RP_TOUR, sep='\t', usecols = ['id','parent'])
        hh_df   = pd.read_csv(ResidentPurposes.RP_HH, sep='\t', usecols = ['hhno','hhtaz'])
        per_df  = pd.read_csv(ResidentPurposes.RP_PERSON, sep='\t', usecols = ['hhno','pno','pptyp'])

        trip_df = trip_df.merge(hh_df, how='left')
        trip_df = trip_df.merge(per_df, how='left')
        trip_df = trip_df.merge(tour_df, how='left', left_on='tour_id', right_on='id')

        # Kept as an output artifact; the counting below works on the
        # in-memory frame and no longer depends on the HDF5 block layout.
        with pd.HDFStore("trips.h5", 'w') as store:
            store.put('root', trip_df, format='t')

        # One bucket per district number found in the equivalency file.
        num_districts = max(self.distToTaz, default=0)
        purposes = [p for p in ResidentPurposes.RP_ROWS if p != "Total"]
        counts = {}
        for tp in ("Daily", "AM", "PM", "EV", "MD", "EA"):
            counts[tp] = {purpose: [0]*num_districts for purpose in purposes}

        col_idx_dict = {col: i for i, col in enumerate(trip_df.columns)}
        for row in trip_df.itertuples(index=False, name=None):
            # matches the home taz to its (first) district
            resdist = int(self.tazToDist[row[col_idx_dict['hhtaz']]][0])

            purpose = self.getChampPurpose(row, col_idx_dict)
            if purpose is None:
                continue
            timeperiod = self.getTimePeriod(row, col_idx_dict)
            if timeperiod is not None:
                counts[timeperiod][purpose][resdist-1] += 1
            counts["Daily"][purpose][resdist-1] += 1

        # Publish the cache only after a complete pass, so a failure above
        # can never leave partial counts behind for later calls.
        self.purposesByTimeperiod = counts
        print("Done!")
        return self.purposesByTimeperiod[timePeriod]

In [4]:
def getResidentPurposeFiles(eqv, loc, time = ['Daily']):
    """
    Write the resident purpose dictionary into csv and markdown files

    Parameters
    ----------
    eqv : str
        Equivalency file; it must define exactly three districts, labelled
        here as Downtown, San Francisco and Bay Area.
    loc : str
        Directory that receives ``purpose_<period>.csv`` (tab separated, no
        total row) and ``purpose_<period>.md`` (with total row).
    time : list of str or str, optional
        Time periods to export; a single period may be given as a string.

    Returns
    -------
    str
        A message naming the output directory.

    Raises
    ------
    ValueError
        If the tabulation does not contain exactly three districts.
    """
    # Iterating a bare string would treat every character as a period.
    if isinstance(time, str):
        time = [time]

    rp = ResidentPurposes(eqv)
    count_cols = ['Downtown', 'San Francisco', 'Bay Area']
    pct_cols = ['Downtown %', 'SF %', 'Bay %']

    for t in time:
        file_name = 'purpose_'+t
        purpose_dict = rp.getResidentPurposes('./',t)
        purpose_df = pd.DataFrame(data = purpose_dict).transpose().reset_index()
        if purpose_df.shape[1] != 1 + len(count_cols):
            raise ValueError(
                "getResidentPurposeFiles expects %d districts but the tabulation has %d"
                % (len(count_cols), purpose_df.shape[1] - 1))
        purpose_df.columns = ['Purpose'] + count_cols
        # Make the columns cumulative: San Francisco includes Downtown and
        # Bay Area includes San Francisco.
        purpose_df['San Francisco'] = purpose_df['Downtown']+purpose_df['San Francisco']
        purpose_df['Bay Area'] = purpose_df['Bay Area']+purpose_df['San Francisco']

        # Remove 'Other' by label. The former positional drop(4) removed
        # whichever purpose happened to sit in the fifth row.
        purpose_df = purpose_df[purpose_df['Purpose'] != 'Other'].reset_index(drop=True)

        totals = purpose_df[count_cols].sum()
        total_row = pd.DataFrame([{'Purpose': 'Total', **totals.to_dict()}])
        purpose_df = pd.concat([purpose_df, total_row], ignore_index=True)
        # Shares are computed before the counts are turned into strings.
        for count_col, pct_col in zip(count_cols, pct_cols):
            purpose_df[pct_col] = purpose_df[count_col]/totals[count_col]
        for count_col in count_cols:
            purpose_df[count_col] = purpose_df[count_col].map("{:,}".format)

        #CSV table: every row except the trailing total
        csvfile_path = os.path.join(loc, file_name + '.csv')
        purpose_df.iloc[:-1].to_csv(csvfile_path, sep='\t',index=False)

        #Markdown table: bold header cells and a centred separator row
        markdown_table = purpose_df.to_markdown(index=False).split('\n')
        header_row = markdown_table[0]
        header_row = '| ' + ' | '.join(f'**{header.strip()}**' for header in header_row.split('|')[1:-1]) + ' |'
        markdown_table[0] = header_row
        markdown_table[1] = markdown_table[1].replace('|', ':|:').replace('::',':')[1:-1]
        markdown_table = '\n'.join(markdown_table)

        mdfile_path = os.path.join(loc, file_name + '.md')
        with open(mdfile_path, 'w') as f:
            f.write(markdown_table)

        print(f"Both files for {t} are finished writing!!!!")

    return "Resident Purpose files are written in " + loc
    



### TAZ extraction changes

In [5]:
# Equivalency lookups plus the daily purpose tabulation.
# NOTE(review): the recorded output below (2459 rows, older purpose names)
# does not match the 15-bucket ResidentPurposes class in this notebook;
# presumably it came from the TAZ-level variant - confirm before reuse.
eqv_file = 'shapefiles/DIST15.eqv'
distnames, distToTaz, tazToDist, numdists = readEqvFile(eqv_file)
rp = ResidentPurposes(eqv_file)
taz_dict = rp.getResidentPurposes('./', 'Daily')

# One row per bucket; turn the 0-based position into a 1-based identifier.
taz_df = pd.DataFrame(taz_dict).reset_index(names='TAZs')
taz_df['TAZs'] = taz_df['TAZs'] + 1
taz_df



  expected_mb = (expectedrows * rowsize) // MB


Done!


Unnamed: 0,TAZs,Work,Grade School,High School,College,Other,Workbased,Escort,Personal Business(including medical),Social & Recreational,Shopping,Meals
0,1,456,112,30,76,1209,116,395,208,240,217,74
1,2,533,90,27,103,1497,213,435,347,285,287,119
2,3,397,149,30,81,1294,124,505,259,226,245,110
3,4,593,124,21,129,1696,198,524,347,333,329,149
4,5,618,138,39,90,1652,202,609,336,320,281,108
...,...,...,...,...,...,...,...,...,...,...,...,...
2455,2456,0,0,0,0,0,0,0,0,0,0,0
2456,2457,0,0,0,0,0,0,0,0,0,0,0
2457,2458,0,0,0,0,0,0,0,0,0,0,0
2458,2459,0,0,0,0,0,0,0,0,0,0,0


In [25]:
# Rebuild the per-bucket table and attach the district of each identifier.
taz_df = pd.DataFrame(taz_dict).reset_index(names='TAZs')
taz_df['TAZs'] = taz_df['TAZs'] + 1
# tazToDist maps an identifier to a list of districts; keep the first entry.
taz_df['District'] = taz_df['TAZs'].map(tazToDist).str[0]
# Identifiers missing from the equivalency file get district 0.
taz_df = taz_df.fillna(0)
taz_df.to_csv('taz_rpurpose.csv', index=False)

In [4]:
# Tabulate once, then collect the per-period results. Only the first call
# reads the input files; later calls are served from the instance cache.
eqv_file = 'shapefiles/DIST15.eqv'
distnames, distToTaz, tazToDist, numdists = readEqvFile(eqv_file)
rp = ResidentPurposes(eqv_file)

timeperiods = ['Daily','AM','PM','EV','EA','MD']
purpose_dict = {period: rp.getResidentPurposes('./', period) for period in timeperiods}

  expected_mb = (expectedrows * rowsize) // MB


Done!


In [7]:
# Display numbers as whole values with thousands separators.
pd.options.display.float_format = '{:,.0f}'.format
distnames, distToTaz, tazToDist, numdists = readEqvFile(eqv_file)

# Daily counts, one row per district (1-based district number).
purpose_df1 = pd.DataFrame(purpose_dict['Daily']).reset_index(names='District')
purpose_df1['District'] = purpose_df1['District'] + 1
purpose_df1 = purpose_df1.apply(pd.to_numeric)
# Captured while 'District' is still a column; the formatting cell uses colms[1:].
colms = purpose_df1.columns
# Replace the district number by its name and make it the row index.
purpose_df1['District'] = pd.to_numeric(purpose_df1['District'], errors='coerce').map(distnames)
purpose_df1 = purpose_df1.set_index('District')

# Regional totals: all rows, and the first 12 rows labelled 'San Francisco'.
sum_all_rows = purpose_df1.sum()
sum_first_12_rows = purpose_df1.iloc[:12].sum()
sum_df = pd.DataFrame([sum_all_rows, sum_first_12_rows], index=['Bay Area', 'San Francisco'])

# Totals first, then the individual districts.
df = pd.concat([sum_df, purpose_df1])
df.index.name = 'Districts'
df

Unnamed: 0_level_0,Work,School,Escort,Personal Business & Medical,Shopping,Meals,Social & Recreational,Change Mode,Return Home
Districts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bay Area,4102304,1129526,2236306,2121742,1866054,1066943,1994227,297538,8653427
San Francisco,548316,113811,263812,238099,224358,128205,209923,51886,1070371
Downtown,37758,8085,15989,16994,16880,9354,14271,2236,77255
SoMa,27414,4819,10615,11052,10806,6260,9373,3068,50560
N.Beach/ Chinatown,47942,6685,18885,20007,19944,10990,16758,3938,88964
Western Market,65964,10110,24999,25500,24541,14846,22355,7242,116945
Mission/ Potrero,60119,11175,30065,23627,22172,13303,21748,8874,113214
Noe/ Glen/ Bernal,36358,6188,16660,14856,13845,8402,13590,3512,66599
Marina/ N.Heights,53099,5316,17787,19433,18734,11314,17121,5754,86612
Richmond,43657,8728,19424,18791,17393,10047,16305,4452,81345


In [8]:
# Turn the counts into comma-grouped strings (this overwrites df in place,
# so re-run the previous cell before running this one again).
for col in colms[1:]:
    df[col] = df[col].map('{:,.0f}'.format)

markdown_table = df.to_markdown(index=True).split('\n')
# Bold every header cell.
header_cells = [cell.strip() for cell in markdown_table[0].split('|')[1:-1]]
header_row = '| ' + ' | '.join(f'**{cell}**' for cell in header_cells) + ' |'
markdown_table[0] = header_row
# Rewrite the separator row so that only the trailing colon of each cell remains.
separator = markdown_table[1].replace('|', ':|:').replace('::','')
markdown_table[1] = separator[1:-1]
markdown_table = '\n'.join(markdown_table)

print(markdown_table)   # Save this for district purpose values

| **Districts** | **Work** | **School** | **Escort** | **Personal Business & Medical** | **Shopping** | **Meals** | **Social & Recreational** | **Change Mode** | **Return Home** |
|-------------------:|----------:|----------:|----------:|------------------------------:|-----------:|----------:|------------------------:|--------------:|--------------:|
| Bay Area           | 4,102,304 | 1,129,526 | 2,236,306 | 2,121,742                     | 1,866,054  | 1,066,943 | 1,994,227               | 297,538       | 8,653,427     |
| San Francisco      | 548,316   | 113,811   | 263,812   | 238,099                       | 224,358    | 128,205   | 209,923                 | 51,886        | 1,070,371     |
| Downtown           | 37,758    | 8,085     | 15,989    | 16,994                        | 16,880     | 9,354     | 14,271                  | 2,236         | 77,255        |
| SoMa               | 27,414    | 4,819     | 10,615    | 11,052                        | 10,806     | 6,260     | 9,373   

### Saved files

| **Districts** | **Work** | **School** | **Escort** | **Personal Business & Medical** | **Shopping** | **Meals** | **Social & Recreational** | **Change Mode** | **Return Home** |
|-------------------:|----------:|----------:|----------:|------------------------------:|-----------:|----------:|------------------------:|--------------:|--------------:|
| **Bay Area**           | 4,102,304 | 1,129,526 | 2,236,306 | 2,121,742                     | 1,866,054  | 1,066,943 | 1,994,227               | 297,538       | 8,653,427     |
| **San Francisco**      | 548,316   | 113,811   | 263,812   | 238,099                       | 224,358    | 128,205   | 209,923                 | 51,886        | 1,070,371     |
| Downtown           | 37,758    | 8,085     | 15,989    | 16,994                        | 16,880     | 9,354     | 14,271                  | 2,236         | 77,255        |
| SoMa               | 27,414    | 4,819     | 10,615    | 11,052                        | 10,806     | 6,260     | 9,373                   | 3,068         | 50,560        |
| N.Beach/ Chinatown | 47,942    | 6,685     | 18,885    | 20,007                        | 19,944     | 10,990    | 16,758                  | 3,938         | 88,964        |
| Western Market     | 65,964    | 10,110    | 24,999    | 25,500                        | 24,541     | 14,846    | 22,355                  | 7,242         | 116,945       |
| Mission/ Potrero   | 60,119    | 11,175    | 30,065    | 23,627                        | 22,172     | 13,303    | 21,748                  | 8,874         | 113,214       |
| Noe/ Glen/ Bernal  | 36,358    | 6,188     | 16,660    | 14,856                        | 13,845     | 8,402     | 13,590                  | 3,512         | 66,599        |
| Marina/ N.Heights  | 53,099    | 5,316     | 17,787    | 19,433                        | 18,734     | 11,314    | 17,121                  | 5,754         | 86,612        |
| Richmond           | 43,657    | 8,728     | 19,424    | 18,791                        | 17,393     | 10,047    | 16,305                  | 4,452         | 81,345        |
| Bayshore           | 37,953    | 14,943    | 30,500    | 20,339                        | 18,430     | 9,380     | 18,548                  | 1,438         | 94,579        |
| Outer Mission      | 41,733    | 14,547    | 31,151    | 21,995                        | 19,986     | 10,437    | 20,107                  | 2,484         | 100,271       |
| Hill Districts     | 24,998    | 5,291     | 12,272    | 11,766                        | 11,021     | 6,261     | 10,324                  | 1,660         | 49,818        |
| Sunset             | 71,321    | 17,924    | 35,465    | 33,739                        | 30,606     | 17,611    | 29,423                  | 7,228         | 144,209       |
| South Bay          | 1,412,082 | 414,982   | 803,638   | 724,889                       | 636,353    | 366,183   | 693,076                 | 93,810        | 3,016,198     |
| East Bay           | 1,671,269 | 480,209   | 924,877   | 888,481                       | 772,466    | 440,111   | 840,315                 | 138,290       | 3,571,386     |
| North Bay          | 470,637   | 120,524   | 243,979   | 270,273                       | 232,877    | 132,444   | 250,913                 | 13,552        | 995,472       |

In [9]:
# Write the first 12 rows of purpose_df1 to Purpose_daily.csv without the index.
# NOTE(review): if purpose_df1 still comes from the district-table cell, the
# district names live in the index and are therefore not written - confirm.
purpose_df1[:12].to_csv('Purpose_daily.csv',index=False)

In [10]:
# Daily counts keyed by 1-based district number.
purpose_df1 = pd.DataFrame(purpose_dict['Daily']).reset_index(names='District')
purpose_df1['District'] = purpose_df1['District'] + 1
# CSV table: first 12 rows only.
purpose_df1.iloc[:12].to_csv('Purpose_daily.csv', index=False)
purpose_df1


Unnamed: 0,District,Work,School,Escort,Personal Business & Medical,Shopping,Meals,Social & Recreational,Change Mode,Return Home
0,1,37758,8085,15989,16994,16880,9354,14271,2236,77255
1,2,27414,4819,10615,11052,10806,6260,9373,3068,50560
2,3,47942,6685,18885,20007,19944,10990,16758,3938,88964
3,4,65964,10110,24999,25500,24541,14846,22355,7242,116945
4,5,60119,11175,30065,23627,22172,13303,21748,8874,113214
5,6,36358,6188,16660,14856,13845,8402,13590,3512,66599
6,7,53099,5316,17787,19433,18734,11314,17121,5754,86612
7,8,43657,8728,19424,18791,17393,10047,16305,4452,81345
8,9,37953,14943,30500,20339,18430,9380,18548,1438,94579
9,10,41733,14547,31151,21995,19986,10437,20107,2484,100271


In [27]:
#markdown table - do we need sf and bay area total?
# pd.set_option('display.float_format', '{:.0f}'.format)
markdown_table = purpose_df1.to_markdown(index=False,floatfmt='.0f').split('\n')
header_row = markdown_table[0]
header_row = '| ' + ' | '.join(f'**{header.strip()}**' for header in header_row.split('|')[1:-1]) + ' |'
markdown_table[0] = header_row
markdown_table[1] = markdown_table[1].replace('|', ':|:').replace('::',':')[1:-1]
markdown_table = '\n'.join(markdown_table)

# print(markdown_table)
with open('purpose_dist_daily.md', 'w') as f:
    f.write(markdown_table)

In [28]:
purpose_df1

Unnamed: 0,District,Work,Grade School,High School,College,Workbased,Escort,Personal Business(including medical),Social & Recreational,Shopping,Meals
0,1,456,112,30,76,116,395,208,240,217,74
1,2,533,90,27,103,213,435,347,285,287,119
2,3,397,149,30,81,124,505,259,226,245,110
3,4,593,124,21,129,198,524,347,333,329,149
4,5,618,138,39,90,202,609,336,320,281,108
...,...,...,...,...,...,...,...,...,...,...,...
2455,2456,0,0,0,0,0,0,0,0,0,0
2456,2457,0,0,0,0,0,0,0,0,0,0
2457,2458,0,0,0,0,0,0,0,0,0,0
2458,2459,0,0,0,0,0,0,0,0,0,0


In [12]:
# Rows = time periods, columns = purposes; each cell holds the per-district list.
purpose_df_tod = pd.DataFrame(purpose_dict).T
# Collapse every per-district list into its regional total.
total_sum = purpose_df_tod.apply(lambda column: column.map(sum))
total_sum = total_sum.reset_index(names='TOD')
total_sum['All Purposes'] = total_sum.sum(axis=1, numeric_only=True)
# Positional reorder; with purpose_dict built from
# ['Daily','AM','PM','EV','EA','MD'] this yields Daily, AM, MD, PM, EV, EA.
total_sum = total_sum.iloc[[0,1,5,2,3,4]]
total_sum

Unnamed: 0,TOD,Work,School,Escort,Personal Business & Medical,Shopping,Meals,Social & Recreational,Change Mode,Return Home,All Purposes
0,Daily,4102304,1129526,2236306,2121742,1866054,1066943,1994227,297538,8653427,23468067
1,AM,1508351,869143,449143,298029,184983,103471,228675,88417,409885,4140097
5,MD,1803735,196223,932662,1015366,902229,500414,860371,80253,3228821,9520074
2,PM,424838,47358,445677,442866,453125,234740,471288,86167,2667379,5273438
3,EV,261577,16581,358649,336933,308659,213336,416356,31816,2321762,4265669
4,EA,103803,221,50175,28548,17058,14982,17537,10885,25580,268789


In [132]:
total_sum.iloc[1:]

Unnamed: 0,TOD,Work,Grade School,High School,College,Workbased,Escort,Personal Business(including medical),Social & Recreational,Shopping,Meals,All Purposes
4,EA,98933,108,51,58,7569,50116,27991,17447,16578,13473,232324
1,AM,1465458,523772,127348,218011,64137,448197,293104,227728,181584,92456,3641795
5,MD,1152889,86006,14759,95263,1008127,914618,940923,836990,831439,329986,6211000
3,EV,220287,1469,664,14430,56608,357381,334466,415231,305540,206015,1912091
2,PM,297015,9410,3644,34265,179923,440645,433370,467219,441607,212794,2519892


In [13]:
# Export every row except the first one (the 'Daily' row after the reorder
# above). The file name is spelled 'puporse_tod.csv'; it is left unchanged
# here because other tooling may read it under that name.
total_sum.iloc[1:].to_csv('puporse_tod.csv',index=False)

In [141]:
cols

Index(['TOD', 'Work', 'Grade School', 'High School', 'College', 'Workbased',
       'Escort', 'Personal Business(including medical)',
       'Social & Recreational', 'Shopping', 'Meals', 'All Purposes'],
      dtype='object')

In [14]:
# total_sum.to_markdown('purpose_tod.md',index=False)
cols = total_sum.columns 
for col in cols[1:]:
    total_sum[col] = total_sum[col].apply(lambda x: '{:,.0f}'.format(x))   #Converts the int to string with commas
markdown_table = total_sum.to_markdown(index=False,floatfmt='.0f').split('\n')
header_row = markdown_table[0]
header_row = '| ' + ' | '.join(f'**{header.strip()}**' for header in header_row.split('|')[1:-1]) + ' |'
markdown_table[0] = header_row
markdown_table[1] = markdown_table[1].replace('|', ':|:').replace('::',':')[1:-1]
markdown_table = '\n'.join(markdown_table)

print(markdown_table)
with open('purpose_tod.md', 'w') as f:
    f.write(markdown_table)

| **TOD** | **Work** | **School** | **Escort** | **Personal Business & Medical** | **Shopping** | **Meals** | **Social & Recreational** | **Change Mode** | **Return Home** | **All Purposes** |
|:------:|:----------:|:----------:|:----------:|:------------------------------:|:-----------:|:----------:|:------------------------:|:--------------:|:--------------:|:---------------:|
| Daily | 4,102,304 | 1,129,526 | 2,236,306 | 2,121,742                     | 1,866,054  | 1,066,943 | 1,994,227               | 297,538       | 8,653,427     | 23,468,067     |
| AM    | 1,508,351 | 869,143   | 449,143   | 298,029                       | 184,983    | 103,471   | 228,675                 | 88,417        | 409,885       | 4,140,097      |
| MD    | 1,803,735 | 196,223   | 932,662   | 1,015,366                     | 902,229    | 500,414   | 860,371                 | 80,253        | 3,228,821     | 9,520,074      |
| PM    | 424,838   | 47,358    | 445,677   | 442,866                       | 453,12

In [16]:
# Re-read the district equivalency file so the lookups
# (distnames, distToTaz, tazToDist, numdists) exist in the kernel.
eqv_file = 'shapefiles/DIST15.eqv'
distnames, distToTaz, tazToDist, numdists = readEqvFile(eqv_file)
# tazToDist.keys()

In [27]:
# Daily counts keyed by 1-based district number.
purpose_df1 = pd.DataFrame(data = purpose_dict['Daily'])
purpose_df1.reset_index(inplace=True,names='District')
purpose_df1['District']+=1
# Tabulations built from a purpose list without 'Other' have no such column;
# errors='ignore' keeps this cell from raising KeyError in that case.
purpose_df1 = purpose_df1.drop(columns='Other', errors='ignore')
purpose_df1


Unnamed: 0,District,Work,Grade School,High School,College,Workbased,Escort,Personal Business(including medical),Social & Recreational,Shopping,Meals
0,1,456,112,30,76,116,395,208,240,217,74
1,2,533,90,27,103,213,435,347,285,287,119
2,3,397,149,30,81,124,505,259,226,245,110
3,4,593,124,21,129,198,524,347,333,329,149
4,5,618,138,39,90,202,609,336,320,281,108
...,...,...,...,...,...,...,...,...,...,...,...
2455,2456,0,0,0,0,0,0,0,0,0,0
2456,2457,0,0,0,0,0,0,0,0,0,0
2457,2458,0,0,0,0,0,0,0,0,0,0
2458,2459,0,0,0,0,0,0,0,0,0,0


In [17]:
# District names in the order stored in distnames, as an extra column.
dists = pd.Series(distnames.values(), name='District Names')
# Side-by-side concatenation aligns on the row index of both objects.
purpose_df1_alt = pd.concat((purpose_df1, dists), axis='columns')
# CSV table
purpose_df1_alt.to_csv('Purpose_daily_alt.csv', index=False)

In [15]:
dists

0               Downtown
1                   SoMa
2     N.Beach/ Chinatown
3         Western Market
4       Mission/ Potrero
5      Noe/ Glen/ Bernal
6      Marina/ N.Heights
7               Richmond
8               Bayshore
9          Outer Mission
10        Hill Districts
11                Sunset
12             South Bay
13              East Bay
14             North Bay
dtype: object

## Tour purpose

In [3]:
# District lookups used by the tour-level functions defined below
# (they read the module-level tazToDist).
eqv_file = 'shapefiles/DIST15.eqv'
distnames, distToTaz, tazToDist, numdists = readEqvFile(eqv_file)
# tazToDist.keys()
# Tab-separated input tables, relative to the current working directory.
RP_HH           = "data/_household_2.dat"
RP_PERSON       = "data/_person_2.dat"
RP_TOUR         = "data/_tour_2.dat"
RP_DISAG_TRIPS  = "data/_trip_2.dat"
# Tour purpose labels; 'Total' is a presentation-only row.
RP_ROWS         = [ 'Work', 'Grade School', 'High School', 'College', 'Other', 'Workbased','Escort',
                    'Personal Business(including medical)','Social & Recreational','Shopping','Meals','Total' ]
def getChampPurposeTour(row, col_dict):
    """Return the RP_ROWS label for a tour, or None for purpose code 10.

    *row* is an indexable record and *col_dict* maps column names to
    positions in it. The primary destination purpose (``pdpurp``) selects the
    label; school tours (code 2) are split by person type (``pptyp``):
    7 -> Grade School, 6 -> High School, anything else -> College.
    Every purpose code without an explicit mapping is reported as 'Other'.
    """
    person_type = row[col_dict['pptyp']]
    purpose_code = row[col_dict['pdpurp']]

    # Code 10 is deliberately left out of the tabulation.
    if purpose_code == 10:
        return None

    if purpose_code == 2:
        school_row = {7: 1, 6: 2}.get(person_type, 3)
        return RP_ROWS[school_row]

    # Remaining purpose codes -> position in RP_ROWS; 4 ('Other') is the fallback.
    row_by_code = {1: 0, 3: 6, 4: 7, 5: 9, 6: 10, 7: 8}
    return RP_ROWS[row_by_code.get(purpose_code, 4)]

def getTimePeriodTour(row, col_dict):
    '''Return the time-of-day period in which a tour leaves its origin.

    EA = early morning (3am till 6am), AM = a.m. peak (6am till 9am),
    MD = midday (9am till 3.30pm), PM = p.m. peak (3.30pm till 6.30pm)
    and EV = evening (6.30pm till 3am).
        ---- Time is given in minutes; the value is read from ``tlvorig``.

    Returns None when the value falls in none of the windows (e.g. NaN).
    '''
    leave_time = row[col_dict['tlvorig']]
    # (first minute, last minute, label) for the periods inside one day.
    windows = ((180, 359, 'EA'), (360, 539, 'AM'), (540, 929, 'MD'), (930, 1109, 'PM'))
    for first, last, label in windows:
        if first <= leave_time <= last:
            return label
    # The evening period wraps around midnight.
    if leave_time >= 1110 or leave_time <= 179:
        return 'EV'
    return None

def getResidentPurposesTour(num_districts=15):
    """Count resident tours by time period, purpose and home district.

    Reads the tab-separated tour, household and person files (RP_TOUR, RP_HH,
    RP_PERSON), left-joins household TAZ and person type onto each tour, writes
    the joined table to "tours.h5" (overwriting it), then streams that table
    back row by row to build the counts.

    Args:
        num_districts: length of each per-district count list. Defaults to 15.
            A district number d is counted at list position d-1, so every
            district returned by tazToDist must satisfy 1 <= d <= num_districts.
            NOTE(review): a district of 0 would silently wrap to the last slot.

    Returns:
        dict: time period ("Daily", "AM", "PM", "EV", "MD", "EA") ->
              purpose label (every RP_ROWS entry except "Total") ->
              list of num_districts tour counts.

    Raises:
        KeyError: if a household TAZ is missing from tazToDist.
    """
    tour_df = pd.read_csv(RP_TOUR, sep='\t', usecols = ['id','hhno','pno','parent','pdpurp','tlvorig'])
    hh_df   = pd.read_csv(RP_HH, sep='\t', usecols = ['hhno','hhtaz'])
    per_df  = pd.read_csv(RP_PERSON, sep='\t', usecols = ['hhno','pno','pptyp'])

    tour_df = tour_df.merge(hh_df, how='left',on='hhno')
    tour_df = tour_df.merge(per_df, how='left',on=['pno','hhno'])
    print(tour_df.shape)

    # Context manager guarantees the store is closed even if put() fails, so a
    # re-run of this cell is not blocked by a file handle left open in the kernel.
    with pd.HDFStore("tours.h5", 'w') as store:
        store.put('root', tour_df, format='t')

    # One independent zero-filled list per (time period, purpose) pair.
    purposesByTimeperiod = {
        tp: {purpose: [0]*num_districts for purpose in RP_ROWS if purpose != "Total"}
        for tp in ("Daily", "AM", "PM", "EV", "MD", "EA")
    }

    # Can use pandas or h5py instead of tables to do this processing
    with open_file("tours.h5", mode="r") as infile:
        table = infile.get_node('/root', 'table')
        # NOTE(review): this reads the column names pandas recorded for
        # values_block_0 only; it presumably relies on every needed column being
        # stored in that single block (i.e. sharing one dtype) - confirm.
        col_names = table._v_attrs.values_block_0_kind
        col_idx_dict = {col: i for i, col in enumerate(col_names)}

        for row in table:
            # Field 1 of each table row holds the block of values that
            # col_idx_dict indexes into (field 0 is presumably the index).
            row = row[1]
            # Map the household TAZ to its district; the first listed district is used.
            resdist = int(tazToDist[row[col_idx_dict['hhtaz']]][0])

            purpose = getChampPurposeTour(row, col_idx_dict)
            if purpose is not None:
                timeperiod = getTimePeriodTour(row, col_idx_dict)
                if timeperiod in ("AM", "PM", "EV", "MD", "EA"):
                    purposesByTimeperiod[timeperiod][purpose][resdist-1] += 1

                # Every tour with a purpose counts toward the daily total,
                # whether or not it could be assigned a time period.
                purposesByTimeperiod["Daily"][purpose][resdist-1] += 1

    print("Done!")
    return purposesByTimeperiod

In [4]:
# Build the tour counts: time period -> purpose -> list of per-district counts.
tour_purpose_dict = getResidentPurposesTour()

(9239058, 8)
Done!


In [5]:
# Tour totals by time of day: one row per time period, one column per purpose.
daily_tour_dist = tour_purpose_dict['Daily']

# Outer dict keys (time periods) become columns, so transpose to get them as rows.
# 'Other' and 'Workbased' are left out of the reported table.
tour_purpose_df_tod = (
    pd.DataFrame(data=tour_purpose_dict).T
    .drop(columns=['Other', 'Workbased'])
)

# Each cell holds a per-district list of counts; collapse it to a single total.
# Series.map is applied column by column instead of DataFrame.applymap, which
# is deprecated in recent pandas releases.
total_sum = (
    tour_purpose_df_tod
    .apply(lambda col: col.map(sum))
    .reset_index(names='TOD')
)
total_sum.to_csv('tours_tod.csv', index=False)
total_sum


Unnamed: 0,TOD,Work,Grade School,High School,College,Escort,Personal Business(including medical),Social & Recreational,Shopping,Meals
0,Daily,2849573,618811,145560,359252,1305916,1017124,1454723,874731,613368
1,AM,1508082,529437,127440,226887,279053,179582,200030,121240,50374
2,PM,111988,8564,3311,33354,208909,176512,332691,177834,127207
3,EV,63327,1019,382,10167,144714,118825,263360,97995,93585
4,MD,893993,71646,11506,82843,637500,524831,642456,473050,335337
5,EA,272183,8145,2921,6001,35740,17374,16186,4612,6865


In [6]:
# Daily tour counts per district: one row per district, one column per purpose.
purpose_df1 = (
    pd.DataFrame(data=tour_purpose_dict['Daily'])
    .reset_index(names='District')
    # List positions are 0-based; district numbers start at 1.
    .assign(District=lambda frame: frame['District'] + 1)
    # 'Other' and 'Workbased' are left out of the reported table.
    .drop(columns=['Other', 'Workbased'])
)
purpose_df1.to_csv('tour_purpose_daily.csv', index=False)  # CSV table
purpose_df1


Unnamed: 0,District,Work,Grade School,High School,College,Escort,Personal Business(including medical),Social & Recreational,Shopping,Meals
0,1,27954,1804,640,5499,10000,8964,11672,9112,6135
1,2,20108,959,348,3447,6387,5601,7454,5537,4034
2,3,35498,2154,759,3701,11447,10328,13457,10501,7150
3,4,47607,2267,953,6778,14629,13277,17855,12592,9485
4,5,43328,4238,1349,5499,17881,12245,16996,10985,8258
5,6,25762,2383,815,2954,9680,7535,10500,6709,5220
6,7,38268,1462,644,3161,10220,9971,13445,9416,7221
7,8,30845,3798,986,3888,11280,9432,12464,8509,6065
8,9,27208,8171,2144,4557,18034,10309,14338,9096,5603
9,10,29887,6478,1436,6526,18318,11296,15480,9919,6327


In [7]:
# Render the daily district table as Markdown with bold, centred column headers.
table_lines = purpose_df1.to_markdown(index=False, floatfmt='.0f').split('\n')

# Bold each header cell; the first and last split pieces are the empty strings
# outside the edge pipes, so they are skipped.
header_cells = [cell.strip() for cell in table_lines[0].split('|')[1:-1]]
header_row = '| ' + ' | '.join(f'**{cell}**' for cell in header_cells) + ' |'

# Put a colon on both sides of every pipe so each column is centred, collapse
# any doubled colons, then trim the stray colon left at each end of the line.
separator_row = table_lines[1].replace('|', ':|:').replace('::', ':')[1:-1]

markdown_table = '\n'.join([header_row, separator_row] + table_lines[2:])

print(markdown_table)
with open('tour_purpose_dist_daily.md', 'w') as f:
    f.write(markdown_table)

| **District** | **Work** | **Grade School** | **High School** | **College** | **Escort** | **Personal Business(including medical)** | **Social & Recreational** | **Shopping** | **Meals** |
|:-----------:|:--------:|:---------------:|:--------------:|:----------:|:---------:|:---------------------------------------:|:------------------------:|:-----------:|:--------:|
|          1 |   27954 |           1804 |           640 |      5499 |    10000 |                                   8964 |                   11672 |       9112 |    6135 |
|          2 |   20108 |            959 |           348 |      3447 |     6387 |                                   5601 |                    7454 |       5537 |    4034 |
|          3 |   35498 |           2154 |           759 |      3701 |    11447 |                                  10328 |                   13457 |      10501 |    7150 |
|          4 |   47607 |           2267 |           953 |      6778 |    14629 |                                  13

In [10]:
# Render the time-of-day totals as Markdown with bold, centred column headers.
tod_lines = total_sum.to_markdown(index=False, floatfmt='.0f').split('\n')

# Bold each header cell; the first and last split pieces are the empty strings
# outside the edge pipes, so they are skipped.
tod_header_cells = [cell.strip() for cell in tod_lines[0].split('|')[1:-1]]
header_row = '| ' + ' | '.join(f'**{cell}**' for cell in tod_header_cells) + ' |'

# Put a colon on both sides of every pipe so each column is centred, collapse
# any doubled colons, then trim the stray colon left at each end of the line.
tod_separator_row = tod_lines[1].replace('|', ':|:').replace('::', ':')[1:-1]

markdown_table = '\n'.join([header_row, tod_separator_row] + tod_lines[2:])

print(markdown_table)
with open('tour_purpose_tod.md', 'w') as f:
    f.write(markdown_table)

| **TOD** | **Work** | **Grade School** | **High School** | **College** | **Escort** | **Personal Business(including medical)** | **Social & Recreational** | **Shopping** | **Meals** |
|:------:|:--------:|:---------------:|:--------------:|:----------:|:---------:|:---------------------------------------:|:------------------------:|:-----------:|:--------:|
| Daily | 2849573 |         618811 |        145560 |    359252 |  1305916 |                                1017124 |                 1454723 |     874731 |  613368 |
| AM    | 1508082 |         529437 |        127440 |    226887 |   279053 |                                 179582 |                  200030 |     121240 |   50374 |
| PM    |  111988 |           8564 |          3311 |     33354 |   208909 |                                 176512 |                  332691 |     177834 |  127207 |
| EV    |   63327 |           1019 |           382 |     10167 |   144714 |                                 118825 |                  263360 