In [91]:
import pandas as pd
import urllib.request
import gpxpy
import csv
import os
import re
import numpy as np
from datetime import datetime
import geojson
from geopandas import GeoDataFrame
from shapely.geometry import Point, LineString

### This is the url to generate a response from grabhopper
##### "https://graphhopper.com/api/1/route?point=33.746717,-84.387596&point=45.047,-67.637&vehicle=car&locale=en&instructions=false&type=gpx&key=[YOUR_KEY]"


In [2]:
# path to county centroids with the endpoint coordinates for each route
coordinates = pd.read_csv('top-100-metros.csv')

In [3]:
coordinates.set_index('City', inplace=True)

In [4]:
coordinates.rename(columns={
    'Latitude': 'Y',
    'Longitude': 'X'
}, inplace=True)

In [5]:
#### build the api call w/ the url endpoints

urlStart = 'https://graphhopper.com/api/1/route?'
point = 'point='
urlEnd = '&vehicle=truck&locale=en&instructions=false&type=gpx&key=dfa731bb-f8c4-4b1c-974d-a2dfa0b4aa63'

# the starting point for each route
# this is the center of Fulton County, or Atlanta

startY = '32.111062'
startX = '-81.126410'

In [6]:
coordinates.head()

Unnamed: 0_level_0,Population,Y,X
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"New York, New York",8405837,40.712784,-74.005941
"Los Angeles, California",3884307,34.052234,-118.243685
"Chicago, Illinois",2718782,41.878114,-87.629798
"Houston, Texas",2195914,29.760427,-95.369803
"Philadelphia, Pennsylvania",1553165,39.952584,-75.165222


In [7]:
req = urlStart + point + startY + ',' + startX + '&' + point + str(coordinates.iloc[0,1]) + ',' + str(coordinates.iloc[0,2]) + urlEnd



In [8]:
req

'https://graphhopper.com/api/1/route?point=32.111062,-81.126410&point=40.712784,-74.005941&vehicle=truck&locale=en&instructions=false&type=gpx&key=dfa731bb-f8c4-4b1c-974d-a2dfa0b4aa63'

In [9]:
# make an API call for each pair of startingpoint-centroid coordinates
# save each call to individual .gpx file

for index, row in coordinates.iterrows():
    req = urlStart + point + startY + ',' + startX + '&' + point + str(row['Y']) + ',' + str(row['X']) + urlEnd
    try:
        resp = urllib.request.urlopen(req)
        gpxData = str(resp.read(), 'utf-8')
        fileName = 'savannah_' + str(index)
        saveFile = open('gpx_files/{0}.gpx'.format(fileName),'w')
        print('processed index ' + str(index))
        saveFile.write(gpxData)
        saveFile.close()
    except:
        print('bad request on index ' + str(index))
        pass

processed index New York, New York
processed index Los Angeles, California
processed index Chicago, Illinois
processed index Houston, Texas
processed index Philadelphia, Pennsylvania
processed index Phoenix, Arizona
processed index San Antonio, Texas
processed index San Diego, California
processed index Dallas, Texas
processed index San Jose, California
processed index Austin, Texas
processed index Indianapolis, Indiana
processed index Jacksonville, Florida
processed index San Francisco, California
processed index Columbus, Ohio
processed index Charlotte, North Carolina
processed index Fort Worth, Texas
processed index Detroit, Michigan
processed index El Paso, Texas
processed index Memphis, Tennessee
processed index Seattle, Washington
processed index Denver, Colorado
processed index Washington, District of Columbia
processed index Boston, Massachusetts
processed index Nashville, Tennessee
processed index Baltimore, Maryland
processed index Oklahoma City, Oklahoma
processed index Loui

In [10]:
# write csv file called merged.csv to working directory and give column names x,y,y
with open(r'gpx_files/merged/merged.csv', 'a') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_NONE, escapechar=' ', lineterminator='\n')
    writer.writerow('yxtn')

In [11]:
#create a folder for your files manually
for file in os.listdir('gpx_files'):
    if file.endswith((".gpx",".mp4","wmv")):
        filePath = 'gpx_files/' + file
        n = filePath[19:]
        nm = n[:-4]
        print(filePath,nm)  
        gpx_file = open(filePath, 'r')
        gpx = gpxpy.parse(gpx_file)
        count = 0

        #iterate through rows and append each gpx row to merged csv
        for track in gpx.tracks:
            for segment in track.segments:
                for point in segment.points:
                    fields=['{0},{1},{2},{3}'.format(point.latitude, point.longitude, point.time, nm)] 
                    #Here double whitespace is removed so QGIS accepts the time format
                    re.sub(' +',' ',fields[0])
                    #Graphhopper creates quite a lot of GPX points and for this purpose every second is enough.
                    count += 1
                    if count % 2 == 0: 
                        with open(r'gpx_files/merged/merged.csv', 'a') as f:
                            writer = csv.writer(f, quoting=csv.QUOTE_NONE, escapechar=' ', lineterminator='\n')
                            writer.writerow(fields)

gpx_files/savannah_New York, New York.gpx New York, New York
gpx_files/savannah_Virginia Beach, Virginia.gpx Virginia Beach, Virginia
gpx_files/savannah_Chesapeake, Virginia.gpx Chesapeake, Virginia
gpx_files/savannah_Lubbock, Texas.gpx Lubbock, Texas
gpx_files/savannah_El Paso, Texas.gpx El Paso, Texas
gpx_files/savannah_Austin, Texas.gpx Austin, Texas
gpx_files/savannah_Gilbert, Arizona.gpx Gilbert, Arizona
gpx_files/savannah_Stockton, California.gpx Stockton, California
gpx_files/savannah_Las Vegas, Nevada.gpx Las Vegas, Nevada
gpx_files/savannah_Richmond, Virginia.gpx Richmond, Virginia
gpx_files/savannah_Durham, North Carolina.gpx Durham, North Carolina
gpx_files/savannah_Chandler, Arizona.gpx Chandler, Arizona
gpx_files/savannah_Wichita, Kansas.gpx Wichita, Kansas
gpx_files/savannah_Riverside, California.gpx Riverside, California
gpx_files/savannah_Dallas, Texas.gpx Dallas, Texas
gpx_files/savannah_Oklahoma City, Oklahoma.gpx Oklahoma City, Oklahoma
gpx_files/savannah_North Las V

In [24]:
df = pd.read_csv('gpx_files/merged/merged.csv', index_col=False)

In [25]:
df.shape

(413770, 4)

In [26]:
df.dtypes

y    float64
x    float64
t     object
n     object
dtype: object

In [27]:
df = df[['n','y','x','t']]
df['t'] = pd.to_datetime(df.t)

In [28]:
df.columns = ['n', 'y', 'x', 'timestamp']

### Get total times for each route in hours, minutes, and seconds, and ultimately convert to geojson for export

In [30]:
df['diff'] = df.groupby('n')['timestamp'].diff()


In [33]:
df['seconds'] = df['diff'].astype('timedelta64[s]')

In [35]:
df.fillna(0, inplace=True)

  """Entry point for launching an IPython kernel.


In [37]:
df['seconds_away'] = df.groupby('n')['seconds'].cumsum()

In [39]:
df['minutes_away'] = df['seconds_away'] / 60

In [40]:
df['hours_away'] = df['minutes_away'] / 60

In [59]:
df.rename(columns={'n':'city', 'timestamp':'date'}, inplace=True)

In [61]:
df.dtypes

city                     object
y                       float64
x                       float64
date             datetime64[ns]
diff            timedelta64[ns]
seconds                 float64
seconds_away            float64
minutes_away            float64
hours_away              float64
dtype: object

In [63]:
df['date'] = df['date'].dt.date


In [75]:
df['time'] = pd.to_datetime(df['diff']).dt.strftime('%H:%M:%S')

In [76]:
df['date2'] = df['date'].astype(str)

In [78]:
df['timestamp'] = pd.to_datetime(df['date2'] + ' ' + df['time'])

In [80]:
del df['date2']
del df['diff2']
del df['time']

In [86]:
df['timestamp'] = df.timestamp.apply(lambda x : (x-datetime.datetime(1970,1,1)).total_seconds())


In [87]:
df.dtypes

city                     object
y                       float64
x                       float64
date                     object
diff            timedelta64[ns]
seconds                 float64
seconds_away            float64
minutes_away            float64
hours_away              float64
timestamp               float64
dtype: object

In [92]:
df.head()

Unnamed: 0,city,y,x,date,diff,seconds,seconds_away,minutes_away,hours_away,timestamp
0,New York,32.111124,-81.126236,2019-03-04,00:00:00,0.0,0.0,0.0,0.0,1551658000.0
1,New York,32.110967,-81.126193,2019-03-04,00:00:11,11.0,11.0,0.183333,0.003056,1551658000.0
2,New York,32.110359,-81.126906,2019-03-04,00:00:57,57.0,68.0,1.133333,0.018889,1551658000.0
3,New York,32.109039,-81.128615,2019-03-04,00:02:11,131.0,199.0,3.316667,0.055278,1551658000.0
4,New York,32.100306,-81.140469,2019-03-04,00:14:48,888.0,1087.0,18.116667,0.301944,1551658000.0


In [94]:
# now convert to geodataframe and write to geojson

# Zip the coordinates into a point object and convert to a GeoDataFrame
geometry = [Point(xy) for xy in zip(df.x, df.y)]
geo = GeoDataFrame(df, geometry=geometry)

# Aggregate these points with the GroupBy
geo = geo.groupby(['city'])['geometry'].apply(lambda x: LineString(x.tolist()))
geo = GeoDataFrame(geo, geometry='geometry')

In [95]:
geo.reset_index(inplace=True)

In [96]:
geo.head()

Unnamed: 0,city,geometry
0,Albuquerque,"LINESTRING (-81.12623599999999 32.111124, -81...."
1,Anaheim,"LINESTRING (-81.12623599999999 32.111124, -81...."
2,Anchorage,"LINESTRING (-81.12623599999999 32.111124, -81...."
3,Arlington,"LINESTRING (-81.12623599999999 32.111124, -81...."
4,Atlanta,"LINESTRING (-81.12623599999999 32.111124, -81...."


In [98]:
df['minutes'] = df['seconds'] / 60
df['hours'] = df['minutes'] / 60

In [100]:
# original df groupby fips and give cumsum columns for seconds minutes and hours
grouped_df = df.groupby('city').agg({'seconds': 'sum',
                             'minutes': 'sum',
                             'hours'  : 'sum'})

In [102]:
grouped_df.head()

Unnamed: 0_level_0,seconds,minutes,hours
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Albuquerque,120592.0,2009.866667,33.497778
Anaheim,176709.0,2945.15,49.085833
Anchorage,330847.0,5514.116667,91.901944
Arlington,77551.0,1292.516667,21.541944
Atlanta,20142.0,335.7,5.595


In [105]:
# left merge geo with grouped_df on fips per http://geopandas.org/mergingdata.html#attribute-joins
geo_df = geo.merge(grouped_df, on='city')

In [106]:
geo_df.head()

Unnamed: 0,city,geometry,seconds,minutes,hours
0,Albuquerque,"LINESTRING (-81.12623599999999 32.111124, -81....",120592.0,2009.866667,33.497778
1,Anaheim,"LINESTRING (-81.12623599999999 32.111124, -81....",176709.0,2945.15,49.085833
2,Anchorage,"LINESTRING (-81.12623599999999 32.111124, -81....",330847.0,5514.116667,91.901944
3,Arlington,"LINESTRING (-81.12623599999999 32.111124, -81....",77551.0,1292.516667,21.541944
4,Atlanta,"LINESTRING (-81.12623599999999 32.111124, -81....",20142.0,335.7,5.595


In [107]:
# write geo to geojson with non-geometry columns as properties
geo_df.to_file('savannah-metros-routes.geojson', driver="GeoJSON")

  with fiona.drivers():


In [108]:
geo_df.shape

(98, 5)

In [111]:
geo_df1 = geo_df[0:32]

In [113]:
geo_df1.shape

(32, 5)