In [1]:
import pandas as pd
import gtfs_functions as gtfs

# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100

In [2]:
# Load the APC records, ordered by transit date, with a fresh 0..n-1 index.
apc_df = (
    pd.read_parquet('./carta-apc.parquet/2021_10_24_gtfs_days', engine='pyarrow')
    .sort_values('transit_date')
    .reset_index(drop=True)
)
# Keep only rows whose trip id compares >= '204888020'.
# NOTE(review): if gtfs_trip_id holds strings this comparison is lexicographic,
# not numeric — confirm that is the intended cut-off.
apc_df = apc_df.loc[apc_df['gtfs_trip_id'] >= '204888020']

# Load the GTFS feed tables used throughout the notebook.
routes, stops, stop_times, trips, shapes = gtfs.import_gtfs(
    './raw/20211024.zip',
    busiest_date=False,
)


In [3]:
# Drop APC rows that have no map coordinates. Per the original author's note this
# also clears out the rows with a null time_actual_arrive and their stops
# (NOTE(review): that link is not checked here — confirm against the data).
apc_df = apc_df.dropna(subset=['map_latitude', 'map_longitude'])

# Same records with route '10C' excluded.
apc_df_w_10C = apc_df.loc[apc_df['route_id'] != '10C']

In [4]:
def check_stops_in_order(trip):
    """Flag a trip whose earliest or latest arrival is not a first/last stop.

    The rows of ``trip`` are ordered by ``time_actual_arrive``; the result is
    ``0`` when both the first and the last row have ``first_last_stop == 1``,
    and ``1`` otherwise.
    """
    terminal_flags = trip.sort_values('time_actual_arrive')['first_last_stop']
    starts_at_terminal = terminal_flags.iloc[0] == 1
    ends_at_terminal = terminal_flags.iloc[-1] == 1
    return 0 if (starts_at_terminal and ends_at_terminal) else 1

In [5]:
def find_trips(series):
    """Return the values of ``series`` as a plain Python list (used as a groupby aggregator)."""
    values = series.tolist()
    return values

def stop_len(series):
    """Return the number of entries in ``series``."""
    return len(series)

In [6]:
# For every GTFS shape, gather the list of trip ids that use it.
shape_alltrips = (
    trips.groupby('shape_id')
    .agg({"trip_id": find_trips})
    .reset_index()
)

# Shapes whose id lies in ['shp-10C', 'shp-10D'] (inclusive) are set aside in
# shapes_10C and removed from shape_alltrips.
_is_10c_shape = shape_alltrips['shape_id'].between('shp-10C', 'shp-10D')
shapes_10C = shape_alltrips[_is_10c_shape]
shape_alltrips = shape_alltrips[~_is_10c_shape]

# apc_shape_alltrips = apc_df.groupby('gtfs_shape_id').agg({"gtfs_trip_id": find_trips}).reset_index()
# Non-null record counts per (shape, trip, service date) in the APC data.
trip_stop_len = (
    apc_df.groupby(['gtfs_shape_id', 'gtfs_trip_id', 'transit_date'])
    .count()
    .reset_index()
)

In [7]:
def func1(grp):
    """Return the row of ``grp`` that comes first when sorted by ``stop_id`` descending."""
    by_stop_id_desc = grp.sort_values(by='stop_id', ascending=False)
    top_row = by_stop_id_desc.iloc[0]
    return top_row



In [8]:
# Per shape, keep the (trip, date) row that func1 selects: the one with the
# largest value in the 'stop_id' count column.
trip_per_shape = (
    trip_stop_len[['gtfs_shape_id', 'gtfs_trip_id', 'transit_date', 'stop_id']]
    .groupby('gtfs_shape_id')
    .apply(func1)
    .reset_index(drop=True)
)

# Number of stop_times rows per GTFS trip, to check the APC stops against.
gtfs_trip_stop_count = (
    stop_times.groupby('trip_id')
    .count()
    .reset_index()[['trip_id', 'stop_id']]
)

# Line the APC count (stop_id_x) up with the GTFS count (stop_id_y) per trip.
merged_apc_gtfs = trip_per_shape.merge(
    gtfs_trip_stop_count,
    left_on='gtfs_trip_id',
    right_on='trip_id',
    how='left',
)
merged_apc_gtfs['diff'] = merged_apc_gtfs['stop_id_x'] - merged_apc_gtfs['stop_id_y']

# Rows where the two counts disagree.
diff = merged_apc_gtfs.loc[merged_apc_gtfs['diff'] != 0].rename(
    columns={'stop_id_y': 'gtfs_stops', 'stop_id_x': 'apc_stops'}
)

In [9]:
# Show the rows where the APC and GTFS stop counts differ.
diff

Unnamed: 0,gtfs_shape_id,gtfs_trip_id,transit_date,apc_stops,trip_id,gtfs_stops,diff


In [10]:
import pandas as pd
import geopandas as gpd
import gtfs_functions as gtfs
import pandas as pd,dateparser
import glob,dateparser
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import gtfs_kit as gk
import os,sys
import requests,json
import plotly.express as px
import numpy as np
from shapely.geometry import Point,LineString,MultiLineString
# Never truncate the column list when a frame is displayed.
pd.options.display.max_columns = None

In [11]:
def request_path(geom, max_coords=9999, overlap=2, radius=25):
    """Build OSRM ``match`` request URLs for the vertices of a line geometry.

    The coordinates of ``geom`` are cut into windows of at most ``max_coords``
    points; consecutive windows share ``overlap`` points. One URL is produced
    per window, in order.

    Parameters
    ----------
    geom : object exposing ``.coords``
        Iterable of coordinate tuples; the first two entries of each tuple are
        written to the URL as ``first,second``.
    max_coords : int, default 9999
        Maximum number of coordinates per request.
    overlap : int, default 2
        Number of coordinates shared between consecutive windows.
    radius : int or float, default 25
        Value repeated in the ``radiuses`` query option, once per coordinate.

    Returns
    -------
    list of str
        Request URLs; empty when ``geom`` has no coordinates.

    Raises
    ------
    ValueError
        If ``max_coords`` is not greater than ``overlap`` (the window could
        never advance).
    """
    if max_coords <= overlap:
        raise ValueError("max_coords must be greater than overlap")

    base_url = 'http://127.0.0.1:5000/match/v1/driving/'
    options = '?overview=full&annotations=true&geometries=geojson'

    coords = list(geom.coords)
    window_step = max_coords - overlap

    urls = []
    start = 0
    while start < len(coords):
        window = coords[start:start + max_coords]
        # Format the numbers explicitly instead of relying on the tuple's repr.
        coord_part = ';'.join(f'{pt[0]},{pt[1]}' for pt in window)
        radius_part = '&radiuses=' + ';'.join([str(radius)] * len(window))
        urls.append(base_url + coord_part + options + radius_part)
        # Stop once this window reaches the last coordinate; a further window
        # would only repeat points that were already sent.
        if start + max_coords >= len(coords):
            break
        start += window_step
    return urls

In [12]:
def return_new_line(reqs, timeout=300):
    """Fetch the first OSRM match request and return its first matching as a line.

    Only ``reqs[0]`` is requested and only the first entry of ``matchings`` is
    used; a warning is emitted whenever further URLs or matchings are dropped
    so a truncated result does not go unnoticed.

    Parameters
    ----------
    reqs : list of str
        Request URLs, as produced by ``request_path``.
    timeout : float, default 300
        Seconds to wait for the server before giving up.

    Returns
    -------
    shapely.geometry.LineString
        Built from the ``geometry.coordinates`` of the first matching.

    Raises
    ------
    ValueError
        If ``reqs`` is empty.
    RuntimeError
        If the response does not report ``code == 'Ok'`` or has no matchings.
    """
    import warnings

    if not reqs:
        raise ValueError("return_new_line needs at least one request URL")
    if len(reqs) > 1:
        warnings.warn(
            f"{len(reqs)} request URLs supplied; only the first one is matched"
        )

    response = requests.get(reqs[0], timeout=timeout)
    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON: surface the HTTP error if there is one,
        # otherwise re-raise the decoding problem.
        response.raise_for_status()
        raise

    matchings = payload.get('matchings') or []
    if payload.get('code') != 'Ok' or not matchings:
        raise RuntimeError(
            f"OSRM match failed (HTTP {response.status_code}, "
            f"code={payload.get('code')!r}): {payload.get('message')}"
        )
    if len(matchings) > 1:
        warnings.warn(
            f"OSRM returned {len(matchings)} matchings; only the first one is used"
        )
    return LineString(matchings[0]['geometry']['coordinates'])

### map matching for stops (nearby streets)

In [14]:
def get_stop_url(geom):
    """Return a one-element list holding the OSRM ``nearest`` URL for a point.

    ``geom`` must expose ``x`` and ``y``; they are written to the URL as
    ``x,y`` and a single nearest result is requested (``number=1``).
    """
    lon_lat = ','.join((str(geom.x), str(geom.y)))
    url = 'http://127.0.0.1:5000/nearest/v1/driving/' + lon_lat + '?number=1'
    return [url]
    

In [15]:
def nearby_points(reqs, timeout=30):
    """Fetch the first OSRM ``nearest`` request and return the snapped location.

    Parameters
    ----------
    reqs : list of str
        Request URLs, as produced by ``get_stop_url``; only ``reqs[0]`` is used.
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Returns
    -------
    shapely.geometry.Point
        Built from the ``location`` of the first waypoint in the response.

    Raises
    ------
    ValueError
        If ``reqs`` is empty.
    RuntimeError
        If the response does not report ``code == 'Ok'`` or has no waypoints.
    """
    if not reqs:
        raise ValueError("nearby_points needs at least one request URL")

    response = requests.get(reqs[0], timeout=timeout)
    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON: surface the HTTP error if there is one,
        # otherwise re-raise the decoding problem.
        response.raise_for_status()
        raise

    waypoints = payload.get('waypoints') or []
    if payload.get('code') != 'Ok' or not waypoints:
        raise RuntimeError(
            f"OSRM nearest failed (HTTP {response.status_code}, "
            f"code={payload.get('code')!r}): {payload.get('message')}"
        )
    return Point(waypoints[0]['location'])

In [16]:
# Wrap stop_times as a GeoDataFrame and attach, per row, the nearest-street
# request URL list built from that row's geometry.
stop_times = gpd.GeoDataFrame(stop_times)
stop_time_urls = stop_times.geometry.apply(get_stop_url)
stop_times['url'] = stop_time_urls

In [17]:
# Preview the first row of stop_times, including the new 'url' column.
stop_times.head(1)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,route_id,service_id,direction_id,shape_id,stop_code,stop_name,stop_desc,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,geometry,url
0,204888020,32663.0,32663.0,18,18,,0,0,5585.64,0,1,1,0,shp-1-04,3074,MARKET + 12TH,,,,,,,2,POINT (-85.30907 35.04235),[http://127.0.0.1:5000/nearest/v1/driving/-85....


In [20]:
# Wrap stops as a GeoDataFrame, build one nearest-street request per stop, and
# store the location returned by the routing server in 'new_geom'.
stops = gpd.GeoDataFrame(stops)
stops['url'] = stops.geometry.apply(get_stop_url)
stops['new_geom'] = stops['url'].apply(nearby_points)

In [21]:
# Preview the first stop with its original geometry and the snapped 'new_geom'.
stops.head(1)

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,geometry,url,new_geom
0,12,2201,MARKET + FAMILY DOLLAR,,,,,,,2,POINT (-85.30953 35.05070),[http://127.0.0.1:5000/nearest/v1/driving/-85....,POINT (-85.309564 35.050702)


In [22]:
# Replace the original stop geometry with the snapped one: discard the old
# 'geometry' and helper 'url' columns, then give 'new_geom' the 'geometry' name.
stops = (
    stops.drop(columns=['geometry', 'url'])
    .rename(columns={'new_geom': 'geometry'})
)

In [24]:
# stop_times_upd = pd.merge(stop_times, stops[['stop_id', 'new_geom']], on=['stop_id'], how='left')
# stop_times_upd = stop_times_upd.drop(['geometry', 'url'], axis=1)
# stop_times_upd.rename(columns ={'new_geom':'geometry'}, inplace=True)

In [25]:
# Split each stop's point into latitude / longitude columns.
# Built in one pass per column (no row-by-row .loc writes), so the columns are
# numeric from the start and the result does not depend on index labels being unique.
stops['stop_lat'] = [point.y for point in stops['geometry']]
stops['stop_lon'] = [point.x for point in stops['geometry']]

In [26]:
# Save the matched stops (every column except the geometry objects).
# Written with mode='w' so that re-running the cell replaces the file; appending
# with header=True would add a second header line and duplicate rows on each run.
stops.drop(columns=['geometry']).to_csv(
    'stops_matched.txt',
    header=True,
    index=False,
    sep=',',
    mode='w',
)

# map matching for paths/shapes

In [27]:
# Build the match request URLs for every shape, record how many requests each
# shape produced, and fetch the map-matched line into 'n_geom'.
shape_urls = shapes.geometry.apply(request_path)
shapes['url'] = shape_urls
shapes['urllength'] = shapes['url'].apply(len)
shapes['n_geom'] = shapes['url'].apply(return_new_line)

In [None]:
# Distinct trip-level attributes per shape: join trips onto shapes, discard the
# per-trip id and the geometry/matching columns, and de-duplicate.
shapes2 = (
    shapes.merge(trips, on='shape_id', how='left')
    .drop(columns=['url', 'urllength', 'trip_id', 'geometry', 'n_geom'])
    .drop_duplicates()
    .reset_index()
    .drop(columns=['index'])
)
# Bring the geometry and matching columns back, one copy per remaining row.
shapes2 = shapes2.merge(shapes, on='shape_id', how='inner')

In [None]:
# Read the reference shapes table (comma-separated, the read_csv default) and
# cast every column to str after parsing.
shapes_actual = pd.read_csv('./gtfs_data/2022-06-10/shapes.txt').astype(str)

In [None]:
# Preview the first row of the reference shapes table.
shapes_actual.head(1)

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence
0,1131953847,35.10479,-85.1601,1


In [None]:
# Pair every vertex of each original shape with the vertex at the same position
# in its map-matched line ('n_geom'), producing one row per original vertex.
# NOTE(review): the pairing is purely positional; it presumes the matched line
# has a vertex for each original vertex in the same order — confirm that holds
# for the routing server's output.
shapes3 = shapes2

vertex_rows = []
for _, shape_row in shapes3.iterrows():
    original_pts = list(shape_row.geometry.coords)
    matched_pts = list(shape_row.n_geom.coords)
    if len(matched_pts) < len(original_pts):
        raise ValueError(
            f"matched line for shape {shape_row.shape_id!r} has {len(matched_pts)} "
            f"vertices but the original has {len(original_pts)}"
        )
    # Coordinates are (lon, lat, ...) tuples: index 0 is longitude, 1 is latitude.
    for original, matched in zip(original_pts, matched_pts):
        vertex_rows.append({
            'shape_id': shape_row.shape_id,
            'shape_pt_lat': original[1],
            'shape_pt_lon': original[0],
            'shape_pt_lat_new': matched[1],
            'shape_pt_lon_new': matched[0],
        })

# Build the frame once; growing it cell by cell with .loc is quadratic.
# dtype=object keeps the values as the plain Python objects they were.
temp_df = pd.DataFrame(
    vertex_rows,
    columns=['shape_id', 'shape_pt_lat', 'shape_pt_lon', 'shape_pt_lat_new', 'shape_pt_lon_new'],
    dtype=object,
)
count = len(vertex_rows)  # total number of vertex rows written


In [None]:
def linestring_to_points(line):
    """Return a one-element list wrapping ``line.coords``.

    NOTE(review): the result is ``[coords]`` (the coordinate sequence itself
    inside a list), not a list of individual points — confirm this is what
    callers expect.
    """
    coord_sequence = line.coords
    return [coord_sequence]

# Row by row, store linestring_to_points(geometry) in a new 'points' column.
# shapes3 was bound to shapes2 without a copy, so presumably shapes2 gains the column too — TODO confirm.
shapes3['points'] = shapes3.apply(lambda l: linestring_to_points(l['geometry']),axis=1)

In [None]:
# Preview the first rows of the original vs. matched vertex table.
temp_df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_lat_new,shape_pt_lon_new
0,1131953806,35.048828,-85.309547,35.048829,-85.309511
1,1131953806,35.048828,-85.309547,35.048829,-85.309511
2,1131953806,35.048828,-85.309547,35.048829,-85.309511
3,1131953806,35.048693,-85.309563,35.048695,-85.309504
4,1131953806,35.048527,-85.309547,35.048529,-85.309495


In [None]:
# Stringify the vertex table so its keys are comparable with shapes_actual,
# which was also cast to str.
temp_df = temp_df.astype(str)

# Look up each original vertex in the reference table by (shape, lat, lon) to
# pick up its shape_pt_sequence, then swap the original coordinates for the
# matched ones.
# NOTE(review): the recorded output repeats shape_pt_sequence 1,2,3,1,2 for
# identical coordinates, which suggests repeated vertices fan out into
# many-to-many matches here — confirm and de-duplicate if unintended.
shapes_mod = (
    temp_df.merge(shapes_actual, on=['shape_id', 'shape_pt_lat', 'shape_pt_lon'])
    .drop(columns=['shape_pt_lat', 'shape_pt_lon'])
    .rename(columns={'shape_pt_lat_new': 'shape_pt_lat', 'shape_pt_lon_new': 'shape_pt_lon'})
)

In [None]:
# Display the merged table of matched shape points.
shapes_mod

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence
0,1131953806,35.048829,-85.309511,1
1,1131953806,35.048829,-85.309511,2
2,1131953806,35.048829,-85.309511,3
3,1131953806,35.048829,-85.309511,1
4,1131953806,35.048829,-85.309511,2
...,...,...,...,...
16741,1131953873,35.099139,-85.280246,521
16742,1131953873,35.099139,-85.280246,522
16743,1131953873,35.099061,-85.280328,523
16744,1131953873,35.098958,-85.280403,524


In [None]:
# Save the matched shapes.
# Written with mode='w' so that re-running the cell replaces the file; appending
# with header=True would add a second header line and duplicate rows on each run.
shapes_mod.to_csv(
    'shapes_matched.txt',
    header=True,
    index=False,
    sep=',',
    mode='w',
)