In [4]:
import folium
import pandas as pd
import geopandas as gpd
from shapely import wkt
import pathlib

In [5]:
pattern_df = pd.read_parquet("../cta-stop-etl/out/pids/95.parquet")
# grab one trip to try stuff out
single_trip_df = pattern_df[pattern_df['unique_trip_vehicle_day'] == '7295.0235318404101004820292023-01-01']
single_trip_gdf = gpd.GeoDataFrame(
        single_trip_df,
        geometry=gpd.GeoSeries.from_xy(
            x=single_trip_df.loc[:, "lon"], y=single_trip_df.loc[:, "lat"], crs="EPSG:4326"
        ))

In [6]:
single_trip_points = [[point.xy[1][0], point.xy[0][0]] for point in single_trip_gdf.geometry ]

In [8]:
segments_df = pd.read_parquet("../cta-stop-etl/out/patterns/pid_95_segment.parquet")
segments_df['geometry'] = segments_df['geometry'].apply(wkt.loads)
segments_df = gpd.GeoDataFrame(segments_df, crs='epsg:4326')

In [10]:
pattern_segements_95 = gpd.GeoSeries(segments_df["geometry"]).simplify(tolerance=0.001)
pattern_segements_95 = pattern_segements_95.to_json()

In [11]:
m = folium.Map(
    location=[41.83491987636846, -87.62004994799588],
    zoom_start=12,
    zoomSnap=0.5,
    tiles=None,
    min_zoom=10,
    max_zoom=12,
    min_lat=41.6,
    max_lat=42.1,
    min_lon=-87.27481026390364,
    max_lon=-87.981026390364,
    max_bounds=True,
)
base_map = folium.FeatureGroup(name="Basemap", overlay=True, control=False)
folium.TileLayer(tiles="cartodb positron").add_to(base_map)
base_map.add_to(m)
for loc in single_trip_points:
    m.add_child(folium.Marker(location = loc))

folium.PolyLine(
    locations=single_trip_points,
    color='green',
    weight=2
).add_to(m)

folium.GeoJson(data=pattern_segements_95, style_function=lambda x: {"fillColor": "orange"}).add_to(m)

m


In [6]:
pattern_df = pd.read_parquet("../cta-stop-etl/out/pids/95.parquet")
# grab one trip to try stuff out
single_trip_df = pattern_df[pattern_df['unique_trip_vehicle_day'] == '7295.0235318404101004820292023-01-01']
single_trip_gdf = gpd.GeoDataFrame(
        single_trip_df,
        geometry=gpd.GeoSeries.from_xy(
            x=single_trip_df.loc[:, "lon"], y=single_trip_df.loc[:, "lat"], crs="EPSG:4326"
        ))
single_trip_gdf = single_trip_gdf.sort_values('data_time')
single_trip_gdf['id'] = single_trip_gdf.index
single_trip_gdf = single_trip_gdf[['id','unique_trip_vehicle_day','vid','data_time','geometry']]

segments_df = pd.read_parquet("../cta-stop-etl/out/patterns/pid_95_segment.parquet")
segments_df['geometry'] = segments_df['geometry'].apply(wkt.loads)
segments_gdf = gpd.GeoDataFrame(segments_df, crs='epsg:4326')
segments_gdf = segments_gdf[['segments','geometry']]

single_trip_gdf['bus_location'] = single_trip_gdf.geometry
merged_df = segments_gdf.sjoin(single_trip_gdf, how="inner", predicate="contains")

merged_df['RN'] = merged_df.groupby(['id']).cumcount() + 1
merged_df_test = merged_df[merged_df['RN'] == 1]
merged_df_test.reset_index(drop=True, inplace=True)

merged_df_test["prev_segment"] = merged_df_test["segments"]-1
merged_df_test["seg_combined"] = (merged_df_test["prev_segment"] + merged_df_test["segments"]) / 2

bus_location_df = merged_df_test[['seg_combined','unique_trip_vehicle_day','data_time','geometry','bus_location']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [7]:
bus_location_df

Unnamed: 0,seg_combined,unique_trip_vehicle_day,data_time,geometry,bus_location
0,2.5,7295.0235318404101004820292023-01-01,2023-01-01 05:57:00,"POLYGON ((-87.63200 41.91079, -87.63206 41.910...",POINT (-87.63219 41.91124)
1,29.5,7295.0235318404101004820292023-01-01,2023-01-01 06:02:00,"POLYGON ((-87.64548 41.91052, -87.64554 41.910...",POINT (-87.64528 41.91097)
2,99.5,7295.0235318404101004820292023-01-01,2023-01-01 06:07:00,"POLYGON ((-87.67203 41.91012, -87.67209 41.910...",POINT (-87.67199 41.91057)
3,128.5,7295.0235318404101004820292023-01-01,2023-01-01 06:12:00,"POLYGON ((-87.68217 41.90995, -87.68223 41.909...",POINT (-87.68232 41.91039)
4,199.5,7295.0235318404101004820292023-01-01,2023-01-01 06:17:00,"POLYGON ((-87.71234 41.90965, -87.71240 41.909...",POINT (-87.71238 41.91010)
5,267.5,7295.0235318404101004820292023-01-01,2023-01-01 06:27:00,"POLYGON ((-87.73976 41.90931, -87.73981 41.909...",POINT (-87.73970 41.90976)
6,314.5,7295.0235318404101004820292023-01-01,2023-01-01 06:32:00,"POLYGON ((-87.76281 41.90901, -87.76286 41.909...",POINT (-87.76342 41.90945)
7,378.5,7295.0235318404101004820292023-01-01,2023-01-01 06:37:00,"POLYGON ((-87.78939 41.90865, -87.78945 41.908...",POINT (-87.78976 41.90908)
8,414.5,7295.0235318404101004820292023-01-01,2023-01-01 06:52:00,"POLYGON ((-87.80344 41.90852, -87.80350 41.908...",POINT (-87.80401 41.90910)
9,414.5,7295.0235318404101004820292023-01-01,2023-01-01 06:47:00,"POLYGON ((-87.80344 41.90852, -87.80350 41.908...",POINT (-87.80401 41.90910)


In [8]:
segments = gpd.GeoSeries(bus_location_df["geometry"]).simplify(tolerance=0.001)
segments = segments.to_json()

In [9]:
bus_location = [[point.xy[1][0], point.xy[0][0]] for point in bus_location_df.bus_location ]

In [13]:
m = folium.Map(
    location=[41.83491987636846, -87.62004994799588],
    zoom_start=12,
    zoomSnap=0.5,
    tiles=None,
    min_zoom=10,
    max_zoom=12,
    min_lat=41.6,
    max_lat=42.1,
    min_lon=-87.27481026390364,
    max_lon=-87.981026390364,
    max_bounds=True,
)
base_map = folium.FeatureGroup(name="Basemap", overlay=True, control=False)
folium.TileLayer(tiles="cartodb positron").add_to(base_map)
base_map.add_to(m)
for loc in bus_location:
    m.add_child(folium.Marker(location = loc))


folium.GeoJson(data=segments, style_function=lambda x: {"fillColor": "orange"}).add_to(m)

m

In [46]:
bus_location_df['typ'] = 'B'

bus_location_df.rename(columns={'bus_location':'location'}, inplace=True)
bus_location_df = bus_location_df[['seg_combined','typ','location','data_time']]

stops_df = pd.read_parquet("../cta-stop-etl/out/patterns/pid_95_stop.parquet")

stops_df.rename(columns={'segment':'seg_combined', 'geometry':'location'}, inplace=True)

stops_df['data_time'] = None
stops_df = stops_df[['seg_combined','typ','location','data_time']]


final_df = pd.concat([bus_location_df, stops_df], axis=0)
final_df = final_df.sort_values(['seg_combined','data_time']).reset_index(drop=True)

In [47]:

final_df['location'] = final_df['location'].astype(str)
final_df['location'] = final_df['location'].apply(wkt.loads)

final_gdf = gpd.GeoDataFrame(final_df, geometry='location', crs='epsg:4326')

In [48]:
final_gdf

Unnamed: 0,seg_combined,typ,location,data_time
0,0.0,S,POINT (-87.63122 41.91130),
1,1.0,W,POINT (-87.63129 41.91124),
2,2.0,W,POINT (-87.63139 41.91124),
3,2.5,B,POINT (-87.63219 41.91124),2023-01-01 05:57:00
4,3.0,W,POINT (-87.63200 41.91124),
...,...,...,...,...
443,432.0,W,POINT (-87.80443 41.91072),
444,433.0,W,POINT (-87.80400 41.91073),
445,434.0,W,POINT (-87.80395 41.90940),
446,435.0,W,POINT (-87.80401 41.90928),


In [49]:

bus = final_gdf[final_gdf['typ'] == 'B']
bus_loc = [[point.xy[1][0], point.xy[0][0]] for point in bus.geometry]

stop = final_gdf[final_gdf['typ'] == 'S']
stop_loc = [[point.xy[1][0], point.xy[0][0]] for point in stop.geometry]

other = final_gdf[final_gdf['typ'] == 'W']
other_loc = [[point.xy[1][0], point.xy[0][0]] for point in other.geometry]

In [64]:
RADIUS = 3

m = folium.Map(
    location=[41.83491987636846, -87.62004994799588],
    zoom_start=12,
    zoomSnap=0.5,
    tiles=None,
    min_zoom=10,
    max_zoom=12,
    min_lat=41.6,
    max_lat=42.1,
    min_lon=-87.27481026390364,
    max_lon=-87.981026390364,
    max_bounds=True,
)
base_map = folium.FeatureGroup(name="Basemap", overlay=True, control=False)
folium.TileLayer(tiles="cartodb positron").add_to(base_map)
base_map.add_to(m)

for loc in bus_loc:
    m.add_child(folium.CircleMarker(location = loc, radius = RADIUS, color="red", fill=True))

for loc in stop_loc:
    m.add_child(folium.CircleMarker(location = loc, radius = RADIUS, color="green", fill=True))


#folium.GeoJson(data=segments, style_function=lambda x: {"fillColor": "orange"}).add_to(m)

m

In [70]:
df = pd.read_csv("../../test_full_pattern.csv")

In [82]:
df.groupby(["unique_trip_vehicle_day",'typ']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,seg_combined,location,data_time
unique_trip_vehicle_day,typ,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7295.0235314066107561419402023-01-04,B,5,5,5
7295.0235314066107561419402023-01-04,S,73,73,0
7295.0235314066107561419402023-01-04,W,364,364,0
7295.0235314066107561419402023-01-05,B,9,9,9
7295.0235314066107561419402023-01-05,S,73,73,0
...,...,...,...,...
7295.02353141648835741081112023-01-05,S,73,73,0
7295.02353141648835741081112023-01-05,W,364,364,0
7295.02353141648835741081142023-01-03,B,16,16,16
7295.02353141648835741081142023-01-03,S,73,73,0


In [78]:
df.tail()

Unnamed: 0,seg_combined,typ,location,data_time,unique_trip_vehicle_day
45030,432.0,W,POINT (-87.80443 41.91072),,7295.02353141648835741081142023-01-03
45031,433.0,W,POINT (-87.804 41.91073),,7295.02353141648835741081142023-01-03
45032,434.0,W,POINT (-87.80395 41.9094),,7295.02353141648835741081142023-01-03
45033,435.0,W,POINT (-87.804012 41.909284),,7295.02353141648835741081142023-01-03
45034,436.0,S,POINT (-87.804008 41.909094),,7295.02353141648835741081142023-01-03


In [79]:
test_one = df[df["unique_trip_vehicle_day"]== "7295.02353141648835741081142023-01-03"]

test_one['location'] = test_one['location'].astype(str)
test_one['location'] = test_one['location'].apply(wkt.loads)

test_one = gpd.GeoDataFrame(test_one, geometry='location', crs='epsg:4326')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_one['location'] = test_one['location'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_one['location'] = test_one['location'].apply(wkt.loads)


In [80]:
bus = test_one[test_one['typ'] == 'B']
bus_loc = [[point.xy[1][0], point.xy[0][0]] for point in bus.geometry]

stop = test_one[test_one['typ'] == 'S']
stop_loc = [[point.xy[1][0], point.xy[0][0]] for point in stop.geometry]

other = test_one[test_one['typ'] == 'W']
other_loc = [[point.xy[1][0], point.xy[0][0]] for point in other.geometry]

In [81]:
RADIUS = 3

m = folium.Map(
    location=[41.83491987636846, -87.62004994799588],
    zoom_start=12,
    zoomSnap=0.5,
    tiles=None,
    min_zoom=10,
    max_zoom=12,
    min_lat=41.6,
    max_lat=42.1,
    min_lon=-87.27481026390364,
    max_lon=-87.981026390364,
    max_bounds=True,
)
base_map = folium.FeatureGroup(name="Basemap", overlay=True, control=False)
folium.TileLayer(tiles="cartodb positron").add_to(base_map)
base_map.add_to(m)

for loc in bus_loc:
    m.add_child(folium.CircleMarker(location = loc, radius = RADIUS, color="red", fill=True))

for loc in stop_loc:
    m.add_child(folium.CircleMarker(location = loc, radius = RADIUS, color="green", fill=True))


#folium.GeoJson(data=segments, style_function=lambda x: {"fillColor": "orange"}).add_to(m)

m

In [25]:
# get common pid for two routes with same bus stop
all_trips_12_11 = pd.read_parquet("../cta-stop-etl/out/parquets/2023-12-11.parquet")
print("172", all_trips_12_11[all_trips_12_11['rt'] == '172']['pid'].value_counts())
print("171", all_trips_12_11[all_trips_12_11['rt'] == '171']['pid'].value_counts())

#171
df_14111 = pd.read_parquet("../cta-stop-etl/out/patterns_raw/pid_14111_raw.parquet")

#172
df_14103 = pd.read_parquet("../cta-stop-etl/out/patterns_raw/pid_14103_raw.parquet")

gdf_14111 = gpd.GeoDataFrame(
            df_14111,
            geometry=gpd.GeoSeries.from_xy(
                x=df_14111.loc[:, "lon"], y=df_14111.loc[:, "lat"], crs="EPSG:4326"
            ),
        )

gdf_14111[gdf_14111['typ'] == 'S'].explore()

df_14103 = gpd.GeoDataFrame(
            df_14103,
            geometry=gpd.GeoSeries.from_xy(
                x=df_14103.loc[:, "lon"], y=df_14103.loc[:, "lat"], crs="EPSG:4326"
            ),
        )

df_14103[df_14103['typ'] == 'S'].explore()

In [27]:
# 14111 is 171, 14103 is 172
# they both have a stop at 60th Street & Kenwood
# both have stpid 17902
gdf_14111[gdf_14111['stpnm'] == '60th Street & Kenwood']
df_14103[df_14103['stpnm'] == '60th Street & Kenwood']

Unnamed: 0,seq,lat,lon,typ,stpid,stpnm,pdist,geometry
7,8,41.786048,-87.593325,S,17902,60th Street & Kenwood,1141.0,POINT (-87.59333 41.78605)


In [35]:
test = gpd.read_parquet("../cta-stop-etl/out/test/pid_14111_test_trips.parquet")
test


Unnamed: 0,unique_trip_vehicle_day,seg_combined,typ,stpid,p_stp_id,geometry,bus_stop_time,bus_location_time
0,17114111.02350517678834855510972023-01-04,0.0,S,14020,14111-14020,POINT (-87.59742 41.78591),NaT,NaT
1,17114111.02350517678834855510972023-01-04,3.0,S,15433,14111-15433,POINT (-87.59647 41.78597),NaT,NaT
2,17114111.02350517678834855510972023-01-04,7.0,S,17902,14111-17902,POINT (-87.59333 41.78605),NaT,NaT
3,17114111.02350517678834855510972023-01-04,10.0,S,17423,14111-17423,POINT (-87.59142 41.78633),NaT,NaT
4,17114111.02350517678834855510972023-01-04,14.5,B,,,POINT (-87.59322 41.78791),NaT,2023-01-04 06:37:00
...,...,...,...,...,...,...,...,...
1829,17114111.02350518201000588511872023-01-06,59.0,S,1518,14111-1518,POINT (-87.58385 41.79556),2023-01-06 15:29:34.593340596,NaT
1830,17114111.02350518201000588511872023-01-06,62.0,S,1520,14111-1520,POINT (-87.58391 41.79847),2023-01-06 15:30:45.564994609,NaT
1831,17114111.02350518201000588511872023-01-06,65.0,S,1521,14111-1521,POINT (-87.58393 41.79953),2023-01-06 15:31:14.165939703,NaT
1832,17114111.02350518201000588511872023-01-06,68.5,B,,,POINT (-87.58312 41.79962),NaT,2023-01-06 15:32:00
