In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString

In [12]:
if os.path.exists('/content/drive/My Drive/Capstone/Data'):
    base_dir = '/content/drive/My Drive/Capstone/Data'
else:
    base_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'Data'))

/content/drive/My Drive/Capstone/Codes


In [None]:
def count_auth(loc_df, auth_df):
    """
    Counts the number of authentications occurring between each pair of consecutive GPS timestamps
    for each bus ID.

    Args:
        loc_df (pd.DataFrame): DataFrame containing GPS location data. Must include 'bus_number',
                               'date', and 'time' columns.
        auth_df (pd.DataFrame): DataFrame containing authentication data. Must include 'bus_number',
                                'date', and 'time' columns.

    Returns:
        pd.DataFrame: The modified `loc_df` with an added column 'auth_count_in_interval' that contains
                      the count of authentications between successive GPS timestamps.
    """
    loc_df = loc_df.drop_duplicates()
    loc_df['datetime'] = pd.to_datetime(loc_df['date'].astype(str) + ' ' + loc_df['time'].astype(str))
    auth_df['datetime'] = pd.to_datetime(auth_df['date'].astype(str) + ' ' + auth_df['time'].astype(str))

    loc_df['auth_count_in_interval'] = 0

    for bus_id, loc_group in loc_df.groupby('bus_number'):
        loc_group = loc_group.sort_values('datetime').reset_index()
        auth_times = auth_df[auth_df['bus_number'] == bus_id]['datetime']

        for i in range(len(loc_group)):
            current_time = loc_group.loc[i, 'datetime']

            if i == 0:
                count = auth_times[auth_times < current_time].count()
            else:
                prev_time = loc_group.loc[i - 1, 'datetime']
                count = auth_times[(auth_times > prev_time) & (auth_times <= current_time)].count()

            original_index = loc_group.loc[i, 'index']
            loc_df.at[original_index, 'auth_count_in_interval'] = count
    return loc_df

In [None]:
loc_df = pd.read_excel(os.path.join(base_dir, "line_29/line29_loc.xlsx"))
auth_df = pd.read_excel(os.path.join(base_dir, "line_29/line29_auth_all.xlsx"))
loc_df_upd = count_auth(loc_df, auth_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  loc_df['datetime'] = pd.to_datetime(loc_df['date'].astype(str) + ' ' + loc_df['time'].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  loc_df['auth_count_in_interval'] = 0


In [None]:
loc_df_upd.to_excel(os.path.join(base_dir, "line_29/line29_auth_count.xlsx"), index=False)

In [None]:
geometry = [Point(xy) for xy in zip(loc_df_upd['longitude'], loc_df_upd['latitude'])]
auth_loc = gpd.GeoDataFrame(loc_df_upd, geometry=geometry, crs="EPSG:4326")

auth_loc.to_file(os.path.join(base_dir, "line_29/line29_auth_count.geojson"), driver="GeoJSON")

In [6]:
auth_loc = gpd.read_file(os.path.join(base_dir, "line_29/line29_auth_count.geojson"))
stops = gpd.read_file(os.path.join(base_dir, "line_29/29_stops.geojson"))
with open(os.path.join(base_dir, "line_29/29_items.json"), "r", encoding="utf-8") as f:
    items = json.load(f)



In [7]:
stops = stops.to_crs(epsg=32638)
fwd_stop1 = items[0]['essential_stop_id_1']
fwd_stop2 = items[0]['essential_stop_id_2']
bwd_stop1 = items[1]['essential_stop_id_1']
bwd_stop2 = items[1]['essential_stop_id_2']

buf_fwd_stop1 = stops[stops['stop_id'] == fwd_stop1].geometry.iloc[0].buffer(500)
buf_fwd_stop2 = stops[stops['stop_id'] == fwd_stop2].geometry.iloc[0].buffer(100)
buf_bwd_stop1 = stops[stops['stop_id'] == bwd_stop1].geometry.iloc[0].buffer(500)
buf_bwd_stop2 = stops[stops['stop_id'] == bwd_stop2].geometry.iloc[0].buffer(100)

In [8]:
auth_loc['datetime'] = pd.to_datetime(auth_loc['datetime'])
auth_loc = auth_loc.sort_values(by=['bus_number', 'datetime']).reset_index(drop=True)
auth_loc = auth_loc.to_crs(epsg=32638)

In [9]:
data = auth_loc[auth_loc['bus_number'] == 5481]
data

Unnamed: 0,bus_number,location,hyperlink,latitude,longitude,week_number,weekday,date,timestamp,datetime,auth_count_in_interval,geometry
0,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18634,44.57674",40.18634,44.57674,Week 1,Monday,2025-04-07,06:30-07:00,2025-04-07 06:52:58,0,POINT (463968.997 4448525.347)
1,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18609,44.57689",40.18609,44.57689,Week 1,Monday,2025-04-07,06:30-07:00,2025-04-07 06:57:13,0,POINT (463981.634 4448497.538)
2,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18609,44.57689",40.18609,44.57689,Week 1,Monday,2025-04-07,07:00-07:30,2025-04-07 07:01:56,0,POINT (463981.634 4448497.538)
3,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18609,44.57689",40.18609,44.57689,Week 1,Monday,2025-04-07,07:00-07:30,2025-04-07 07:03:57,0,POINT (463981.634 4448497.538)
4,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18609,44.57689",40.18609,44.57689,Week 1,Monday,2025-04-07,07:00-07:30,2025-04-07 07:11:08,0,POINT (463981.634 4448497.538)
...,...,...,...,...,...,...,...,...,...,...,...,...
7381,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18925,44.58530",40.18925,44.58530,Week 2,Sunday,2025-04-20,22:30-23:00,2025-04-20 22:40:35,0,POINT (464699.198 4448844.902)
7382,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18634,44.57833",40.18634,44.57833,Week 2,Sunday,2025-04-20,22:30-23:00,2025-04-20 22:42:56,0,POINT (464104.35 4448524.703)
7383,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18609,44.57687",40.18609,44.57687,Week 2,Sunday,2025-04-20,22:30-23:00,2025-04-20 22:44:33,0,POINT (463979.931 4448497.546)
7384,5481,"Թեվոսյան փողոց, Yerevan, AM","https://www.google.com/maps?q=40.18609,44.57687",40.18609,44.57687,Week 2,Sunday,2025-04-20,22:30-23:00,2025-04-20 22:49:35,0,POINT (463979.931 4448497.546)


In [10]:
def assign_direction_to_bus(auth_loc, buf_fwd_stop1=buf_fwd_stop1, buf_fwd_stop2=buf_fwd_stop2, buf_bwd_stop1=buf_bwd_stop1, buf_bwd_stop2=buf_bwd_stop2):
    """
    Assigns a travel direction ('forward' or 'backward') to each row in a GeoDataFrame
    based on whether points fall within predefined buffer zones around reference stops.

    Args:
        auth_loc (GeoDataFrame): The authentication location data with geometry column.
        buf_fwd_stop1 (Polygon): Buffer polygon around the forward start stop.
        buf_fwd_stop2 (Polygon): Buffer polygon around the forward end stop.
        buf_bwd_stop1 (Polygon): Buffer polygon around the backward start stop.
        buf_bwd_stop2 (Polygon): Buffer polygon around the backward end stop.

    Returns:
        GeoDataFrame: The input GeoDataFrame with a new 'direction' column added,
                      indicating either 'forward' or 'backward' where applicable.
    """
    auth_loc['direction'] = None
    current_direction = None
    in_direction = False

    for idx, row in auth_loc.iterrows():
        point = row.geometry

        if not in_direction:
            if point.within(buf_fwd_stop1):
                current_direction = 'forward'
                in_direction = True
            elif point.within(buf_bwd_stop1):
                current_direction = 'backward'
                in_direction = True

        else:
            if current_direction == 'forward' and point.within(buf_fwd_stop2):
                current_direction = 'backward'
            elif current_direction == 'backward' and point.within(buf_bwd_stop2):
                current_direction = 'forward'

        if current_direction is not None:
            auth_loc.at[idx, 'direction'] = current_direction

    return auth_loc


def trim_start_and_end_chunks(df):
    """
    Trims irrelevant data from the beginning and end of a bus trajectory
    based on where the direction value is missing or changes.

    Args:
        df (pd.DataFrame): DataFrame with a 'direction' column.

    Returns:
        pd.DataFrame: Trimmed DataFrame containing only the valid directional segments.
    """
    first_valid_idx = df['direction'].first_valid_index()
    if first_valid_idx is not None:
        df = df.loc[first_valid_idx:].copy()

    last_direction = df['direction'].dropna().iloc[-1] if not df['direction'].dropna().empty else None
    if last_direction:
        reversed_dir = df['direction'].iloc[::-1]
        switch_idx = reversed_dir[reversed_dir != last_direction].first_valid_index()
        if switch_idx is not None:
            cutoff_idx = df.index.get_loc(switch_idx)
            df = df.iloc[:cutoff_idx]
        else:
            df = df.iloc[0:0]

    return df


def assign_directions_to_all(data, buf_fwd_stop1=buf_fwd_stop1, buf_fwd_stop2=buf_fwd_stop2, buf_bwd_stop1=buf_bwd_stop1, buf_bwd_stop2=buf_bwd_stop2):
    """
    Assigns directions to all bus trips by iterating over each bus's data,
    applying spatial rules, and trimming irrelevant segments.

    Args:
        data (pd.DataFrame): DataFrame containing all bus movement records. Must include
                             'bus_number', 'datetime', and geometry.
        buf_fwd_stop1, buf_fwd_stop2, buf_bwd_stop1, buf_bwd_stop2 (Polygon): Buffer zones
                             used to determine directionality.

    Returns:
        pd.DataFrame: A new DataFrame with direction labels applied and trimmed per bus trip.
    """
    data['datetime'] = pd.to_datetime(data['datetime'])
    data = data.sort_values(by=['bus_number', 'datetime'])

    result = []

    for bus_id, bus_data in data.groupby('bus_number'):
        bus_result = assign_direction_to_bus(
            bus_data.copy(),
            buf_fwd_stop1, buf_fwd_stop2,
            buf_bwd_stop1, buf_bwd_stop2
        )
        bus_result_trimmed = trim_start_and_end_chunks(bus_result)
        result.append(bus_result_trimmed)

    return pd.concat(result).reset_index(drop=True)

In [11]:
auth_loc_dir = assign_directions_to_all(auth_loc)
auth_loc_dir.to_file(os.path.join(base_dir, "line_29/auth_loc_with_dir.geojson"), driver="GeoJSON")
auth_loc_dir.drop(columns='geometry').to_csv(os.path.join(base_dir, "line_29/auth_loc_with_dir.csv"), index=False)
auth_loc_dir

Unnamed: 0,bus_number,location,hyperlink,latitude,longitude,week_number,weekday,date,timestamp,datetime,auth_count_in_interval,geometry,direction
0,5481,"Մարշալ Խուդյակովի փողոց, Yerevan, AM","https://www.google.com/maps?q=40.21054,44.56576",40.21054,44.56576,Week 1,Monday,2025-04-07,07:30-08:00,2025-04-07 07:51:07,4,POINT (463047.435 4451215.93),backward
1,5481,"Մարշալ Խուդյակովի փողոց, Yerevan, AM","https://www.google.com/maps?q=40.21134,44.56753",40.21134,44.56753,Week 1,Monday,2025-04-07,07:30-08:00,2025-04-07 07:52:16,0,POINT (463198.49 4451303.99),backward
2,5481,"Մարշալ Խուդյակովի փողոց, Yerevan, AM","https://www.google.com/maps?q=40.21220,44.56977",40.21220,44.56977,Week 1,Monday,2025-04-07,07:30-08:00,2025-04-07 07:53:13,0,POINT (463389.569 4451398.519),backward
3,5481,"Գեներալ Սաֆարյանի փողոց, Yerevan, AM","https://www.google.com/maps?q=40.21276,44.57240",40.21276,44.57240,Week 1,Monday,2025-04-07,07:30-08:00,2025-04-07 07:54:20,1,POINT (463613.669 4451459.594),backward
4,5481,"Գեներալ Սաֆարյանի փողոց, Yerevan, AM","https://www.google.com/maps?q=40.21042,44.57772",40.21042,44.57772,Week 1,Monday,2025-04-07,07:30-08:00,2025-04-07 07:55:35,0,POINT (464065.136 4451197.699),backward
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23327,5509,"Շենգավիթ, Yerevan, AM","https://www.google.com/maps?q=40.14543,44.48457",40.14543,44.48457,Week 1,Tuesday,2025-04-08,20:00-20:30,2025-04-08 20:06:10,1,POINT (456096.428 4444026.051),backward
23328,5509,"Շենգավիթ, Yerevan, AM","https://www.google.com/maps?q=40.14454,44.48432",40.14454,44.48432,Week 1,Tuesday,2025-04-08,20:00-20:30,2025-04-08 20:07:29,1,POINT (456074.56 4443927.39),backward
23329,5509,"Շենգավիթ, Yerevan, AM","https://www.google.com/maps?q=40.14355,44.48417",40.14355,44.48417,Week 1,Tuesday,2025-04-08,20:00-20:30,2025-04-08 20:07:55,0,POINT (456061.145 4443817.58),backward
23330,5509,"Շենգավիթ, Yerevan, AM","https://www.google.com/maps?q=40.14355,44.48417",40.14355,44.48417,Week 1,Tuesday,2025-04-08,20:00-20:30,2025-04-08 20:07:56,0,POINT (456061.145 4443817.58),backward
