## Importing libraries

In [None]:
# !pip install python-dotenv
import requests
import pandas as pd
import geopandas as gpd
from dotenv import load_dotenv
import os
import zipfile
import shutil

# Retrieving api key
# Load variables from the local env file, then read the key from the process environment.
load_dotenv("../key.env")
api_key = os.getenv("API_KEY")
# Never echo the secret itself: cell outputs are stored inside the .ipynb file
# and travel with it when the notebook is committed or shared.
# Report only whether a key was found (os.getenv returns None when it is unset).
print("API_KEY loaded" if api_key else "API_KEY not found - check ../key.env")

In [None]:
# Run the companion notebook so that the names it defines become available here.
# The next cell calls get_bus_info(url, api_key), which is not defined anywhere
# in this notebook - presumably it is defined in get_bus_info_function.ipynb
# (TODO confirm). The path is relative, so it presumably resolves against the
# kernel's working directory.
%run get_bus_info_function.ipynb

In [None]:
# Fetch the three bus reference tables (services, routes, stops) through the
# shared get_bus_info helper, passing the API key to each call. The endpoints
# are requested in the order listed and unpacked into one variable per table.
bus_services_df, bus_routes_df, bus_stops_df = [
    get_bus_info(endpoint_url, api_key)
    for endpoint_url in (
        "https://datamall2.mytransport.sg/ltaodataservice/BusServices",
        "https://datamall2.mytransport.sg/ltaodataservice/BusRoutes",
        "https://datamall2.mytransport.sg/ltaodataservice/BusStops",
    )
]

In [None]:
# Monthly per-stop passenger-volume extracts for Jul, Aug and Sep 2024.
#
# PT_CODE is read as text on purpose. Left to type inference, pandas parses an
# all-digit column as integers, which strips leading zeros (e.g. "01012" -> 1012);
# casting to str afterwards cannot bring them back. PT_CODE is later merged
# against the string BusStopCode column, so any stop whose code starts with "0"
# would silently fail to match.
# NOTE(review): assumes the CSVs store zero-padded stop codes - confirm against the files.
bus_stop_pv_jul = pd.read_csv("../datasets/pv_bus_stops/transport_node_bus_202407.csv",
                              dtype={"PT_CODE": str})
bus_stop_pv_aug = pd.read_csv("../datasets/pv_bus_stops/transport_node_bus_202408.csv",
                              dtype={"PT_CODE": str})
bus_stop_pv_sep = pd.read_csv("../datasets/pv_bus_stops/transport_node_bus_202409.csv",
                              dtype={"PT_CODE": str})

In [None]:
# Trim both reference tables down to the columns used in this analysis.
route_columns = ["ServiceNo", "Direction", "StopSequence", "BusStopCode"]
stop_columns = ["BusStopCode", "RoadName", "Description"]
bus_routes_simple = bus_routes_df.loc[:, route_columns]
bus_stops_simple = bus_stops_df.loc[:, stop_columns]

# Attach road name and description to every route/stop row. The inner join
# keeps only stop codes that appear in both tables.
bus_routes_stops = bus_routes_simple.merge(bus_stops_simple, on="BusStopCode", how="inner")

# Preview the joined table.
bus_routes_stops.head()

### Total Passenger Volume by Bus Route

In [None]:
# The merge keys must share a dtype: cast the stop code to str on the route
# table ('BusStopCode') and on each monthly volume table ('PT_CODE').
bus_routes_stops['BusStopCode'] = bus_routes_stops['BusStopCode'].astype(str)
for pv_month in (bus_stop_pv_jul, bus_stop_pv_aug, bus_stop_pv_sep):
    pv_month['PT_CODE'] = pv_month['PT_CODE'].astype(str)

# Left-join each month's per-stop volumes onto the route/stop table, so every
# route/stop row is kept even when that month has no volume record for the stop.
# NOTE(review): the join key is the stop code only, so each stop's volume rows
# are repeated for every route/stop row that references the stop - confirm this
# attribution is what the analysis intends.
route_passenger_volumes, route_passenger_volumes_aug, route_passenger_volumes_sep = (
    bus_routes_stops.merge(pv_month, left_on='BusStopCode', right_on="PT_CODE", how='left')
    for pv_month in (bus_stop_pv_jul, bus_stop_pv_aug, bus_stop_pv_sep)
)

# Sum tap-in and tap-out volumes per service and month.
monthly_passenger_volume_jul, monthly_passenger_volume_aug, monthly_passenger_volume_sep = (
    route_volumes
    .groupby(['ServiceNo', 'YEAR_MONTH'])[['TOTAL_TAP_IN_VOLUME', 'TOTAL_TAP_OUT_VOLUME']]
    .sum()
    .reset_index()
    for route_volumes in (route_passenger_volumes,
                          route_passenger_volumes_aug,
                          route_passenger_volumes_sep)
)

In [None]:
# Stack the three monthly summaries into one long table with a fresh 0..n-1
# index, then add the combined tap-in + tap-out volume as 'TOTAL_TAP_VOLUME'.
monthly_summaries = [
    monthly_passenger_volume_jul,
    monthly_passenger_volume_aug,
    monthly_passenger_volume_sep,
]
all_months_data = pd.concat(monthly_summaries, ignore_index=True).assign(
    TOTAL_TAP_VOLUME=lambda frame: frame['TOTAL_TAP_IN_VOLUME'] + frame['TOTAL_TAP_OUT_VOLUME']
)

In [None]:
# Reshape to one row per 'ServiceNo' and one column per 'YEAR_MONTH'. The cell
# values are TOTAL_TAP_VOLUME, i.e. tap-in plus tap-out, despite the variable name.
tap_in_pivot = all_months_data.pivot(index='ServiceNo', columns='YEAR_MONTH', values='TOTAL_TAP_VOLUME')

# Flag services whose volume fell strictly in both steps (Jul > Aug > Sep).
# A service with no value for some month holds NaN there; comparisons with NaN
# evaluate to False, so such a service is never flagged.
tap_in_pivot['Decreasing'] = (
    (tap_in_pivot['2024-07'] > tap_in_pivot['2024-08'])
    & (tap_in_pivot['2024-08'] > tap_in_pivot['2024-09'])
)

# Keep only the flagged services and turn 'ServiceNo' back into a regular column.
# After the pivot, 'YEAR_MONTH' is the *name of the columns axis*, not a column
# label, so rename(columns={'YEAR_MONTH': ...}) matches nothing. rename_axis is
# the call that actually relabels it.
decreasing_routes_df = (
    tap_in_pivot[tap_in_pivot['Decreasing']]
    .reset_index()
    .rename_axis(columns='Index')
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Size of the drop in volume between the first and the last month of the
# window, stored on the flagged-routes table as 'Decrease'.
first_month, last_month = '2024-07', '2024-09'
decreasing_routes_df['Decrease'] = decreasing_routes_df[first_month].sub(decreasing_routes_df[last_month])

# Rank the routes from the largest drop to the smallest and save the ranking
# to CSV without the row index.
decreasing_routes_df_sorted = decreasing_routes_df.sort_values('Decrease', ascending=False)
decreasing_routes_df_sorted.to_csv('../datasets/pv_eda/routes_w_decreasing_pv.csv', index=False)

In [None]:
# The ten routes with the largest drop (the table is already sorted by 'Decrease').
top_10_decreasing_routes = decreasing_routes_df_sorted.head(10)

# Monthly volumes for those routes, in ranking order, with 'ServiceNo' as a column.
tap_in = tap_in_pivot.loc[
    top_10_decreasing_routes['ServiceNo'], ['2024-07', '2024-08', '2024-09']
].reset_index()

# One line per service across the three months. The transpose puts the months
# on the x-axis and gives each service its own line.
fig, ax = plt.subplots(figsize=(12, 8))
tap_in.set_index('ServiceNo').T.plot(marker='o', ax=ax)

ax.set_title('Top 10 Routes with Largest Decrease (2024-07 to 2024-09)')
ax.set_xlabel('Month')
# The plotted values are TOTAL_TAP_VOLUME (tap-in + tap-out), not tap-ins alone,
# so the axis is labelled to match.
ax.set_ylabel('Total Tap Volume (Tap-In + Tap-Out)')
ax.tick_params(axis='x', labelrotation=45)
# Anchor the legend outside the axes so it does not cover the lines.
ax.legend(title='Service Number', bbox_to_anchor=(1, 1), loc='upper left')
ax.grid()
plt.show()