In [2]:
# Libraries
import requests
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Data Collection: DO NOT RUN

## Bus Services

In [None]:
base_url = "http://datamall2.mytransport.sg/ltaodataservice"
endpoint_url1 = "/BusServices"
endpoint_url2 = "/BusServices?$skip=500"
endpoints = [endpoint_url1, endpoint_url2]

busservices = []

for endpoint in endpoints:
    resource_url = base_url + endpoint
    # Request data from the server
    res = requests.get(resource_url, headers={"AccountKey": "/QEgD9jXTcORylTPew4GVA==", "accept": "application/json"})
    res_list = res.json()
    df = pd.DataFrame(res_list['value'])
    busservices.append(df)

busservices_df = pd.concat(busservices, ignore_index=True)
busservices_df.to_csv('../data/busservices.csv', index=False)
busservices_df.head(10)

## Bus Routes

In [None]:
base_url = "http://datamall2.mytransport.sg/ltaodataservice"
skip_values = list(range(0, 30000, 500))  # Generating skip values from 0 to 30000 in steps of 500

busroutes = []

for skip in skip_values:
    endpoint_url = f"/BusRoutes?$skip={skip}"
    resource_url = base_url + endpoint_url
    res = requests.get(resource_url, headers={"AccountKey": "/QEgD9jXTcORylTPew4GVA==", "accept": "application/json"})
    res_list = res.json()
    df = pd.DataFrame(res_list['value'])
    busroutes.append(df)

busroutes_df = pd.concat(busroutes, ignore_index=True)


In [None]:
busroutes_df.to_csv("../data/busroutes.csv", index=False)

## Planning Areas from Onemap API

In [None]:
url = "https://www.onemap.gov.sg/api/public/popapi/getAllPlanningarea?year=2019"
token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiI3MWQ2ZDc2OTZlOGViYzUxZTYzODI5N2I0MzdjZWI5NCIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC0xMjIzNjk4OTkyLmFwLXNvdXRoZWFzdC0xLmVsYi5hbWF6b25hd3MuY29tL2FwaS92Mi91c2VyL3Bhc3N3b3JkIiwiaWF0IjoxNzEwOTIwMDc3LCJleHAiOjE3MTExNzkyNzcsIm5iZiI6MTcxMDkyMDA3NywianRpIjoia2VtMlFvOGlrOFpKUFB3ciIsInVzZXJfaWQiOjI5OTUsImZvcmV2ZXIiOmZhbHNlfQ.PrqMd97I0gruP6kPpZkPfnERdPdnJbyPeV_okC5UCpQ'
headers = {"Authorization": token}      
      
response = requests.request("GET", url, headers=headers)
resp_list = response.json()

### Convert GeoJSON into dataframe

In [None]:
import json
import geopandas as gpd

planning_area = pd.DataFrame(resp_list['SearchResults'])
gdf = []
for index, row in planning_area.iterrows():
    gdf_area = gpd.GeoDataFrame(json.loads(row['geojson']))
    gdf_area['pln_area_n'] = row['pln_area_n']
    gdf.append(gdf_area)
planning_area_gdf = pd.concat(gdf, ignore_index=True)
planning_area_gdf.head(10)
# save locally
planning_area_gdf.to_csv('../data/planning_area.csv', index=False)

## Bus Stops

In [None]:
# bus stops
base_url = "http://datamall2.mytransport.sg/ltaodataservice"
skip_values = list(range(0, 5000, 500))

busstops = []

for skip in skip_values:
    endpoint_url = f"/BusStops?$skip={skip}"
    resource_url = base_url + endpoint_url
    res = requests.get(resource_url, headers={"AccountKey": "7hrdcp4vQ0inFGn4IwWHQw==", "accept": "application/json"})
    res_list = res.json()
    df = pd.DataFrame(res_list['value'])
    busstops.append(df)

busstops_df = pd.concat(busstops, ignore_index=True)

busstops_df.to_csv("../data/busstops.csv", index=False)

### Extract Planning Area from OneMap API to join with Bus Stops df

In [None]:
busstops = pd.read_csv('../data/busstops.csv')

busstops['planning_area'] = ''

domain = 'https://www.onemap.gov.sg/api/public/popapi/getPlanningarea?'
token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiI3MWQ2ZDc2OTZlOGViYzUxZTYzODI5N2I0MzdjZWI5NCIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC0xMjIzNjk4OTkyLmFwLXNvdXRoZWFzdC0xLmVsYi5hbWF6b25hd3MuY29tL2FwaS92Mi91c2VyL3Bhc3N3b3JkIiwiaWF0IjoxNzEwOTIwMDc3LCJleHAiOjE3MTExNzkyNzcsIm5iZiI6MTcxMDkyMDA3NywianRpIjoia2VtMlFvOGlrOFpKUFB3ciIsInVzZXJfaWQiOjI5OTUsImZvcmV2ZXIiOmZhbHNlfQ.PrqMd97I0gruP6kPpZkPfnERdPdnJbyPeV_okC5UCpQ'
headers = {"Authorization": token}
incl_lat = 'latitude='
incl_long = '&longitude='

for index,row in busstops.iterrows():
    
    if index %100 ==0:
        print(index)
    
    #print(index)
    
    lat = str(row['Latitude'])
    long = str(row['Longitude'])
    
    query_string = domain+incl_lat+lat+incl_long+long
    print(query_string)

    try:
        response = requests.request("GET", query_string, headers=headers, timeout=15)
        resp_list = response.json()
        print(resp_list[0]['pln_area_n'])

        busstops.loc[index, "planning_area"] = resp_list[0]['pln_area_n']
        
    except:
        busstops.loc[index, "planning_area"] = 'invalid'

busstops.tail(10)

In [None]:
print(busstops[busstops['planning_area'] == 'invalid'])

#### Drop Bus Stops in Johor Bahru (MY)

In [None]:
busstops = busstops.drop(busstops[busstops['planning_area'] == 'invalid'].index)
busstops.info()

#### Save data locally

In [None]:
busstops.to_csv('../data/busstops_with_planningarea.csv', index=False)

## Train Stations

In [None]:
import geopandas as gpd

# Read the shapefile
shape = gpd.read_file("../data/TrainStation_Feb2023/RapidTransitSystemStation.shp")
shape.info()
shape.head()


In [None]:
# Calculate centroid coordinates
shape['centroid_y'] = shape.geometry.centroid.y
shape['centroid_x'] = shape.geometry.centroid.x

# Display the GeoDataFrame with centroid coordinates
print(shape[['centroid_y', 'centroid_x']])

### Convert to Longitude, Latitude coordinates

In [None]:
from pyproj import Proj, transform

# Define the SVY21 projection (EPSG:3414)
svy21 = Proj(init='EPSG:3414')

# Define the WGS84 projection (EPSG:4326)
wgs84 = Proj(init='EPSG:4326')

# Define the SVY21 coordinates (example values)
svy21_x = shape['centroid_x']
svy21_y = shape['centroid_y']

# Perform the coordinate transformation
lon, lat = transform(svy21, wgs84, svy21_x, svy21_y)

# Print the latitude and longitude coordinates
location = pd.DataFrame({'latitude': lat, 'longitude': lon})
trainstations = pd.concat([shape, location], axis=1)
trainstations.head()


In [None]:
trainstations = trainstations.drop(columns=['TYP_CD', 'STN_NAM', 'centroid_y', 'centroid_x'])

### Extract Planning Area from OneMap API for Train Stations

In [None]:
trainstations['planning_area'] = ''

domain = 'https://www.onemap.gov.sg/api/public/popapi/getPlanningarea?'
token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiI3MWQ2ZDc2OTZlOGViYzUxZTYzODI5N2I0MzdjZWI5NCIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC0xMjIzNjk4OTkyLmFwLXNvdXRoZWFzdC0xLmVsYi5hbWF6b25hd3MuY29tL2FwaS92Mi91c2VyL3Bhc3N3b3JkIiwiaWF0IjoxNzEwOTIwMDc3LCJleHAiOjE3MTExNzkyNzcsIm5iZiI6MTcxMDkyMDA3NywianRpIjoia2VtMlFvOGlrOFpKUFB3ciIsInVzZXJfaWQiOjI5OTUsImZvcmV2ZXIiOmZhbHNlfQ.PrqMd97I0gruP6kPpZkPfnERdPdnJbyPeV_okC5UCpQ'
headers = {"Authorization": token}
incl_lat = 'latitude='
incl_long = '&longitude='

for index,row in trainstations.iterrows():
    
    if index %100 ==0:
        print(index)
    
    #print(index)
    
    lat = str(row['latitude'])
    long = str(row['longitude'])
    
    query_string = domain+incl_lat+lat+incl_long+long
    print(query_string)

    try:
        response = requests.request("GET", query_string, headers=headers, timeout=15)
        resp_list = response.json()
        print(resp_list[0]['pln_area_n'])

        trainstations.loc[index, "planning_area"] = resp_list[0]['pln_area_n']
        
    except:
        trainstations.loc[index, "planning_area"] = 'invalid'

trainstations.tail(10)

#### Save data locally

In [None]:
trainstations.to_csv('../data/trainstations_with_planningarea.csv', index=False)

## Passenger Volume by Bus Stops

In [None]:
# Passenger volume: produces link to download csv
base_url = "http://datamall2.mytransport.sg/ltaodataservice"
endpoint = "/PV/Bus"
resource_url = base_url + endpoint
last_3_months = ['202402', '202401', '202312']
for month in last_3_months:
    query_params = {'Date': month}
    # Request data from the server
    res = requests.get(resource_url, headers={"AccountKey": "7hrdcp4vQ0inFGn4IwWHQw==", "accept": "application/json"}, params=query_params)
    res_list = res.json()
    print(res_list['value'])

# Combining downloaded passenger volume data
df1 = pd.read_csv("../data/transport_node_bus_202402.csv")
df2 = pd.read_csv("../data/transport_node_bus_202401.csv")
df3 = pd.read_csv("../data/transport_node_bus_202312.csv")
pv_df = pd.concat([df1, df2, df3])

pv_df['PT_CODE'] = pv_df['PT_CODE'].apply(lambda x: '{:05d}'.format(x))

#save as csv file
pv_df.to_csv("../data/pv.csv", index=False)

# Exploratory Data Analysis

## Passenger Volume by Bus Stops

In [None]:
pv_df = pd.read_csv("../data/pv.csv")

# Drop rows with missing values
pv_df = pv_df.dropna()

# Convert TIME_PER_HOUR to int
pv_df['TIME_PER_HOUR'] = pv_df['TIME_PER_HOUR'].astype(int)

# Create a new column for average passenger volume
pv_df['avg_volume'] = (pv_df['TOTAL_TAP_IN_VOLUME'] + pv_df['TOTAL_TAP_OUT_VOLUME']) / 2
pv_df.head()

### Distribution of Average Passenger Volume based on Hour of the Day

In [None]:
# Plot the distribution of average passenger volume based on hour of the day
plt.figure(figsize=(10, 6))
sns.boxplot(x='TIME_PER_HOUR', y='avg_volume', hue='DAY_TYPE', data=pv_df, showfliers=False)
plt.title('Distribution of Average Passenger Volume Based on Hour of the Day')
plt.xlabel('Hour of the Day')
plt.ylabel('Average Passenger Volume')
plt.show()

## Train Stations

In [None]:
import folium

trainstations = pd.read_csv("../data/trainstations_with_planningarea.csv")

# Create a map centered around the mean latitude and longitude of your data
map_center = [trainstations['latitude'].mean(), trainstations['longitude'].mean()]
mymap = folium.Map(location=map_center, zoom_start=12)

# Add markers for each data point
for index, row in trainstations.iterrows():
    folium.Circle([row['latitude'], row['longitude']], 
                  radius=50,  # Adjust the radius as needed
                  color='blue',  # Set the color of the circle
                  fill=True,
                  fill_color='blue',
                  fill_opacity=0.4,
                  popup=row['STN_NAM_DE']).add_to(mymap)

# Save the map as an HTML file
mymap

# Feature Engineering: Construction of Connectivity Scores

## Load datasets locally

In [3]:
planning_area = pd.read_csv('../data/planning_area.csv')
busstops = pd.read_csv('../data/busstops_with_planningarea.csv')
trainstations = pd.read_csv('../data/trainstations_with_planningarea.csv')
busroutes = pd.read_csv('../data/busroutes.csv')
busservices = pd.read_csv('../data/busservices.csv')

planning_area.info()
busstops.info()
trainstations.info()
busroutes.info()
busservices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   type         101 non-null    object
 1   coordinates  101 non-null    object
 2   pln_area_n   101 non-null    object
dtypes: object(3)
memory usage: 2.5+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5102 entries, 0 to 5101
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   BusStopCode    5102 non-null   int64  
 1   RoadName       5102 non-null   object 
 2   Description    5102 non-null   object 
 3   Latitude       5102 non-null   float64
 4   Longitude      5102 non-null   float64
 5   planning_area  5102 non-null   object 
dtypes: float64(2), int64(1), object(3)
memory usage: 239.3+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 6 columns):
 #   Column         Non-N

## All Planning Areas
Extract Planning Area Names

In [4]:
unique_planning_area = planning_area['pln_area_n'].unique()
unique_planning_area = pd.DataFrame(unique_planning_area, columns=['planning_area'])
unique_planning_area

Unnamed: 0,planning_area
0,BEDOK
1,BUKIT TIMAH
2,BUKIT BATOK
3,BUKIT MERAH
4,CENTRAL WATER CATCHMENT
5,DOWNTOWN CORE
6,CHANGI
7,CHANGI BAY
8,LIM CHU KANG
9,BOON LAY


## Availability Score

### Number of Unique Bus Stops per Area

In [5]:
num_busstops = busstops.groupby('planning_area').size().reset_index(name='num_busstops')
num_busstops

Unnamed: 0,planning_area,num_busstops
0,ANG MO KIO,167
1,BEDOK,286
2,BISHAN,98
3,BOON LAY,67
4,BUKIT BATOK,162
5,BUKIT MERAH,176
6,BUKIT PANJANG,103
7,BUKIT TIMAH,112
8,CENTRAL WATER CATCHMENT,20
9,CHANGI,90


### Number of Unique Train Stations per Area

In [6]:
num_trainstations = trainstations.groupby('planning_area').size().reset_index(name='num_trainstations')
num_trainstations

Unnamed: 0,planning_area,num_trainstations
0,ANG MO KIO,4
1,BEDOK,6
2,BISHAN,6
3,BUKIT BATOK,3
4,BUKIT MERAH,8
5,BUKIT PANJANG,11
6,BUKIT TIMAH,6
7,CHANGI,1
8,CHOA CHU KANG,8
9,CLEMENTI,2


### Number of Unique Bus Services per Area

In [7]:
busroutes_planning_area = busroutes.merge(busstops, left_on='BusStopCode', right_on='BusStopCode', how='left')
busroutes_planningarea = busroutes_planning_area[['ServiceNo', 'planning_area']].drop_duplicates()
num_busservices = busroutes_planningarea.groupby('planning_area').size().reset_index(name='num_busservices')
num_busservices

Unnamed: 0,planning_area,num_busservices
0,ANG MO KIO,70
1,BEDOK,93
2,BISHAN,47
3,BOON LAY,13
4,BUKIT BATOK,56
5,BUKIT MERAH,66
6,BUKIT PANJANG,40
7,BUKIT TIMAH,37
8,CENTRAL WATER CATCHMENT,20
9,CHANGI,28


### Average Frequency of Bus Services per Area

In [8]:
busservices_planning_area = busservices.merge(busroutes_planning_area, left_on='ServiceNo', right_on='ServiceNo', how='left')
busservices_planning_area.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39603 entries, 0 to 39602
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ServiceNo        39603 non-null  object 
 1   Operator_x       39603 non-null  object 
 2   Direction_x      39603 non-null  int64  
 3   Category         39603 non-null  object 
 4   OriginCode       39486 non-null  float64
 5   DestinationCode  39486 non-null  float64
 6   AM_Peak_Freq     39603 non-null  object 
 7   AM_Offpeak_Freq  39603 non-null  object 
 8   PM_Peak_Freq     39603 non-null  object 
 9   PM_Offpeak_Freq  39603 non-null  object 
 10  LoopDesc         8981 non-null   object 
 11  Operator_y       39603 non-null  object 
 12  Direction_y      39603 non-null  int64  
 13  StopSequence     39603 non-null  int64  
 14  BusStopCode      39603 non-null  int64  
 15  Distance         39603 non-null  float64
 16  WD_FirstBus      39603 non-null  object 
 17  WD_LastBus  

In [9]:
busservices_planningarea = busservices_planning_area.iloc[:, [0,2,6,7,8,9,26]].drop_duplicates()
busservices_planningarea.head()

Unnamed: 0,ServiceNo,Direction_x,AM_Peak_Freq,AM_Offpeak_Freq,PM_Peak_Freq,PM_Offpeak_Freq,planning_area
0,118,1,5-08,8-12,8-10,09-14,PUNGGOL
6,118,1,5-08,8-12,8-10,09-14,TAMPINES
41,118,1,5-08,8-12,8-10,09-14,PAYA LEBAR
46,118,1,5-08,8-12,8-10,09-14,SENGKANG
51,118,2,10-10,8-11,4-08,9-12,PUNGGOL


In [10]:
# separate min and max freq
columns = ['AM_Peak_Freq', 'AM_Offpeak_Freq', 'PM_Peak_Freq', 'PM_Offpeak_Freq']
new_columns = ['AM_peak_freq', 'AM_offpeak_freq', 'PM_peak_freq', 'PM_offpeak_freq']

for col, new_col in zip(columns, new_columns):
    busservices_planningarea[[f'{new_col}_min', f'{new_col}_max']] = busservices_planningarea[col].str.split('-', expand=True)

# convert freq to numeric values
for i in range(7, 15):
    column_index = i
    busservices_planningarea.iloc[:, i] = pd.to_numeric(busservices_planningarea.iloc[:, i], errors='coerce')

# drop original freq columns
busservices_planningarea = busservices_planningarea.drop(columns=['AM_Peak_Freq', 'AM_Offpeak_Freq', 'PM_Peak_Freq', 'PM_Offpeak_Freq'])
busservices_planningarea.head()

Unnamed: 0,ServiceNo,Direction_x,planning_area,AM_peak_freq_min,AM_peak_freq_max,AM_offpeak_freq_min,AM_offpeak_freq_max,PM_peak_freq_min,PM_peak_freq_max,PM_offpeak_freq_min,PM_offpeak_freq_max
0,118,1,PUNGGOL,5.0,8.0,8.0,12.0,8.0,10.0,9.0,14.0
6,118,1,TAMPINES,5.0,8.0,8.0,12.0,8.0,10.0,9.0,14.0
41,118,1,PAYA LEBAR,5.0,8.0,8.0,12.0,8.0,10.0,9.0,14.0
46,118,1,SENGKANG,5.0,8.0,8.0,12.0,8.0,10.0,9.0,14.0
51,118,2,PUNGGOL,10.0,10.0,8.0,11.0,4.0,8.0,9.0,12.0


In [11]:
busservices_freq = busservices_planningarea.copy()

# calculate average frequency
freq_columns = ['AM_peak_freq_min', 'AM_peak_freq_max', 'AM_offpeak_freq_min', 'AM_offpeak_freq_max', 'PM_peak_freq_min', 'PM_peak_freq_max', 'PM_offpeak_freq_min', 'PM_offpeak_freq_max']
busservices_freq['avg_bus_freq'] = busservices_freq[freq_columns].mean(axis=1)

avg_freq = busservices_freq.groupby('planning_area')['avg_bus_freq'].mean().reset_index(name='avg_bus_freq')
avg_freq['avg_bus_freq'] = avg_freq['avg_bus_freq'].astype(float).round()
avg_freq

Unnamed: 0,planning_area,avg_bus_freq
0,ANG MO KIO,14.0
1,BEDOK,14.0
2,BISHAN,13.0
3,BOON LAY,12.0
4,BUKIT BATOK,13.0
5,BUKIT MERAH,14.0
6,BUKIT PANJANG,13.0
7,BUKIT TIMAH,13.0
8,CENTRAL WATER CATCHMENT,14.0
9,CHANGI,15.0


### Merge all into `availability` dataframe

In [12]:
availability = pd.merge(unique_planning_area, num_busstops, on='planning_area', how='left') \
                    .merge(num_trainstations, on='planning_area', how='left') \
                    .merge(num_busservices, on='planning_area', how='left') \
                    .merge(avg_freq, on='planning_area', how='left')
availability

Unnamed: 0,planning_area,num_busstops,num_trainstations,num_busservices,avg_bus_freq
0,BEDOK,286.0,6.0,93.0,14.0
1,BUKIT TIMAH,112.0,6.0,37.0,13.0
2,BUKIT BATOK,162.0,3.0,56.0,13.0
3,BUKIT MERAH,176.0,8.0,66.0,14.0
4,CENTRAL WATER CATCHMENT,20.0,,20.0,14.0
5,DOWNTOWN CORE,79.0,15.0,95.0,15.0
6,CHANGI,90.0,1.0,28.0,15.0
7,CHANGI BAY,1.0,,1.0,12.0
8,LIM CHU KANG,29.0,,6.0,14.0
9,BOON LAY,67.0,,13.0,12.0


### Scale the values into `avail_values` for construction of Availability Score

In [16]:
from sklearn.preprocessing import MinMaxScaler

# Apply Min-Max scaling to normalize values to positive numbers only
min_max_scaler = MinMaxScaler()
avail_values = availability.drop(columns='planning_area')  # Make a copy to avoid modifying the original DataFrame

# Reverse direction of Freq columns
avail_values['avg_bus_freq'] = - avail_values['avg_bus_freq']

# Shift values by their minimum value to make them positive
avail_values = avail_values - avail_values.min()

# Apply Min-Max scaling to scale values to the range [0, 1]
avail_values = pd.DataFrame(min_max_scaler.fit_transform(avail_values), columns=avail_values.columns)

# Add the avail_values columns back to the DataFrame
avail_values = avail_values.add_suffix('_score')
avail_with_score = pd.concat([availability, avail_values], axis=1)
avail_with_score

Unnamed: 0,planning_area,num_busstops,num_trainstations,num_busservices,avg_bus_freq,num_busstops_score,num_trainstations_score,num_busservices_score,avg_bus_freq_score
0,BEDOK,286.0,6.0,93.0,14.0,1.0,0.294118,0.978723,0.882353
1,BUKIT TIMAH,112.0,6.0,37.0,13.0,0.389474,0.294118,0.382979,0.941176
2,BUKIT BATOK,162.0,3.0,56.0,13.0,0.564912,0.117647,0.585106,0.941176
3,BUKIT MERAH,176.0,8.0,66.0,14.0,0.614035,0.411765,0.691489,0.882353
4,CENTRAL WATER CATCHMENT,20.0,,20.0,14.0,0.066667,,0.202128,0.882353
5,DOWNTOWN CORE,79.0,15.0,95.0,15.0,0.273684,0.823529,1.0,0.823529
6,CHANGI,90.0,1.0,28.0,15.0,0.312281,0.0,0.287234,0.823529
7,CHANGI BAY,1.0,,1.0,12.0,0.0,,0.0,1.0
8,LIM CHU KANG,29.0,,6.0,14.0,0.098246,,0.053191,0.882353
9,BOON LAY,67.0,,13.0,12.0,0.231579,,0.12766,1.0


In [17]:
avail_with_score.to_csv('../data/avail_with_score.csv', index=False)

$$
\text{avail\_score} = w_1 \times \text{num\_busstops\_score} + w_2 \times \text{num\_trainstations\_score} + w_3 \times \text{num\_busservices\_score} + w_4 \times \text{avg\_bus\_freq\_score}
$$
$$ \text{where } w_i \text{ are weights to be assigned by the user in the interface and } {\sum_{i=1}^{4} w_i} = 1
$$