In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import time
# Wall-clock timestamp taken when this cell runs. It is not read in any of the
# cells visible here - presumably used later to report total run time (TODO confirm).
start_time = time.time()

# Used for loading csv files into excel
import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows
import os
import glob

In [None]:
# Load the raw trip readings from an Excel workbook into `df`.
# The path was left empty in this notebook, and an empty path cannot be
# opened, so fail fast with an actionable message instead of an opaque
# I/O error from deep inside pandas.
DATA_PATH = r""  # TODO: set to the location of the readings workbook
if not DATA_PATH:
    raise FileNotFoundError(
        "DATA_PATH is empty - set it to the Excel workbook containing the train readings."
    )
df = pd.read_excel(DATA_PATH)
# Anomalous readings: Sixth Avenue > KAP (13), Upp Changi > Tamp East (23, 26), Tamp East > Upp Changi (24), CCK to Gombak (53), Entire NEL (67-72)


In [None]:
# pd.set_option('display.max_rows', 500)
# Remove anomalous readings by row label: the individual trips
# 13, 23, 24, 26 and 53, plus the contiguous block 67-72 (entire NEL).
# `df` is rebound from itself here, so running this cell a second time used to
# raise KeyError because the labels were already gone. A single drop with
# errors="ignore" makes the cell safe to re-run.
anomalous_rows = [13, 23, 26, 24, 53, *range(67, 73)]
df = df.drop(index=anomalous_rows, errors="ignore")

In [None]:
# Keep only readings whose distance exceeds 700 m (anything at or below that is
# deemed anomalous), then split the survivors by the "Above Ground" flag.
distance_ok = df["Distance Travelled (m)"] > 700
flag_above = df["Above Ground"] == "Yes"
flag_below = df["Above Ground"] == "No"
above_ground = df.loc[flag_above & distance_ok]
below_ground = df.loc[flag_below & distance_ok]

In [None]:
# Initialise the per-trip series used by the plots: one (below, above) pair
# per quantity.
accel_col = "Mean Acceleration (ms^-2)"
dist_col = "Distance Travelled (m)"
time_col = "Total Time Taken (s)"
vmax_col = "Max Velocity (ms^-1)"

# Mean Acceleration
below_ground_y, above_ground_y = below_ground[accel_col], above_ground[accel_col]

# Total Distance
below_ground_x, above_ground_x = below_ground[dist_col], above_ground[dist_col]

# Total Time Taken
below_ground_t, above_ground_t = below_ground[time_col], above_ground[time_col]

# Max Velocity
below_ground_v, above_ground_v = below_ground[vmax_col], above_ground[vmax_col]

# Mean Velocity: per-trip distance divided by per-trip time
below_ground_mean_v = below_ground_x / below_ground_t
above_ground_mean_v = above_ground_x / above_ground_t

In [None]:
# Scatter of mean acceleration against total trip time, coloured by whether
# the trip is below (blue) or above (red) ground.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(below_ground_t, below_ground_y, c='b', label='Below Ground')
ax.scatter(above_ground_t, above_ground_y, c='r', label='Above Ground')
# This figure had no title; add one in the "<y>-<x> Graph" form used by the
# titled plots in this notebook so it stands alone when skimmed.
ax.set_title('Acceleration-Time Graph')
ax.set_xlabel('Total Time Taken (s)')
ax.set_ylabel('Mean Acceleration (ms^-2)')
ax.legend(loc='best')
plt.show()

In [None]:
# Scatter of maximum velocity against distance travelled, coloured by whether
# the trip is below (blue) or above (red) ground.
fig, ax = plt.subplots()
ax.scatter(below_ground_x, below_ground_v, c='b', label='Below Ground')
ax.scatter(above_ground_x, above_ground_v, c='r', label='Above Ground')
# This figure had no title; add one in the "<y>-<x> Graph" form used by the
# titled plots in this notebook so it stands alone when skimmed.
ax.set_title('Max Velocity-Distance Graph')
ax.set_xlabel('Distance Travelled (m)')
ax.set_ylabel('Max Velocity (ms^-1)')
ax.legend(loc='best')
plt.show()

In [None]:
# Scatter of per-trip mean velocity (distance / time) against distance
# travelled, coloured by whether the trip is below (blue) or above (red) ground.
fig, ax = plt.subplots()
ax.scatter(below_ground_x, below_ground_mean_v, c='b', label='Below Ground')
ax.scatter(above_ground_x, above_ground_mean_v, c='r', label='Above Ground')
# This figure had no title; add one in the "<y>-<x> Graph" form used by the
# titled plots in this notebook so it stands alone when skimmed.
ax.set_title('Mean Velocity-Distance Graph')
ax.set_xlabel('Distance Travelled (m)')
ax.set_ylabel('Mean Velocity (ms^-1)')
ax.legend(loc='best')
plt.show()

In [None]:
# Two views of the same data with the axes swapped.

# 1) Time taken (y) against distance travelled (x)
fig_td, ax_td = plt.subplots(figsize=(10, 10))
ax_td.scatter(below_ground_x, below_ground_t, c='b', label='Below Ground')
ax_td.scatter(above_ground_x, above_ground_t, c='r', label='Above Ground')
ax_td.set(
    title='Time-Distance Graph',
    xlabel='Total Distance Travelled (m)',
    ylabel='Total Time Taken (s)',
)
ax_td.legend(loc='best')
plt.show()

# 2) Distance travelled (y) against time taken (x)
fig_dt, ax_dt = plt.subplots(figsize=(10, 10))
ax_dt.scatter(below_ground_t, below_ground_x, c='b', label='Below Ground')
ax_dt.scatter(above_ground_t, above_ground_x, c='r', label='Above Ground')
ax_dt.set(
    title='Distance-Time Graph',
    xlabel='Total Time Taken (s)',
    ylabel='Total Distance Travelled (m)',
)
ax_dt.legend(loc='best')
plt.show()

In [None]:
# Narrow both groups to trips shorter than 3000 m. Together with the earlier
# > 700 m filter this leaves the 700 m - 3000 m band and removes samples that
# still appear slightly anomalous.
below_is_short = below_ground["Distance Travelled (m)"] < 3000
above_is_short = above_ground["Distance Travelled (m)"] < 3000
below_ground_2 = below_ground.loc[below_is_short]
above_ground_2 = above_ground.loc[above_is_short]

In [None]:
# Per-trip series for the 700 m - 3000 m subset, grouped by sample set.

# Below ground: acceleration, distance, time, max velocity
bg2_y = below_ground_2["Mean Acceleration (ms^-2)"]
bg2_x = below_ground_2["Distance Travelled (m)"]
bg2_t = below_ground_2["Total Time Taken (s)"]
bg2_v = below_ground_2["Max Velocity (ms^-1)"]

# Above ground: acceleration, distance, time, max velocity
ag2_y = above_ground_2["Mean Acceleration (ms^-2)"]
ag2_x = above_ground_2["Distance Travelled (m)"]
ag2_t = above_ground_2["Total Time Taken (s)"]
ag2_v = above_ground_2["Max Velocity (ms^-1)"]

# Mean Velocity: per-trip distance divided by per-trip time
bg2_mv = bg2_x / bg2_t
ag2_mv = ag2_x / ag2_t

In [None]:
# Distance travelled (y) against time taken (x) for the 700 m - 3000 m subset.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(bg2_t, bg2_x, c='b', label='Below Ground')
ax.scatter(ag2_t, ag2_x, c='r', label='Above Ground')
ax.set(
    title="Distance-Time Graph",
    xlabel='Total Time Taken (s)',
    ylabel='Total Distance Travelled (m)',
)
ax.legend(loc='best')
plt.show()

# Hard-coded reading of the figure above; it is not derived from the data.
print("Above ground takes a longer time than Below ground")

In [None]:
# Mean acceleration (y) against distance travelled (x) for the
# 700 m - 3000 m subset.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(bg2_x, bg2_y, c='b', label='Below Ground')
ax.scatter(ag2_x, ag2_y, c='r', label='Above Ground')
ax.set(
    title='Acceleration-Distance Graph',
    xlabel='Total Distance Travelled (m)',
    ylabel='Mean Acceleration (ms^-2)',
)
ax.legend(loc='best')
plt.show()

# Hard-coded reading of the figure above; it is not derived from the data.
print("No discernible difference")

In [None]:
# Summary statistics for the 700 m - 3000 m subset, followed by a text report.

# Time taken: mean and median for each group
mean_t_below, median_t_below = bg2_t.mean(), bg2_t.median()
mean_t_above, median_t_above = ag2_t.mean(), ag2_t.median()

# Mean velocity, computed as total distance over total time for the group
# (a pooled figure, not the average of the per-trip mean velocities).
mean_v_below = sum(bg2_x) / sum(bg2_t)
mean_v_above = sum(ag2_x) / sum(ag2_t)

# Mean of the per-trip mean accelerations
mean_a_below, mean_a_above = bg2_y.mean(), ag2_y.mean()

# The bracketed interpretations and the conclusion are fixed text; only the
# numbers are computed from the data.
report = f'''
Taking samples where distance travelled is between 700m and 3000m:

Number of samples below ground = {len(below_ground_2)}
Number of samples above ground = {len(above_ground_2)}

Mean t (Below) = {mean_t_below}
Median t (Below) = {median_t_below}
Mean t (Above) = {mean_t_above}
Median t (Above) = {median_t_above}

Mean v (Below) = {mean_v_below}
Mean v (Above) = {mean_v_above}

Mean a (Below) = {mean_a_below}
Mean a (Above) = {mean_a_above}

Mean t (Above) - Mean t (Below) = {mean_t_above - mean_t_below} ['+' = Above SLOWER than BELOW]
Median t (Above) - Median t (Below) = {median_t_above - median_t_below} ['+' = Above SLOWER than BELOW]
Mean v (Above) - Mean v (Below) = {mean_v_above - mean_v_below} ['-' = Above SLOWER than BELOW]
Mean a (Above) - Mean a (Below) = {mean_a_above - mean_a_below} [Negligible difference]

Conclusion:
In terms of mean velocity and mean time taken, trains travelling above ground do run slower than that below ground.
However, based on our results, this is not significantly affected by the time trains take to accelerate.
However, it may be significantly affected by the time trains take to decelerate as trains travelling above ground often take a while to stop.
'''
print(report)