In [242]:
import pandas as pd
import plotly.express as px
import pandas as pd
from math import radians, sin, cos, sqrt, atan2

In [243]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in metres between two points.

    Applies the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance in metres, rounded to two decimal places.
    """
    R = 6371.0 * 1000  # sphere radius in metres

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push `a` a hair above 1 for antipodal points, which would
    # make sqrt(1 - a) raise ValueError. A plain comparison (rather than
    # min/max) is used so that a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return round(R * c, 2)


def get_station_info(station_name, df):
    """Look up the coordinates and the lines of a station.

    Args:
        station_name: Value to match against the "Station" column.
        df: DataFrame with "Station", "Latitude", "Longitude" and "Line"
            columns.

    Returns:
        A dict whose "Latitude" and "Longitude" come from the first matching
        row and whose "Lines" is the set of "Line" values across all matching
        rows, or None when no row matches.
    """
    matches = df.loc[df["Station"] == station_name]
    if matches.empty:
        return None

    # Coordinates are read from the first occurrence only.
    first_match = matches.iloc[0]
    return {
        "Latitude": first_match["Latitude"],
        "Longitude": first_match["Longitude"],
        "Lines": set(matches["Line"]),
    }


def calculate_distance(row, df):
    """Distance between a row's station and its destination, if they share a line.

    Args:
        row: Mapping-like row providing "Station" and "Destination".
        df: Station table passed through to get_station_info for both lookups.

    Returns:
        The haversine_distance between the two stations when both are found
        in df and have at least one line in common; otherwise None.
    """
    start = get_station_info(row["Station"], df)
    end = get_station_info(row["Destination"], df)

    # Either endpoint missing from the table: nothing to measure.
    if not (start and end):
        return None

    # Only stations with a common line are treated as connected.
    if not start["Lines"] & end["Lines"]:
        return None

    return haversine_distance(
        start["Latitude"],
        start["Longitude"],
        end["Latitude"],
        end["Longitude"],
    )

In [244]:
# Segments longer than this (in metres) are discarded.
MAX_SEGMENT_M = 5000

# Load the station table and drop the column that is not used below.
df = pd.read_csv("Metro_Madrid_2021.csv")
df = df.drop("Traffic", axis=1)

# Coordinates are stored as text with a decimal comma; convert them to floats.
for coord_col in ("Longitude", "Latitude"):
    df[coord_col] = df[coord_col].str.replace(",", ".", regex=False).astype(float)

# Pair each station with the next row *of the same line*. A plain shift(-1)
# would pair the last station of one line with the first station of the next.
# NOTE(review): assumes the rows of each line are stored in travel order
# (the "Order of Points" column suggests so) — confirm against the CSV.
df.insert(1, "Destination", df.groupby("Line")["Station"].shift(-1))
df.insert(2, "Distance", df.apply(lambda row: calculate_distance(row, df), axis=1))

# Keep only rows with a computed distance that is not above the cut-off.
df = df.dropna(subset=["Distance"])
df = df[df["Distance"] <= MAX_SEGMENT_M]

In [245]:
# Preview the first three rows of df.
df.head(3)

Unnamed: 0,Station,Destination,Distance,Line,Order of Points,Longitude,Latitude
0,Pinar de Chamartin,Bambú,867.28,Linea 1,1,-3.667061,40.480136
1,Bambú,Chamartín,754.71,Linea 1,2,-3.676374,40.476872
2,Chamartín,Plaza de Castilla,799.03,Linea 1,3,-3.682768,40.472138


In [246]:
# Build an adjacency list: station -> list of (destination, distance, line).
# NOTE(review): edges are stored only in the Station -> Destination direction;
# confirm whether the reverse direction is needed by whatever consumes `graph`.
graph_df = df[["Station", "Destination", "Distance", "Line"]]
graph = {}

# name=None yields plain tuples in column order, so they unpack directly.
for station, destination, distance, line in graph_df.itertuples(index=False, name=None):
    # Defensive: skip any row without a distance.
    if pd.isna(distance):
        continue

    graph.setdefault(station, []).append((destination, distance, line))

In [272]:
# Plot the stations in df on an interactive map; hovering shows the station name.
# NOTE(review): df has already had the rows without a computed Distance
# removed, so a station that only appears in such a row will not be drawn —
# confirm that this is intended.
fig = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", hover_name="Station")
# The zoom level is set once here; it previously was also passed to the
# constructor with a different value that this call overrode.
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=10.5,
    width=1400,
    height=1000,
)
fig.show()