In [None]:
import sys
import os
import re
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
import pandas as pd
import plotly.express as px
import numpy as np
import geopandas as gpd
from pyproj import Transformer
from scripts.utils import split_column_at
from scripts.accent_cleaner import AccentCleaner
#from deep_translator import GoogleTranslator

### Airports

In [None]:
# Load the airport layer; the coordinates live in each feature's geometry.
gdf = gpd.read_file('../data/raw/spanish_airports.geojson')

# Copy the point coordinates into ordinary columns (y -> latitude, x -> longitude).
# `.x` / `.y` are only defined for Point geometries.
gdf['latitude'] = gdf.geometry.y
gdf['longitude'] = gdf.geometry.x

# With the coordinates in plain columns the geometry is no longer needed;
# drop it and switch to the English column names used in the rest of the notebook.
airport_column_names = {
    "Texto": "airport_name",
    "OBJECTID": "airport_id",
    "lat": "latitude",
    "lon": "longitude",
}
df_airports = gdf.drop(columns='geometry').rename(columns=airport_column_names)
df_airports.head()

In [None]:
title = "Airports in Spain"

# px.scatter_map renders on the MapLibre-based `map` subplot, so the basemap
# must be chosen with `map_style`; `mapbox_style` only configures the separate
# `mapbox` subplot and is silently ignored by this figure.
fig = px.scatter_map(
    df_airports,
    lat="latitude",
    lon="longitude",
    zoom=5,
    map_style="open-street-map",
    title=title,
    height=1000,
    width=1000,
)

# No `size=` column is mapped, so `size_max` / `sizeref` would have no effect;
# a fixed marker size is what actually makes the points easier to see.
fig.update_traces(marker={"size": 8})

fig.update_layout(margin={"r": 50, "t": 50, "l": 50, "b": 50})

fig.show()

In [None]:
# Persist the cleaned airport table (without the index column) for downstream steps.
airports_csv_path = "../data/processed/filtered_airports.csv"
df_airports.to_csv(airports_csv_path, index=False)

### Trains

In [None]:
# Columns of the raw listing that are not used in this notebook.
unused_columns = ["PAIS", "C.P.", "DIRECCIÓN"]

# Raw (Spanish) header -> English column name.
station_column_names = {
    " CÓDIGO": "station_code",
    "DESCRIPCION": "station_name",
    "POBLACION": "municipality",
    "PROVINCIA": "province",
    "LATITUD": "latitude",
    "LONGITUD": "longitude",
}

# The file is semicolon-separated and uses a decimal comma.
df_trains = (
    pd.read_csv("../data/raw/listado_completo_av_ld_md.csv", sep=";", decimal=",")
    .drop(columns=unused_columns)
    .rename(columns=station_column_names)
)

In [None]:
# Number of missing values in each column.
df_trains.isna().sum()

In [None]:
# Build an AccentCleaner (project helper from scripts.accent_cleaner) over
# df_trains for the "province" and "municipality" columns, then run it.
# NOTE(review): the following cell reads "municipality_clean" and
# "province_clean", so cleanAccents() presumably adds those columns to
# df_trains in place — confirm against scripts/accent_cleaner.py.
cleaner = AccentCleaner([df_trains], columns=["province", "municipality"])
cleaner.cleanAccents()
# Display the frame to inspect the result.
df_trains

In [None]:
def _second_segment_or_only(names):
    """Reduce "/"-separated names to a single segment.

    A value without "/" is returned unchanged; a value with one or more "/"
    keeps the segment right after the first "/". Missing values stay missing
    instead of raising (the previous per-row lambda called ``len()`` on NaN).

    Parameters
    ----------
    names : pandas.Series
        String column to reduce.

    Returns
    -------
    pandas.Series
        Series with the same index holding the selected segment.
    """
    parts = names.str.split("/")
    # .str.get(1) is NaN when there is no second segment (or the value is
    # missing); fall back to the first segment in that case.
    return parts.str.get(1).fillna(parts.str.get(0))


# NOTE(review): presumably the "/" separates alternative spellings of the same
# place name — confirm that the second one is the variant wanted downstream.
for clean_col in ("municipality_clean", "province_clean"):
    df_trains[clean_col] = _second_segment_or_only(df_trains[clean_col])




In [None]:
# Spot-check a random subset of rows. The seed keeps the preview identical
# across re-runs, and the size is capped because sample() raises when asked
# for more rows than the frame contains.
df_trains.sample(n=min(20, len(df_trains)), random_state=42)

In [None]:
# Station names in Title Case; the original place-name columns in lower case.
df_trains['station_name'] = df_trains['station_name'].str.title()
for place_col in ('municipality', 'province'):
    df_trains[place_col] = df_trains[place_col].str.lower()

In [None]:
# Only the "*_clean" variants are kept from here on. errors="ignore" makes the
# cell safe to re-run: the previous guard indexed df_trains["province"], which
# raises KeyError as soon as the column has already been dropped.
df_trains = df_trains.drop(columns=["province", "municipality"], errors="ignore")

df_trains.head()

In [None]:
title = "Train Stations (LD, MD, High Speed) in Spain"

# px.scatter_map renders on the MapLibre-based `map` subplot, so the basemap
# must be chosen with `map_style`; `mapbox_style` only configures the separate
# `mapbox` subplot and is silently ignored by this figure.
fig = px.scatter_map(
    df_trains,
    lat="latitude",
    lon="longitude",
    hover_data=["municipality_clean", "station_name"],
    zoom=5,
    map_style="open-street-map",
    title=title,
    height=1000,
    width=1000,
)

# No `size=` column is mapped, so `size_max` / `sizeref` would have no effect;
# a fixed marker size is what actually makes the points easier to see.
fig.update_traces(marker={"size": 8})

fig.update_layout(margin={"r": 50, "t": 50, "l": 50, "b": 50})

fig.show()

In [None]:
# Persist the cleaned station table (without the index column) for downstream steps.
trains_csv_path = "../data/processed/filtered_trains.csv"
df_trains.to_csv(trains_csv_path, index=False)