In [1]:
import requests
import pandas as pd
import numpy as np
from io import StringIO

In [2]:
# Common prefix of the ISTAC statistical-resources API; every dataset below is
# fetched as CSV from "<prefix>/<dataset id>/~latest.csv".
ISTAC_DATASETS_URL = "https://datos.canarias.es/api/estadisticas/statistical-resources/v1.0/datasets/ISTAC"

# Only the passengers URL carries the `lang=en` query parameter.
url_passengers = f"{ISTAC_DATASETS_URL}/C00017A_000001/~latest.csv?lang=en"
url_gm = f"{ISTAC_DATASETS_URL}/C00017A_000002/~latest.csv"
url_op = f"{ISTAC_DATASETS_URL}/C00017A_000003/~latest.csv"

In [3]:
# Download the passengers dataset and parse it into the raw DataFrame `data_t`.
#
# A timeout is passed so the cell cannot hang indefinitely on a stalled
# connection (requests waits forever by default).
response = requests.get(url_passengers, timeout=120)

# Fail right here on anything other than 200. Merely printing the status would
# leave `data_t` undefined and make the next cell die with an unrelated NameError.
if response.status_code != 200:
    raise RuntimeError(
        f"Failed to retrieve data. Status code: {response.status_code}"
    )

# The body is decoded explicitly as UTF-8 and wrapped in a text buffer for pandas.
csv_data = StringIO(response.content.decode("utf-8"))
data_t = pd.read_csv(csv_data)

In [60]:
# Work on an independent copy so the downloaded frame `data_t` stays untouched
# and this stage can be re-run without downloading again.
df = data_t.copy()  # DataFrame.copy() is deep by default

In [61]:
# Remove every column whose name ends in '#es', keeping the '#en' label columns
# and the '*_CODE' columns. Rebinding `df` to the result of drop() avoids
# `inplace=True`, which pandas discourages and which offers no speed benefit.
df = df.drop(columns=df.columns[df.columns.str.endswith('#es')])

In [62]:
# Inspect the AEROPUERTO_ESCALA_CODE column; its values are what the filtering
# cell further down pattern-matches on.
df['AEROPUERTO_ESCALA_CODE']

0               CV
1               CV
2               CV
3               CV
4               CV
            ...   
2963515    SE_ESMS
2963516    SE_ESMS
2963517    SE_ESMS
2963518    SE_ESMS
2963519    SE_ESMS
Name: AEROPUERTO_ESCALA_CODE, Length: 2963520, dtype: object

In [63]:
# Summarise the frame: row count, column names, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2963520 entries, 0 to 2963519
Data columns (total 16 columns):
 #   Column                           Dtype  
---  ------                           -----  
 0   SERVICIO_AEREO#en                object 
 1   SERVICIO_AEREO_CODE              object 
 2   MEDIDAS#en                       object 
 3   MEDIDAS_CODE                     object 
 4   MOVIMIENTO_AERONAVE#en           object 
 5   MOVIMIENTO_AERONAVE_CODE         object 
 6   TIME_PERIOD#en                   object 
 7   TIME_PERIOD_CODE                 object 
 8   AEROPUERTO_BASE#en               object 
 9   AEROPUERTO_BASE_CODE             object 
 10  AEROPUERTO_ESCALA#en             object 
 11  AEROPUERTO_ESCALA_CODE           object 
 12  OBS_VALUE                        float64
 13  ESTADO_OBSERVACION#en            object 
 14  ESTADO_OBSERVACION_CODE          object 
 15  CONFIDENCIALIDAD_OBSERVACION#en  float64
dtypes: float64(2), object(14)
memory usage: 361.8+ MB


Steps to get the airports right:
1) Delete countries, autonomous communities and the "Rest of"/"Remain of" aggregates
2) Join with the airports from OurAirports using name similarity (fuzzy string matching) or something similar

[Understanding _CODE from airports](https://www3.gobiernodecanarias.org/aplicaciones/appsistac/activos-semanticos/codelists/codelists/ISTAC/CL_AEROPUERTOS/01.004/detail)

I think everything I did here (selecting only airports) could be done directly through the API, using its "granularity" option. I'll check tomorrow.

In [73]:
# Keep only rows whose AEROPUERTO_ESCALA_CODE is not one of the aggregate codes.
# All checks are OR-ed into one boolean mask and applied once, which keeps the
# same rows as filtering step by step. Every string test passes `na=False` so a
# missing code yields False (row kept) instead of a NaN in the mask, which would
# make the `~` negation fail or misbehave.
escala_code = df['AEROPUERTO_ESCALA_CODE']

is_aggregate = (
    # Rest of/Remain of AEROPUERTO_ESCALA#en have "_O" at the end of their code
    escala_code.str.endswith('_O', na=False)
    # Entries for a whole country (two-letter code)
    | escala_code.str.match(r'^[A-Z]{2}$', na=False)
    # Autonomous communities (their code is like ES[0-9][0-9])
    | escala_code.str.match(r'^ES[0-9]{2}$', na=False)
    # Sum of an entire island
    | escala_code.str.match(r'^ES70[0-9]$', na=False)
    # Sum of all autonomous communities and sum of all islands
    | escala_code.isin(['ES_XES70', 'ES70'])
)

df_f = df.loc[~is_aggregate]

In [74]:
# Display the filtered frame (pandas shows only the first and last rows).
df_f

Unnamed: 0,SERVICIO_AEREO#en,SERVICIO_AEREO_CODE,MEDIDAS#en,MEDIDAS_CODE,MOVIMIENTO_AERONAVE#en,MOVIMIENTO_AERONAVE_CODE,TIME_PERIOD#en,TIME_PERIOD_CODE,AEROPUERTO_BASE#en,AEROPUERTO_BASE_CODE,AEROPUERTO_ESCALA#en,AEROPUERTO_ESCALA_CODE,OBS_VALUE,ESTADO_OBSERVACION#en,ESTADO_OBSERVACION_CODE,CONFIDENCIALIDAD_OBSERVACION#en
280,Commercial,COMMERCIAL,Passengers,PASAJEROS,Total,_T,02/2014,2014-M02,Tenerife Norte Airport,ES_GCXO,Nouadhibou International Airport,MR_GQPP,,,,
281,Commercial,COMMERCIAL,Passengers,PASAJEROS,Total,_T,03/2014,2014-M03,Tenerife Norte Airport,ES_GCXO,Nouadhibou International Airport,MR_GQPP,,,,
282,Commercial,COMMERCIAL,Passengers,PASAJEROS,Total,_T,04/2014,2014-M04,Tenerife Norte Airport,ES_GCXO,Nouadhibou International Airport,MR_GQPP,,,,
283,Commercial,COMMERCIAL,Passengers,PASAJEROS,Total,_T,05/2014,2014-M05,Tenerife Norte Airport,ES_GCXO,Nouadhibou International Airport,MR_GQPP,,,,
284,Commercial,COMMERCIAL,Passengers,PASAJEROS,Total,_T,06/2014,2014-M06,Tenerife Norte Airport,ES_GCXO,Nouadhibou International Airport,MR_GQPP,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2963515,Scheduled,SCHEDULED,Passengers,PASAJEROS,Total,_T,10/2013,2013-M10,Lanzarote Airport,ES_GCRR,Malmö Sturup Airport,SE_ESMS,,,,
2963516,Scheduled,SCHEDULED,Passengers,PASAJEROS,Total,_T,11/2013,2013-M11,Lanzarote Airport,ES_GCRR,Malmö Sturup Airport,SE_ESMS,,,,
2963517,Scheduled,SCHEDULED,Passengers,PASAJEROS,Total,_T,12/2013,2013-M12,Lanzarote Airport,ES_GCRR,Malmö Sturup Airport,SE_ESMS,,,,
2963518,Scheduled,SCHEDULED,Passengers,PASAJEROS,Total,_T,2013,2013,Lanzarote Airport,ES_GCRR,Malmö Sturup Airport,SE_ESMS,,,,


In [71]:
# Sanity check: list the distinct AEROPUERTO_ESCALA#en names left after filtering,
# in sorted order, to confirm only individual airports remain.
df_f['AEROPUERTO_ESCALA#en'].sort_values().unique()

array(['A Coruña Airport', 'Aalborg Airport', 'Aarhus Airport',
       'Aberdeen Dyce Airport', 'Adolfo Suárez Madrid-Barajas Airport',
       'Al Massira Airport', 'Alicante International Airport',
       'Amsterdam Airport Schiphol',
       'Amílcar Cabral International Airport', 'Asturias Airport',
       'Banjul International Airport', 'Barcelona International Airport',
       'Belfast International Airport', 'Bergen Airport Flesland',
       'Berlin-Schönefeld Airport', 'Berlin-Tegel Airport',
       'Bilbao Airport', 'Billund Airport',
       'Birmingham International Airport',
       'Blackpool International Airport', 'Bodô Airport',
       'Bologna Guglielmo Marconi Airport', 'Bordeaux-Mérignac Airport',
       'Borlange Airport', 'Boryspil International Airport',
       'Bournemouth Airport', 'Bremen Airport', 'Brest Bretagne Airport',
       'Bristol Airport', 'Brussels Airport',
       'Brussels South Charleroi Airport',
       'Budapest Liszt Ferenc International Airport',


In [68]:
# Distinct, sorted AEROPUERTO_ESCALA#en names intended for matching against the
# external airport list. Taken from the filtered frame `df_f` rather than the
# raw `df`, so countries, autonomous communities and "Rest of" aggregates are
# not among the candidates.
istac_airports = df_f['AEROPUERTO_ESCALA#en'].sort_values().unique()

In [69]:
# Load the local airport reference table (presumably the OurAirports
# `airports.csv` export mentioned in the notes; confirm the file's origin).
airport_csv = pd.read_csv(filepath_or_buffer='airports.csv')