# 0. Imports

In [None]:
import pandas as pd
import json
import requests
import time
from datetime import datetime, date, timedelta
from pytz import timezone

# City names were taken from: https://en.wikipedia.org/wiki/List_of_cities_and_towns_in_Austria
# City and airport codes were looked up manually on Wikidata: https://www.wikidata.org/wiki/Wikidata:Main_Page

# 1. Cities Data

In [None]:
# Wikidata IDs of the cities to process. get_cities() interpolates each ID into
# the wft-geo-db request URL. Only the uncommented entries (Berlin and Köln)
# are active; the commented-out IDs are kept for reference and are not fetched.
qcities = [    # 10 German cities by population
    'Q64',    # Berlin: federal state, capital and largest city of Germany
    #'Q1055',  # Hamburg: city and federal state in the North of Germany
    #'Q1726',  # München: capital and most populous city of Bavaria, Germany
    'Q365',   # Köln: city in North Rhine-Westphalia, Germany
    #'Q1794',  # Frankfurt am Main: city in Hesse, Germany
    #'Q1022',  # Stuttgart: capital city of German federated state Baden-Württemberg
    #'Q1718',  # Düsseldorf: capital city of the German federated state of North Rhine-Westphalia
    #'Q1295',  # Dortmund: city in North Rhine-Westphalia, Germany
    #'Q2066',  # Essen: city in North Rhine-Westphalia, Germany
    #'Q2079',  # Leipzig: most populous city in the German state of Saxony
                  
              # 5 Austrian cities by population
    #'Q1741',  # Wien: capital of and state in Austria
    #'Q13298', # Graz: capital of Styria, Austria
    #'Q41329', # Linz: capital city of Upper Austria, Austria
    #'Q34713', # Salzburg: capital city of the federal state of Salzburg in Austria
    #'Q1735'   # Innsbruck: capital of the state of Tyrol, Austria
         ]

In [None]:
def get_cities(qcities, api_key="xxx", pause_seconds=2):
  """Fetch basic facts for each city from the wft-geo-db RapidAPI endpoint.

  Args:
    qcities: Iterable of Wikidata IDs (e.g. 'Q64'). One request is sent per ID.
    api_key: Value sent in the X-RapidAPI-Key header. The default is the
      placeholder that used to be hard-coded; pass a real key to get data.
    pause_seconds: Seconds to sleep after every request. The default keeps
      the previous fixed 2-second pause (presumably a rate-limit guard).

  Returns:
    DataFrame with one row per requested city and the columns city_id, city,
    country, elevation, city_latitude, city_longitude and population. Fields
    the API did not return are filled with NaN. An empty input returns an
    empty frame with the same columns.

  Raises:
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  # API field name -> output column name; the dict order is the output order.
  column_names = {'data.wikiDataId': 'city_id',
                  'data.city': 'city',
                  'data.country': 'country',
                  'data.elevationMeters': 'elevation',
                  'data.latitude': 'city_latitude',
                  'data.longitude': 'city_longitude',
                  'data.population': 'population'}

  qcities = list(qcities)
  if not qcities:
    # pd.concat([]) raises, so return the expected schema explicitly.
    return pd.DataFrame(columns=list(column_names.values()))

  headers = {"X-RapidAPI-Host": "wft-geo-db.p.rapidapi.com", "X-RapidAPI-Key": api_key}
  city_frames = []
  for qcity in qcities:
    url = f"https://wft-geo-db.p.rapidapi.com/v1/geo/cities/{qcity}"
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on quota/auth errors instead of parsing an error payload.
    response.raise_for_status()
    city_frames.append(pd.json_normalize(response.json()))
    time.sleep(pause_seconds)

  cities_df = pd.concat(city_frames, ignore_index=True)
  # reindex (rather than [[...]]) tolerates a field missing from every response.
  return cities_df.reindex(columns=list(column_names)).rename(columns=column_names)

In [None]:
# Fetch the city facts for every active Wikidata ID, then display the table.
cities_df = get_cities(qcities)
cities_df

Unnamed: 0,city_id,city,country,elevation,city_latitude,city_longitude,population
0,Q64,Berlin,Germany,,52.516667,13.383333,3664088
1,Q365,Cologne,Germany,52.0,50.942222,6.957778,1083498


### File export

In [None]:
# Write the cities table to a CSV file (without the index column).
cities_df.to_csv('cities_df.csv', index=False)

# google.colab is imported here, so this cell needs a Colab runtime.
# files.download presumably hands the file to the browser — TODO confirm.
from google.colab import files
files.download("cities_df.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 2. Weather Data

In [None]:
# City names from the cities table, in row order; input for get_weather().
cities = cities_df.loc[:, 'city'].tolist()
def get_weather(cities, api_key="ae4f0da7da6fba066d93d0563204853c"):
  """Download the OpenWeatherMap forecast for each city and stack the results.

  Args:
    cities: Iterable of city names, sent as the `q` query parameter.
    api_key: OpenWeatherMap `appid`.
      NOTE(review): the default is the key that was previously embedded in
      the URL. It is committed in the notebook, so it should be rotated and
      supplied by the caller instead.

  Returns:
    DataFrame with the columns precip_prob, datetime, temperature, humidity,
    cloudiness, wind_speed, wind_gust and city, with `datetime` parsed by
    pd.to_datetime. Fields missing from every forecast entry are NaN. An
    empty input returns an empty frame with the same columns.

  Raises:
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  # API field name -> output column name; the dict order is the output order.
  column_names = {'pop': 'precip_prob',
                  'dt_txt': 'datetime',
                  'main.temp': 'temperature',
                  'main.humidity': 'humidity',
                  'clouds.all': 'cloudiness',
                  'wind.speed': 'wind_speed',
                  'wind.gust': 'wind_gust'}

  cities = list(cities)
  if not cities:
    # pd.concat([]) raises, so return the expected schema explicitly.
    return pd.DataFrame(columns=[*column_names.values(), 'city'])

  forecast_frames = []
  for city in cities:
    # params= lets requests URL-encode names such as "Frankfurt am Main";
    # https keeps the API key out of clear-text traffic.
    response = requests.get("https://api.openweathermap.org/data/2.5/forecast",
                            params={"q": city, "appid": api_key, "units": "metric"},
                            timeout=30)
    response.raise_for_status()
    forecast_df = pd.json_normalize(response.json()["list"])
    forecast_df["city"] = city
    forecast_frames.append(forecast_df)

  return (pd.concat(forecast_frames, ignore_index=True)
          # reindex tolerates an optional field (e.g. wind.gust) being absent.
          .reindex(columns=[*column_names, 'city'])
          .rename(columns=column_names)
          .assign(datetime=lambda frame: pd.to_datetime(frame['datetime'])))

In [None]:
# Download the forecast for every city name collected in `cities`.
weather_data = get_weather(cities)

In [None]:
# Show the combined forecast table as this cell's output.
weather_data

Unnamed: 0,precip_prob,datetime,temperature,humidity,cloudiness,wind_speed,wind_gust,city
0,0.00,2022-06-14 15:00:00,18.71,50,44,4.36,5.69,Berlin
1,0.00,2022-06-14 18:00:00,18.33,50,41,2.68,4.04,Berlin
2,0.00,2022-06-14 21:00:00,14.96,61,56,1.39,1.59,Berlin
3,0.00,2022-06-15 00:00:00,14.07,65,78,1.91,2.74,Berlin
4,0.00,2022-06-15 03:00:00,12.42,70,67,1.43,2.27,Berlin
...,...,...,...,...,...,...,...,...
75,0.63,2022-06-19 00:00:00,19.36,91,75,2.67,6.73,Cologne
76,0.00,2022-06-19 03:00:00,18.73,92,9,1.33,3.08,Cologne
77,0.00,2022-06-19 06:00:00,22.27,77,10,1.89,3.06,Cologne
78,0.00,2022-06-19 09:00:00,28.47,48,50,2.20,4.05,Cologne


In [None]:
# Look up each forecast row's city_id by joining on the city name, then put
# the identifying columns (datetime, city_id, city) ahead of the measurements.
weather_d = (
    weather_data
    .merge(cities_df[['city_id', 'city']], on='city', how='left')
    [['datetime', 'city_id', 'city',
      'precip_prob', 'temperature', 'humidity',
      'cloudiness', 'wind_speed', 'wind_gust']]
)

In [None]:
# Show the forecast table with city_id attached as this cell's output.
weather_d

Unnamed: 0,datetime,city_id,city,precip_prob,temperature,humidity,cloudiness,wind_speed,wind_gust
0,2022-06-14 15:00:00,Q64,Berlin,0.00,18.71,50,44,4.36,5.69
1,2022-06-14 18:00:00,Q64,Berlin,0.00,18.33,50,41,2.68,4.04
2,2022-06-14 21:00:00,Q64,Berlin,0.00,14.96,61,56,1.39,1.59
3,2022-06-15 00:00:00,Q64,Berlin,0.00,14.07,65,78,1.91,2.74
4,2022-06-15 03:00:00,Q64,Berlin,0.00,12.42,70,67,1.43,2.27
...,...,...,...,...,...,...,...,...,...
75,2022-06-19 00:00:00,Q365,Cologne,0.63,19.36,91,75,2.67,6.73
76,2022-06-19 03:00:00,Q365,Cologne,0.00,18.73,92,9,1.33,3.08
77,2022-06-19 06:00:00,Q365,Cologne,0.00,22.27,77,10,1.89,3.06
78,2022-06-19 09:00:00,Q365,Cologne,0.00,28.47,48,50,2.20,4.05


### File export

In [None]:
# Write the weather table (with city_id attached) to a CSV file, without the index.
weather_d.to_csv('weather_df.csv', index=False)

# google.colab is imported here, so this cell needs a Colab runtime.
# files.download presumably hands the file to the browser — TODO confirm.
from google.colab import files
files.download("weather_df.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 3. Airports Data

In [None]:
# City latitudes in cities_df row order; paired by position with `lon`.
lat = cities_df.loc[:, "city_latitude"].tolist()
lat

[52.516666666, 50.942222222]

In [None]:
# City longitudes in cities_df row order; paired by position with `lat`.
lon = cities_df.loc[:, "city_longitude"].tolist()
lon

[13.383333333, 6.957777777]

In [None]:
def get_airports(lat, lon, city_ids=None, api_key="xxx"):
  """Search AeroDataBox for airports around each coordinate pair.

  One request is sent per (lat, lon) pair to the `.../km/50/10` location
  search (presumably a 50 km radius and at most 10 results — TODO confirm).
  Duplicate ICAO codes are collapsed to their first occurrence, and airports
  whose name contains "Air Base" are removed.

  Args:
    lat: Latitudes, one per city.
    lon: Longitudes, aligned by position with `lat`.
    city_ids: Optional city IDs aligned by position with `lat`/`lon`. When
      given, each airport is tagged with the ID of the first coordinate pair
      whose search returned it. When omitted, the previous hard-coded ID list
      is applied by row position.
    api_key: Value sent in the X-RapidAPI-Key header. The default is the
      placeholder that used to be hard-coded.

  Returns:
    DataFrame of airports with the renamed columns airport_name,
    municipality_name, country_code, airport_latitude, airport_longitude
    plus city_id as the last column. If no search returns an airport, an
    empty frame with icao, those columns and city_id is returned.

  Raises:
    ValueError: if the input lengths differ, or if `city_ids` is omitted and
      the number of airports found does not match the legacy ID list.
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  # NOTE(review): positional mapping inherited from the original code. It only
  # fits one specific API result (four airports in this order) and contains
  # 'Q1718', which is not among the active qcities. Prefer passing city_ids.
  legacy_city_ids = ['Q64', 'Q64', 'Q365', 'Q1718']
  column_names = {'name': 'airport_name',
                  'municipalityName': 'municipality_name',
                  'countryCode': 'country_code',
                  'location.lat': 'airport_latitude',
                  'location.lon': 'airport_longitude'}

  lat, lon = list(lat), list(lon)
  if len(lat) != len(lon):
    raise ValueError("lat and lon must have the same length")
  if city_ids is not None:
    city_ids = list(city_ids)
    if len(city_ids) != len(lat):
      raise ValueError("city_ids must have the same length as lat and lon")

  headers = {"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com", "X-RapidAPI-Key": api_key}
  querystring = {"withFlightInfoOnly": "true"}
  airport_frames = []
  for position, (latitude, longitude) in enumerate(zip(lat, lon)):
    url = f"https://aerodatabox.p.rapidapi.com/airports/search/location/{latitude}/{longitude}/km/50/10"
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    response.raise_for_status()
    airport_df = pd.json_normalize(response.json()["items"])
    if airport_df.empty:
      continue
    if city_ids is not None:
      airport_df['city_id'] = city_ids[position]
    airport_frames.append(airport_df)

  if not airport_frames:
    # Nothing found (or no coordinates given): return the expected schema.
    return pd.DataFrame(columns=['icao', *column_names.values(), 'city_id'])

  airports_df = pd.concat(airport_frames, ignore_index=True).drop_duplicates(subset='icao')
  # na=False keeps a missing name from breaking the boolean mask.
  is_air_base = airports_df['name'].str.contains("Air Base", case=False, na=False)
  airports_df = (airports_df[~is_air_base]
                 .drop(columns=['iata', 'shortName'], errors='ignore')
                 .reset_index(drop=True))

  if city_ids is None:
    if len(airports_df) != len(legacy_city_ids):
      raise ValueError(
          f"found {len(airports_df)} airports but the built-in city_id list has "
          f"{len(legacy_city_ids)} entries; pass city_ids explicitly")
    airports_df['city_id'] = legacy_city_ids
  else:
    # Keep city_id as the last column, as in the legacy path.
    ordered = [column for column in airports_df.columns if column != 'city_id']
    airports_df = airports_df[ordered + ['city_id']]

  return airports_df.rename(columns=column_names)

In [None]:
# Search for airports around every city coordinate pair collected above.
airports_df = get_airports(lat, lon)

### File export

In [None]:
# Write the airports table to a CSV file (without the index column).
airports_df.to_csv('airports_df.csv', index=False)

# google.colab is imported here, so this cell needs a Colab runtime.
# files.download presumably hands the file to the browser — TODO confirm.
from google.colab import files
files.download("airports_df.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 4. Flights Data

In [None]:
# ICAO codes of the airports found above; one arrivals request is sent per code.
icao = airports_df.loc[:, 'icao'].tolist()

In [None]:
def get_arrivals(icao, api_key="xxx"):
  """Fetch tomorrow's scheduled arrivals for each airport from AeroDataBox.

  "Tomorrow" is computed from the current date in the Europe/Berlin time
  zone. Each request covers `T10:00` to `T22:00` of that date.

  Args:
    icao: Iterable of airport ICAO codes. One request is sent per code.
    api_key: Value sent in the X-RapidAPI-Key header. The default is the
      placeholder that used to be hard-coded.

  Returns:
    DataFrame with one row per arrival. The dropped API fields are removed
    when present, the remaining ones are renamed (flight_number,
    departure_icao, departure_airport, scheduled_time, aircraft_model,
    airline_name) and `arrival_icao` holds the queried airport.
    `scheduled_time` is parsed with pd.to_datetime. If there is nothing to
    return, an empty frame with arrival_icao and the renamed columns is
    returned.

  Raises:
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  unused_columns = ['isCargo',
                    'status',
                    'callSign',
                    'codeshareStatus',
                    'movement.airport.iata',
                    'movement.actualTimeLocal',
                    'movement.quality',
                    'aircraft.reg',
                    'aircraft.modeS',
                    'movement.terminal',
                    'movement.scheduledTimeUtc',
                    'movement.actualTimeUtc',
                    'movement.baggageBelt',
                    'movement.gate']
  column_names = {'number': 'flight_number',
                  'movement.airport.icao': 'departure_icao',
                  'movement.airport.name': 'departure_airport',
                  'movement.scheduledTimeLocal': 'scheduled_time',
                  'aircraft.model': 'aircraft_model',
                  'airline.name': 'airline_name'}
  empty_result = pd.DataFrame(columns=['arrival_icao', *column_names.values()])

  icao = list(icao)
  if not icao:
    return empty_result

  # Ask for the current time directly in the target zone.
  today = datetime.now(timezone('Europe/Berlin')).date()
  tomorrow = today + timedelta(days=1)

  querystring = {"withLeg":"false","direction":"Arrival","withCancelled":"false","withCodeshared":"true",
                 "withCargo":"false","withPrivate":"false","withLocation":"false"}
  headers = {"X-RapidAPI-Key": api_key, "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"}
  arrival_frames = []
  for code in icao:
    url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{code}/{tomorrow}T10:00/{tomorrow}T22:00"
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    response.raise_for_status()
    if response.status_code == 204:
      # A 204 No Content reply has no JSON body to parse; skip this airport.
      continue
    arrival_df = pd.json_normalize(response.json()["arrivals"])
    arrival_df["arrival_icao"] = code
    arrival_frames.append(arrival_df)

  if not arrival_frames:
    return empty_result

  arrivals_df = (pd.concat(arrival_frames, ignore_index=True)
                 # Optional fields (gate, baggage belt, ...) are not always
                 # present, so a missing column must not abort the cleanup.
                 .drop(columns=unused_columns, errors='ignore')
                 .rename(columns=column_names))
  if 'scheduled_time' in arrivals_df.columns:
    arrivals_df['scheduled_time'] = pd.to_datetime(arrivals_df['scheduled_time'])
  return arrivals_df

In [None]:
# Fetch tomorrow's arrivals for every airport ICAO code, then display the table.
arrivals_df = get_arrivals(icao)
arrivals_df

Unnamed: 0,arrival_icao,number,status,codeshareStatus,isCargo,movement.airport.name,movement.scheduledTimeLocal,movement.scheduledTimeUtc,movement.terminal,movement.quality,...,airline.name,movement.airport.icao,movement.airport.iata,aircraft.reg,aircraft.modeS,callSign,movement.actualTimeLocal,movement.actualTimeUtc,movement.gate,movement.baggageBelt
0,EDDB,U2 4562,Unknown,Unknown,False,Amsterdam,2022-06-15 10:30+02:00,2022-06-15 08:30Z,1,[Basic],...,easyJet,,,,,,,,,
1,EDDB,W2 1250,Unknown,Unknown,False,Bolzano,2022-06-15 10:50+02:00,2022-06-15 08:50Z,1,[Basic],...,FlexFlight,,,,,,,,,
2,EDDB,U2 5514,Unknown,Unknown,False,Keln,2022-06-15 20:35+02:00,2022-06-15 18:35Z,1,[Basic],...,easyJet,,,,,,,,,
3,EDDB,OG 700,Unknown,Unknown,False,Reykjavik,2022-06-15 11:25+02:00,2022-06-15 09:25Z,1,[Basic],...,Ghodawat Enterprises,BIKF,KEF,,,,,,,
4,EDDB,FI 528,Unknown,Unknown,False,Reykjavik,2022-06-15 13:10+02:00,2022-06-15 11:10Z,1,[Basic],...,Icelandair,BIKF,KEF,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,EDDL,TK 1529,Expected,IsOperator,False,Istanbul,2022-06-15 20:15+02:00,2022-06-15 18:15Z,C,"[Basic, Live]",...,Turkish,LTFM,IST,,,THY5NV,2022-06-15 20:15+02:00,2022-06-15 18:15Z,C48,
410,EDDL,5F 685,Expected,IsOperator,False,Chişinău,2022-06-15 14:25+02:00,2022-06-15 12:25Z,C,"[Basic, Live]",...,Fly One,LUKK,KIV,,,FIA685,2022-06-15 14:25+02:00,2022-06-15 12:25Z,C36,
411,EDDL,EW 9923,Expected,IsOperator,False,Belgrade,2022-06-15 16:25+02:00,2022-06-15 14:25Z,C,"[Basic, Live]",...,Eurowings,LYBE,BEG,D-AGWG,3C5EE7,EWG8HB,2022-06-15 16:25+02:00,2022-06-15 14:25Z,C48,
412,EDDL,ME 247,Expected,IsOperator,False,Beirut,2022-06-15 19:00+02:00,2022-06-15 17:00Z,C,"[Basic, Live]",...,Middle East,OLBA,BEY,,,MEA247,2022-06-15 19:00+02:00,2022-06-15 17:00Z,C42,


### File export

In [None]:
# Export the arrivals table built by get_arrivals(). The previous code used
# `arrivals_df_copy`, which is never defined in this notebook and raised a
# NameError on a fresh kernel. The output file name is kept unchanged so that
# anything reading 'arrivals_df_copy.csv' keeps working.
arrivals_df.to_csv('arrivals_df_copy.csv', index=False)

# google.colab is imported here, so this cell needs a Colab runtime.
from google.colab import files
files.download("arrivals_df_copy.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>