In [1]:
import pandas as pd
import requests
from datetime import datetime, timedelta, date, time
import matplotlib.pyplot as plt 
import json
import api_keys

### Define function to get weather data for chosen cities

In [7]:
def get_weather(city_list):
    """Fetch OpenWeatherMap forecast data for every city in ``city_list``.

    Parameters
    ----------
    city_list : iterable of str
        City names, each sent as the ``q`` parameter of the forecast endpoint.

    Returns
    -------
    forecast_df : pandas.DataFrame
        One row per forecast entry and city, with descriptively named columns
        (see ``column_names`` below) and ``date_and_time`` parsed to datetime.
        Columns the API did not deliver (e.g. rain volume when no rain is
        forecast) are present and filled with NaN; columns the mapping does not
        know are kept under their raw name after the known ones.
    city_df : pandas.DataFrame
        One row per city with the API's city metadata (``city_id``,
        ``city_name``, ...), without the sunrise/sunset columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (for example an unknown city).
    """
    # https so the API key in the query string is not sent in clear text
    url = 'https://api.openweathermap.org/data/2.5/forecast'
    city_cols = ['city.id', 'city.name', 'city.sunrise', 'city.sunset']

    # Raw column name -> descriptive name. Renaming by NAME instead of by position
    # matters: optional fields such as 'rain.3h' change the column order (or the
    # column count) depending on the weather, which would silently mislabel data
    # or raise a length-mismatch error with a positional rename.
    column_names = {
        'city.id': 'city_id',
        'city.name': 'city_name',
        'city.sunrise': 'sunrise',
        'city.sunset': 'sunset',
        'dt': 'timestamp',
        'visibility': 'avg_visibility_m',
        'pop': 'precipitation_probability',
        'dt_txt': 'date_and_time',
        'main.temp': 'temp',
        'main.feels_like': 'felt_temp',
        'main.temp_min': 'min_temp',
        'main.temp_max': 'max_temp',
        'main.pressure': 'pressure',
        'main.sea_level': 'pressure_sea_lvl',
        'main.grnd_level': 'pressure_ground_lvl',
        'main.humidity': 'humidity_perc',
        'clouds.all': 'cloudiness_perc',
        'wind.speed': 'wind_speed_m_per_sec',
        'wind.deg': 'wind_direction_degrees',
        'wind.gust': 'wind_gust_m_per_sec',
        'sys.pod': 'night_or_day',
        'id': 'outlook_id',
        'main': 'outlook_short',
        'description': 'outlook_long',
        'rain.3h': 'rain_vol_last_3_hrs',
    }
    expected_columns = list(column_names.values())

    # collect the per-entry / per-city frames and concatenate once at the end
    forecast_data_list = []
    city_data_list = []

    for city in city_list:
        # let requests build (and URL-encode) the query string
        params = {'q': city, 'appid': api_keys.openweather_api_key, 'units': 'metric'}
        # timeout: fail instead of hanging forever on an unresponsive server
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        weather = response.json()

        # city information for the city_data output table
        city_df = (
            pd.json_normalize(weather['city'])
            .rename(columns={'id': 'city_id', 'name': 'city_name'})
            .drop(columns=['sunset', 'sunrise'])
        )
        city_data_list.append(city_df)

        # city identifiers that are attached to every forecast row so that the
        # forecast and city tables can be merged easily
        city_part = pd.json_normalize(weather)[city_cols]

        for entry in weather['list']:
            # numerical forecast values + textual sky outlook, side by side
            forecast_data_list.append(pd.concat(
                [city_part, pd.json_normalize(entry), pd.json_normalize(entry['weather'])],
                axis=1,
            ))

    city_data = pd.concat(city_data_list) if city_data_list else pd.DataFrame()
    forecast_df = pd.concat(forecast_data_list) if forecast_data_list else pd.DataFrame()

    # remove uninteresting columns and give all others descriptive names
    forecast_df = (
        forecast_df
        .drop(columns=['weather', 'main.temp_kf', 'icon'], errors='ignore')
        .rename(columns=column_names)
    )
    # stable schema: known columns first (missing ones become NaN), unknown ones after
    extra_columns = [col for col in forecast_df.columns if col not in expected_columns]
    forecast_df = forecast_df.reindex(columns=expected_columns + extra_columns)

    # convert date and time column to datetime
    forecast_df['date_and_time'] = pd.to_datetime(forecast_df['date_and_time'])

    # 1. forecast containing weather information
    # 2. city containing city information
    return forecast_df, city_data

### Define function to look up the airports for each city and collect their ICAO codes

In [9]:
def get_airport_dict(city_list):
    """Look up airports for each city via the AeroDataBox term search.

    Parameters
    ----------
    city_list : iterable of str
        Search terms (city names); each is sent as the ``q`` parameter with a
        limit of 10 results.

    Returns
    -------
    dict
        Maps ICAO code -> ``[airport_name, city]`` where ``city`` is the search
        term that produced the airport. If the same ICAO code is returned for
        several terms, the last one wins. Cities whose request does not return
        status 200 are skipped (same policy as ``get_flight_data``); items
        without an ICAO code are ignored.

    Notes
    -----
    NOTE(review): this is a free-text search, so it presumably also returns
    airports of other places that share the name — confirm whether results
    should be filtered (e.g. by country) before use.
    """
    url = "https://aerodatabox.p.rapidapi.com/airports/search/term"
    # identical for every request, so built once outside the loop
    headers = {
        'X-RapidAPI-Key': api_keys.aerodatabox_api_key,
        'X-RapidAPI-Host': api_keys.aerodatabox_host
    }

    airport_dict = {}

    for city in city_list:
        querystring = {"q": f"{city}", "limit": "10"}

        # timeout: fail instead of hanging forever on an unresponsive server
        response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)

        # catch errors - if response code is not good, city is skipped
        if response.status_code != 200:
            print(city, 'is skipped, airport search returned status', response.status_code)
            continue

        airports = response.json()

        for airport in airports.get('items', []):
            icao = airport.get('icao')
            # the ICAO code is the dictionary key; entries without one cannot be stored
            if not icao:
                continue
            airport_dict[icao] = [airport.get('name'), city]

    return airport_dict

### Define function to get timestamps for the next day between 8 AM and 8 PM

In [10]:
def get_timestamps():
    """Return tomorrow's 08:00 and 20:00 as ``'YYYY-MM-DDTHH:MM'`` strings.

    "Tomorrow" is derived from ``date.today()``, i.e. the local date of the
    machine running the notebook.

    Returns
    -------
    tuple of (str, str)
        ``(datetime_start, datetime_end)`` for the 8 AM - 8 PM window.
    """
    stamp_format = '%Y-%m-%dT%H:%M'
    tomorrow = date.today() + timedelta(days=1)

    # same formatting for both ends of the window; only the hour differs
    window_start, window_end = (
        datetime.combine(tomorrow, time(hour=hour, minute=0, second=0)).strftime(stamp_format)
        for hour in (8, 20)
    )

    return window_start, window_end

### Define function to call the API and retrieve flight arrival data

In [11]:
# make api call to receive flight information
def get_flight_data(airport_dict, datetime_start, datetime_end):
    """Fetch scheduled arrivals for every airport in ``airport_dict``.

    Parameters
    ----------
    airport_dict : dict
        Maps ICAO code -> sequence whose first element is the airport name and
        whose second element is the city name (as built by ``get_airport_dict``).
    datetime_start, datetime_end : str
        Bounds of the time window, inserted into the request URL as given
        (``get_timestamps`` produces ``'YYYY-MM-DDTHH:MM'``).

    Returns
    -------
    pandas.DataFrame
        One row per arrival with the columns ``flight_id``, ``origin_icao``,
        ``origin_iata``, ``origin_airport``, ``scheduled_time_local``,
        ``scheduled_time_utc``, ``airline``, ``arrival_airport_name`` and
        ``city_name``; rows of each airport are sorted by local scheduled time.
        Airports with a non-200 response or without arrivals are skipped. If
        nothing is left, an empty frame with these columns is returned.
    """
    # raw json_normalize column -> standardized column name
    column_names = {
        'number': 'flight_id',
        'movement.airport.icao': 'origin_icao',
        'movement.airport.iata': 'origin_iata',
        'movement.airport.name': 'origin_airport',
        'movement.scheduledTimeLocal': 'scheduled_time_local',
        'movement.scheduledTimeUtc': 'scheduled_time_utc',
        'airline.name': 'airline',
    }
    output_columns = list(column_names.values()) + ['arrival_airport_name', 'city_name']

    # identical for every request, so built once outside the loop
    querystring = {"withLeg":"false","direction":"Arrival","withCancelled":"false","withCodeshared":"false","withCargo":"false","withPrivate":"false","withLocation":"false"}
    headers = {
        'X-RapidAPI-Key': api_keys.aerodatabox_api_key,
        'X-RapidAPI-Host': api_keys.aerodatabox_host
    }

    flight_data_df_list = []

    for icao, airport_info in airport_dict.items():

        url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{datetime_start}/{datetime_end}"

        # timeout: fail instead of hanging forever on an unresponsive server
        response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)

        # catch errors - if response code is not good, icao is skipped
        if response.status_code != 200:
            continue

        arrivals = response.json().get('arrivals')

        # catch errors - if data received from api is empty (or missing), icao is skipped
        if not arrivals:
            continue

        print(icao, 'is being processed.')

        # standardize df: reindex (instead of plain column selection) so that a
        # column that is absent for this airport becomes NaN rather than a KeyError;
        # the chain builds a new frame, avoiding in-place edits on a slice
        flight_data_df = (
            pd.json_normalize(arrivals)
            .reindex(columns=list(column_names))
            .sort_values(by='movement.scheduledTimeLocal')
            .rename(columns=column_names)
            .assign(arrival_airport_name=airport_info[0], city_name=airport_info[1])
        )
        flight_data_df_list.append(flight_data_df)

    # pd.concat raises on an empty list, so return an empty frame with the usual schema
    if not flight_data_df_list:
        return pd.DataFrame(columns=output_columns)

    flight_data_df = pd.concat(flight_data_df_list)
    return flight_data_df

### Execute functions

In [12]:
# Run the whole pipeline for the cities of interest.
# 1. weather: forecast rows plus one metadata row per city
city_list = ['Berlin', 'London', 'New York']
forecast_data, city_data = get_weather(city_list)
# 2. airports: ICAO code -> [airport name, city] for every search hit
# NOTE(review): the recorded output below lists FAEL and CYXU, which look like
# airports of other places matched on the name "London" — confirm whether the
# search results should be filtered before requesting flights.
airport_dict = get_airport_dict(city_list)
# 3. time window: tomorrow 8 AM to 8 PM
datetime_start, datetime_end = get_timestamps()
# 4. arrivals for every airport found in step 2 (prints one line per processed ICAO)
flight_data = get_flight_data(airport_dict, datetime_start, datetime_end)

EDDB is being processed.
EGSS is being processed.
EGLL is being processed.
EGLC is being processed.
EGKK is being processed.
EGGW is being processed.
FAEL is being processed.
CYXU is being processed.
KLGA is being processed.
KJFK is being processed.


### Export data to .csv

In [13]:
# Write each result frame to its CSV file below data/ (to_csv defaults, so the
# row index is written as the first column).
csv_exports = (
    (forecast_data, R'data/weather_forecast.csv'),
    (city_data, R'data/city_info.csv'),
    (flight_data, R'data/flight_data.csv'),
)
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path)