# In [None]:
import json
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
from datetime import datetime, timedelta
import pytz
import sqlalchemy


# defining a function called weather_data

def weather_data():
    """Fetch OpenWeatherMap forecasts for a fixed list of cities.

    For every city in the hard-coded list the ``/data/2.5/forecast`` endpoint
    is queried with metric units, and each entry of the response's ``list``
    becomes one row of the result.

    Returns:
        pandas.DataFrame: one row per city and forecast entry with the columns
        ``city_id``, ``city``, ``country``, ``forecast_time``, ``outlook``,
        ``detailed_outlook``, ``temperature``, ``temperature_feels_like``,
        ``clouds``, ``rain``, ``snow``, ``wind_speed``, ``wind_deg``,
        ``humidity``, ``pressure``, ``sea_level``, ``grnd_level`` and
        ``information_retrieved_at``. ``forecast_time`` is parsed from the
        entry's ``dt_txt`` field; ``information_retrieved_at`` is the
        Europe/Berlin wall-clock time of the call, truncated to the minute.
        Both are datetime columns.

    Raises:
        requests.RequestException: if a request times out or the API answers
            with an HTTP error status.
        KeyError: if a response lacks one of the mandatory fields.
    """
    cities = ['Frankfurt', 'Berlin', 'Cologne', 'Munich', 'Hamburg']

    # providing the api_key obtained from https://openweathermap.org/api
    API_key = 'xxx'

    # City name as reported by the API -> city_id that is later used in MySQL
    # as a foreign key to the cities table. Any name not listed here falls
    # back to '5' (presumably Hamburg -- verify against the cities table).
    city_ids = {
        'Frankfurt am Main': '1',
        'Berlin': '2',
        'Cologne': '3',
        'Munich': '4',
    }

    # Explicit column order so that an empty result still has every column.
    columns = [
        'city_id', 'city', 'country', 'forecast_time', 'outlook',
        'detailed_outlook', 'temperature', 'temperature_feels_like',
        'clouds', 'rain', 'snow', 'wind_speed', 'wind_deg', 'humidity',
        'pressure', 'sea_level', 'grnd_level', 'information_retrieved_at',
    ]

    # Current Europe/Berlin wall-clock time, minute precision. It is kept as a
    # naive datetime object instead of a 'dd-mm-YYYY' string: re-parsing such
    # a string with pd.to_datetime is ambiguous (day and month can be swapped).
    tz = pytz.timezone('Europe/Berlin')
    retrieved_at = datetime.now(tz).replace(second=0, microsecond=0, tzinfo=None)

    rows = []
    for city in cities:
        # https + params: the API key is not sent in clear text and the city
        # name is URL-encoded; the timeout keeps a stalled connection from
        # blocking the whole run.
        response = requests.get(
            'https://api.openweathermap.org/data/2.5/forecast',
            params={'q': city, 'appid': API_key, 'units': 'metric'},
            timeout=30,
        )
        # Fail with a clear HTTP error instead of a KeyError further down.
        response.raise_for_status()
        weather_json = response.json()

        # These values are identical for every forecast entry of a city.
        city_name = weather_json['city']['name']
        country = weather_json['city']['country']
        city_id = city_ids.get(city_name, '5')

        for entry in weather_json['list']:
            main = entry['main']
            condition = entry['weather'][0]
            wind = entry['wind']
            forecast_time = datetime.strptime(
                entry['dt_txt'], '%Y-%m-%d %H:%M:%S'
            ).replace(second=0)

            rows.append({
                'city_id': city_id,
                'city': city_name,
                'country': country,
                'forecast_time': forecast_time,
                'outlook': condition['main'],
                'detailed_outlook': condition['description'],
                'temperature': main['temp'],
                'temperature_feels_like': main['feels_like'],
                'clouds': entry['clouds']['all'],
                # 'rain' / 'snow' are only present when precipitation is
                # forecast; default to a numeric 0.0 so the columns stay
                # numeric instead of mixing strings and floats.
                'rain': (entry.get('rain') or {}).get('3h', 0.0),
                'snow': (entry.get('snow') or {}).get('3h', 0.0),
                'wind_speed': wind['speed'],
                'wind_deg': wind['deg'],
                'humidity': main['humidity'],
                'pressure': main['pressure'],
                'sea_level': main['sea_level'],
                'grnd_level': main['grnd_level'],
                'information_retrieved_at': retrieved_at,
            })

    # creating dataframe from the collected rows
    weather_from_dict_df = pd.DataFrame(rows, columns=columns)
    # Ensure datetime dtype even for an empty frame, as MySQL needs datetime
    # columns; the values are already datetime objects, so nothing is re-parsed.
    weather_from_dict_df['forecast_time'] = pd.to_datetime(weather_from_dict_df['forecast_time'])
    weather_from_dict_df['information_retrieved_at'] = pd.to_datetime(weather_from_dict_df['information_retrieved_at'])

    return weather_from_dict_df



# defining a function called flight_arrivals

def flight_arrivals():
    """Fetch scheduled arrivals for the next 12 hours from AeroDataBox.

    For every ICAO code in the hard-coded list the airport arrivals endpoint
    is queried for the window [now, now + 12h], expressed in Europe/Berlin
    local time, and every entry of the response's ``arrivals`` list becomes
    one row of the result. Airports answering with HTTP 204 are skipped.

    Returns:
        pandas.DataFrame: one row per arrival with the columns ``icao_code``,
        ``flight_number``, ``airline_name``, ``arrival_time``,
        ``arrival_terminal``, ``departure_airport``, ``aircraft_model`` and
        ``information_retrieved_at``. ``arrival_time`` is the scheduled local
        time with its UTC offset stripped; ``information_retrieved_at`` is the
        Europe/Berlin wall-clock time of the call, truncated to the minute.
        Both are datetime columns. A missing terminal or aircraft model is
        stored as ``'-'``.

    Raises:
        requests.RequestException: if a request times out or the API answers
            with an HTTP error status.
        KeyError: if an arrival lacks one of the mandatory fields.
    """
    # cities = ['EDDF', 'EDFM', 'EDDB', 'EDDK', 'EHBK', 'EDDM', 'EDDH']
    cities = ['EDDF']

    # Explicit column order so that an empty result still has every column.
    columns = [
        'icao_code', 'flight_number', 'airline_name', 'arrival_time',
        'arrival_terminal', 'departure_airport', 'aircraft_model',
        'information_retrieved_at',
    ]

    # Both ends of the query window derive from the same timezone-aware
    # "now". Mixing it with a naive datetime.now() would shift the end of the
    # window by the host's UTC offset (e.g. on a host running in UTC).
    tz = pytz.timezone('Europe/Berlin')
    now = datetime.now(tz)
    current_date_time = now.strftime('%Y-%m-%dT%H:%M')
    # 12 hours is the maximum time span of a single API call
    end_date_time = (now + timedelta(hours=12)).strftime('%Y-%m-%dT%H:%M')
    # Kept as a datetime object (minute precision) rather than a
    # 'dd-mm-YYYY' string, which pd.to_datetime would parse ambiguously.
    retrieved_at = now.replace(second=0, microsecond=0, tzinfo=None)

    # Request settings are identical for every airport.
    querystring = {"withLeg":"true","direction":"Arrival","withCancelled":"false","withCodeshared":"true","withCargo":"false","withPrivate":"true","withLocation":"false"}
    headers = {
        "X-RapidAPI-Key": "xxx",
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com",
    }

    rows = []
    # looping through the different airports in the cities list
    for city in cities:
        url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{city}/{current_date_time}/{end_date_time}"
        response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)

        # 204: for some of the airports there is no data available with
        # aerodatabox, so there is nothing to parse.
        if response.status_code == 204:
            continue
        # Fail with a clear HTTP error instead of a KeyError on 'arrivals'.
        response.raise_for_status()

        for flight in response.json()['arrivals']:
            arrival = flight['arrival']
            # scheduledTimeLocal carries a '+HH:MM' offset suffix; only the
            # local wall-clock part before it is kept.
            local_time = arrival['scheduledTimeLocal'].split('+')[0]

            rows.append({
                'icao_code': city,
                'flight_number': flight['number'],
                'airline_name': flight['airline']['name'],
                'arrival_time': datetime.strptime(local_time, '%Y-%m-%d %H:%M'),
                # terminal and aircraft model are optional in the response
                'arrival_terminal': arrival.get('terminal', '-'),
                'departure_airport': flight['departure']['airport']['name'],
                'aircraft_model': (flight.get('aircraft') or {}).get('model', '-'),
                'information_retrieved_at': retrieved_at,
            })

    # creating a dataframe from the collected rows
    arrivals_df = pd.DataFrame(rows, columns=columns)

    # Ensure datetime dtype even for an empty frame, as it is required by
    # MySQL; the values are already datetime objects, so nothing is re-parsed.
    arrivals_df['arrival_time'] = pd.to_datetime(arrivals_df['arrival_time'])
    arrivals_df['information_retrieved_at'] = pd.to_datetime(arrivals_df['information_retrieved_at'])

    return arrivals_df
        
        
       



def lambda_handler(event, context):
    """Collect weather and flight data and append it to the MySQL database.

    Calls ``weather_data()`` and ``flight_arrivals()`` and appends their
    results to the ``weather`` and ``arrivals_airport`` tables.

    Args:
        event: Lambda invocation event; not used.
        context: Lambda runtime context; not used.

    Returns:
        None.
    """
    # Local imports: only this function needs them.
    import os
    from urllib.parse import quote_plus

    weather = weather_data()
    flights = flight_arrivals()

    # Connection settings can be overridden through environment variables;
    # the previous hard-coded values remain as fallbacks so existing
    # deployments keep working.
    # NOTE(review): the password fallback is a secret committed to source --
    # set DB_PASSWORD in the function configuration, rotate the password and
    # remove the literal.
    schema = os.environ.get("DB_SCHEMA", "gans_e_bike")
    host = os.environ.get("DB_HOST", "gans-e-bike.cfwkaou7hxt6.us-east-1.rds.amazonaws.com")
    user = os.environ.get("DB_USER", "admin")
    password = os.environ.get("DB_PASSWORD", "Subin8690")
    port = int(os.environ.get("DB_PORT", 3306))

    # Escape user and password so characters such as '@' or '/' cannot break
    # the connection URL.
    con = f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{schema}'

    # One engine shared by both writes instead of one per to_sql call.
    engine = sqlalchemy.create_engine(con)
    try:
        weather.to_sql('weather', con=engine, if_exists='append', index=False)
        flights.to_sql('arrivals_airport', con=engine, if_exists='append', index=False)
    finally:
        # Release pooled connections even if a write fails.
        engine.dispose()

