# <span style="color:#6FFFE9">Preparing for AWS</span> 

## <span style="color:#ffadad">Import libraries</span>

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import sqlalchemy
from datetime import datetime, date, timedelta
import api_pw
import re


## <span style="color:#ffadad">Get City Data </span>

In [2]:

cities = ['Berlin', 'Heidelberg', 'Cologne']

list_for_df = []

for city in cities:

    url = f'https://en.wikipedia.org/wiki/{city}'
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')

    response_dict = {}

    response_dict['city'] = soup.select(".firstHeading")[0].get_text()
    response_dict['country'] = soup.select(".infobox-data")[0].get_text()
   
    if soup.select_one('th.infobox-header:-soup-contains("Population")'):
        response_dict['population'] = soup.select_one('th.infobox-header:-soup-contains("Population")').parent.find_next_sibling().find(text=re.compile(r'\d+'))
    
    response_dict['latitude'] = soup.select(".latitude")[0].get_text()
    response_dict['longitude'] = soup.select(".longitude")[0].get_text()
    
    list_for_df.append(response_dict)

cities_df = pd.DataFrame(list_for_df)

cities_df['latitude'] = cities_df['latitude'].str.split('″').str[0].str.replace('°', '.', regex=False).str.replace('′', '', regex=False).str.replace('N', '00', regex=False)

cities_df['longitude'] = cities_df['longitude'].str.split('″').str[0].str.replace('°', '.', regex=False).str.replace('′', '', regex=False).str.replace('E', '00', regex=False)

#cities_df.iloc[3,1] = cities_df.iloc[3,1][-7:-1]

cities_df['population'] = cities_df['population'].str.replace(',', '', regex=False)
cities_df['population'] = cities_df['population'].astype(int)
cities_df['latitude'] = cities_df['latitude'].astype(float)
cities_df['longitude'] = cities_df['longitude'].astype(float)
cities_df

Unnamed: 0,city,country,population,latitude,longitude
0,Berlin,Germany,3677472,52.3112,13.2418
1,Heidelberg,Germany,159245,49.25,8.43
2,Cologne,Germany,1073096,50.5611,6.571


In [3]:
#host = 'wbs-aws-db.xxxxxxxxxxxx'
schema = 'gans_aws' 
user = 'admin'
password = api_pw.sql_pw  # save in the api_pw.py file
port = 3306
connection_details = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}' 



In [4]:
cities_df.to_sql('city', if_exists='append', con=connection_details, index=False) 

3

## <span style="color:#ffadad">Preparing Lambda Function in AWS </span>

In [None]:
import json
import pandas as pd
import sqlalchemy
import requests
import sqlalchemy
from datetime import datetime, date, timedelta
from pytz import timezone
import api_pw

def get_weather(city_df):

    api_key = api_pw.openweather_api

    weather_cities_dict = {"city_id"         : [],
                           "Forecast_time"   : [],
                           "Weather_desc"    : [],
                           "Temperature"     : [],
                           "Rain_probability": [],
                           "Humidity"        : []
                          }

    for i, city in enumerate(city_df['city_id']):
        weather_cities = requests.get(f"http://api.openweathermap.org/data/2.5/forecast?lat={city_df.iloc[i]['latitude']}&lon={city_df.iloc[i]['longitude']}&appid={api_key}&units=metric").json()

        for j in weather_cities["list"]:
            weather_cities_dict["city_id"].append(city)
            weather_cities_dict['Forecast_time'].append(j['dt_txt'])
            weather_cities_dict['Weather_desc'].append(j['weather'][0]['description'])
            weather_cities_dict['Temperature'].append(j['main']['temp'])
            weather_cities_dict['Humidity'].append(j['main']['humidity'])

            try:
                weather_cities_dict['Rain_probability'].append(j['rain']['3h'])
            except:
                weather_cities_dict['Rain_probability'].append('0')

    weather_df = pd.DataFrame(weather_cities_dict)
    return weather_df


def tomorrows_flight_arrivals(icao_df):

  today = datetime.now().astimezone(timezone('Europe/Berlin')).date()
  tomorrow = (today + timedelta(days=1))

  list_for_df = [] 

  for i, icao in enumerate(icao_df["icao"]):
    times = [["00:00","11:59"],["12:00","23:59"]] 

    for time in times: 
      url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}"
      querystring = {"withLeg":"true","direction":"Arrival","withCancelled":"false","withCodeshared":"true","withCargo":"false","withPrivate":"false"}
      headers = {
          'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
          'x-rapidapi-key': api_pw.flight_api
          }
      response = requests.request("GET", url, headers=headers, params=querystring)
      flights_json = response.json()

      for flight in flights_json['arrivals']:
        flights_dict = {}
        flights_dict['arrival_icao'] = icao
        flights_dict['flight_number'] = flight.get('number', None)
        flights_dict['airline'] = flight['airline'].get('name', None)
        flights_dict['arrival_terminal'] = flight['arrival'].get('terminal', None)
        flights_dict['arrival_time_local'] = flight['arrival'].get('scheduledTimeLocal', None)
        flights_dict['departure_icao'] = flight['departure']['airport'].get('icao', None)
        flights_dict['departure_city'] = flight['departure']['airport'].get('name', None)
        flights_dict['departure_time_local'] = flight['departure'].get('scheduledTimeLocal', None)        
        flights_dict['data_retrieved_on'] = datetime.now().astimezone(timezone('Europe/Berlin')).date()
        list_for_df.append(flights_dict)

  flight_df = pd.DataFrame(list_for_df)
  flight_df["arrival_time_local"] = pd.to_datetime(flight_df["arrival_time_local"])
  flight_df["departure_time_local"] = pd.to_datetime(flight_df["departure_time_local"],utc=True)
  flight_df["data_retrieved_on"] = pd.to_datetime(flight_df["data_retrieved_on"])
  
  return flight_df


def lambda_handler(event, context):
    host = 'wbs-project-db.c90scngdiihl.us-east-1.rds.amazonaws.com'
    schema = 'gans_aws' 
    user = 'admin'
    password = api_pw.sql_pw
    port = 3306
    connection_details = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}' 
    
    city_df = pd.read_sql('city',con=connection_details)
    
    weather_df = get_weather(city_df)
    weather_df.to_sql('weather',con=connection_details,if_exists='append',index=False)

    icao_df = pd.read_sql('icao',con=connection_details)

    flight_df = tomorrows_flight_arrivals(["EDDB"])
    flight_df.to_sql('flight',con=connection_details, if_exists='append', index=False) 
                       
              
  
    # TODO implement
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }