In [4]:
import ast
import asyncio
import configparser
import json
import os
import re
import time
from datetime import datetime, timedelta
from urllib.parse import quote_plus

import pandas as pd
import requests
from bson import json_util
from pymongo import MongoClient
from requests.auth import HTTPBasicAuth

In [18]:
# MongoDB password. Read from the MONGO_PASSWORD environment variable so the
# secret does not have to be stored in the notebook; defaults to an empty
# string when the variable is not set.
MONGO_PASSWORD = os.environ.get("MONGO_PASSWORD", "")
# Reserved URI characters in the password (e.g. "@", ":", "/") must be
# percent-escaped, otherwise the connection string is parsed incorrectly.
CONNECTION_STRING = f"mongodb+srv://admin:{quote_plus(MONGO_PASSWORD)}@cluster0.lutwzgr.mongodb.net/?retryWrites=true&w=majority"

In [19]:
# Client for the MongoDB deployment addressed by CONNECTION_STRING.
client = MongoClient(host=CONNECTION_STRING)

In [21]:
# Handles to the "flight_info" database and its "flights" collection
# (subscript access is equivalent to attribute access in pymongo).
db = client["flight_info"]
flights = db["flights"]

In [8]:
# Airport codes whose arrivals are requested by api_scrape(), kept in
# alphabetical order.
IATAs = (
    "AOI BDS BHX BLQ CAG CDG CTA FCO FLR "
    "GLA GOA LHR LIN LYS MAN MRS MXP NAP "
    "NCE OLB ORY SUF TLS TRN TRS VCE VRN"
).split()

In [7]:
def get_bearer():
    """Request an OAuth access token from the Lufthansa token endpoint.

    Posts a ``client_credentials`` grant and returns the ``access_token``
    field of the JSON reply.

    Returns:
        The value of ``access_token`` in the token response.

    Raises:
        requests.HTTPError: if the endpoint replies with a 4xx/5xx status
            (for example when the client secret is missing or wrong).
        KeyError: if the reply contains no ``access_token`` field.
    """
    data = {
        "client_id": "3xeucrdc57sceex6dnpdpm4j",
        "client_secret": "",
        "grant_type": "client_credentials",
    }
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.post(
        "https://api.lufthansa.com/v1/oauth/token", data=data, timeout=30
    )
    # Fail with the real HTTP error instead of an opaque KeyError further down.
    response.raise_for_status()

    # response.json() already returns a dict; no dumps/loads round trip needed.
    return response.json()["access_token"]

In [10]:
def api_scrape():
    """Download yesterday's arrivals for every airport in ``IATAs``.

    For each airport code and each start time in ``times`` one GET request is
    sent to the customer-flight-information arrivals endpoint. Every usable
    response is re-serialised with ``json.dumps`` and the resulting list of
    JSON strings is written, as its Python ``str()`` representation, to
    ``data/all_airports.txt``.

    Raises:
        requests.HTTPError / KeyError: propagated from ``get_bearer``.
        requests.RequestException: if a request fails or times out.
    """
    results = []

    # Date part of the request path ("/YYYY-MM-DDT") for yesterday.
    day_arg = (datetime.today() - timedelta(days=1)).strftime("/%Y-%m-%dT")

    times = ["00:00", "04:00", "08:00", "12:00", "16:00", "20:00"]

    print(day_arg)

    bearer = get_bearer()
    # The token itself is a credential, so it is deliberately not printed.
    print("Bearer token obtained")

    headers = {
        "accept": "application/json",
        "authorization": "Bearer " + bearer,
    }

    for code in IATAs:
        for dtime in times:
            response = requests.get(
                "https://api.lufthansa.com/v1/operations/customerflightinformation/arrivals/"
                + code
                + day_arg
                + dtime,
                headers=headers,
                timeout=30,
            )
            print(
                "requested with code: {0}, day_arg: {1}, dtime: {2}".format(
                    code, day_arg, dtime
                )
            )
            # Short pause after every request (presumably to respect the API
            # rate limit - confirm against the account's quota).
            time.sleep(0.2)

            ## if there are no arrivals within a window, we get the resource not found error, so we want to filter these out of the response
            ## we also want to filter out any "error" messages
            # Both markers must be tested explicitly, and the check has to run
            # for every response rather than only the last one per airport.
            body = response.text
            if response.ok and "ResourceNotFound" not in body and "Error" not in body:
                results.append(response)

    print("results is: ", results)
    result_out = [
        json.dumps(result.json(strict=False), ensure_ascii=True) for result in results
    ]

    print("result_out is: ", result_out)
    # Make sure the output folder exists before writing.
    os.makedirs("data", exist_ok=True)
    with open("data/all_airports.txt", "w") as f:
        f.write(str(result_out))

In [11]:
def flatten_data():
    """Flatten the scraped responses into one list of flight records.

    Reads ``data/all_airports.txt``, extracts
    ``["FlightInformation"]["Flights"]["Flight"]`` from every response and
    writes the combined list as JSON to ``data/airports_parsed.txt``.

    The input is expected to be the ``str()`` of a Python list of JSON
    strings. It is parsed with ``ast.literal_eval`` so that quotes and
    backslash escapes inside the payloads are restored exactly. If the file
    is not in that form, the earlier regex-based clean-up is used instead.

    Entries that carry no flight information are skipped and counted.

    Raises:
        FileNotFoundError: if the input file does not exist.
        json.JSONDecodeError: if a payload is not valid JSON.
    """
    with open("data/all_airports.txt", "r", encoding="utf-8") as f:
        raw = f.read()

    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        parsed = None

    if isinstance(parsed, list) and all(isinstance(item, str) for item in parsed):
        responses = [json.loads(item) for item in parsed]
    else:
        # Fallback: textual clean-up that turns the list repr into a JSON array.
        data = re.sub("}\n{", "},{", raw)
        data = re.sub(r"\['\{", "[{", data)
        data = re.sub("', '", ",", data)
        data = re.sub(r"'\]", "]", data)
        responses = json.loads(data)

    data_out = []
    skipped = 0
    ## loop through the responses and collect the complete record of every flight
    for entry in responses:
        try:
            flight = entry["FlightInformation"]["Flights"]["Flight"]
        except (KeyError, TypeError):
            # Not a flight-information payload (e.g. an error reply).
            skipped += 1
            continue

        ## a list means the request returned more than one flight - add each one individually
        if isinstance(flight, list):
            data_out.extend(flight)
        else:
            data_out.append(flight)

    if skipped:
        print("skipped entries without flight information: ", skipped)

    print("flattened data is: ", data_out)
    with open("data/airports_parsed.txt", "w") as f:
        json.dump(data_out, f)

In [22]:
def writetoMongo():
    """Insert the flattened flight records into the ``flights`` collection.

    Loads ``data/airports_parsed.txt`` as JSON. A list is inserted with
    ``insert_many``; any other value is inserted with ``insert_one``. An
    empty list is skipped, because ``insert_many`` rejects an empty
    document list.

    Raises:
        FileNotFoundError: if the parsed file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open("data/airports_parsed.txt", "r", encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, list):
        if not data:
            # Nothing was scraped; avoid the error insert_many raises on [].
            print("no flights to insert")
            return
        flights.insert_many(data)
    else:
        flights.insert_one(data)

In [None]:
# Step 1: query the arrivals API and write the responses to data/all_airports.txt.
api_scrape()

In [15]:
# Step 2: read data/all_airports.txt and write the flattened flight list to data/airports_parsed.txt.
flatten_data()

flattened data is:  [{'Departure': {'AirportCode': 'ZRH', 'Scheduled': {'Date': '2022-12-17', 'Time': '17:20'}, 'Actual': {'Date': '2022-12-17', 'Time': '18:36'}, 'Status': {'Code': 'DP', 'Description': 'Flight Departed'}}, 'Arrival': {'AirportCode': 'BDS', 'Scheduled': {'Date': '2022-12-17', 'Time': '19:15'}, 'Actual': {'Date': '2022-12-17', 'Time': '20:06'}, 'Status': {'Code': 'LD', 'Description': 'Flight Landed'}}, 'OperatingCarrier': {'AirlineID': '2L', 'FlightNumber': '1722'}, 'Equipment': {'AircraftCode': '295'}, 'Status': {'Code': 'LD', 'Description': 'Flight Landed'}}, {'Departure': {'AirportCode': 'FRA', 'Scheduled': {'Date': '2022-12-17', 'Time': '21:50'}, 'Actual': {'Date': '2022-12-17', 'Time': '23:04'}, 'Terminal': {'Name': '1', 'Gate': 'B20'}, 'Status': {'Code': 'DP', 'Description': 'Flight Departed'}}, 'Arrival': {'AirportCode': 'BHX', 'Scheduled': {'Date': '2022-12-17', 'Time': '22:25'}, 'Actual': {'Date': '2022-12-17', 'Time': '23:44'}, 'Status': {'Code': 'LD', 'Descri

![](https://user-images.githubusercontent.com/62965911/215280124-bc6af95d-86dd-4667-af26-e411cf5d385b.png)

In [None]:
# Step 3: load data/airports_parsed.txt and insert its contents into the flights collection.
writetoMongo()