In [None]:
import pandas as pd
import polars as pl
import numpy as np

In [None]:
# Load the airport table with polars. Later cells read its 'Airport Code',
# 'Lat' and 'Long' columns, so the CSV must provide them.
airport_distance_df = pl.read_csv("airport_location.csv")

In [None]:
# Preview the first rows to check that the file loaded with the expected columns.
airport_distance_df.head()

In [None]:
def haversine_distance(long1, lat1,
                       long2, lat2,
                       degrees=False,
                       radius=6371):
    """Great-circle distance between two points via the haversine formula.

    Accepts scalars as well as array-likes that support NumPy ufuncs, so a
    single point can be compared against a whole column of points.

    Parameters
    ----------
    long1, lat1
        Longitude and latitude of the first point(s).
    long2, lat2
        Longitude and latitude of the second point(s).
    degrees : bool, default False
        Pass True when the coordinates are in degrees; they are converted to
        radians before the formula is applied. When False the coordinates
        are used as radians.
    radius : float, default 6371
        Radius of the sphere. The default is the radius of the earth in
        kilometers, so the result is in kilometers.

    Returns
    -------
    The distance along the sphere's surface, in the units of ``radius``.
    """
    # degrees vs radians
    if degrees:
        long1 = np.radians(long1)
        lat1 = np.radians(lat1)
        long2 = np.radians(long2)
        lat2 = np.radians(lat2)

    # implementing haversine
    a = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((long2 - long1) / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally above 1 for
    # (near-)antipodal points, which would make sqrt(1 - a) NaN. Clamp the
    # complement at zero so those points get half the circumference instead.
    complement = np.maximum(1 - a, 0.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(complement))
    return radius * c

In [None]:
 def get_distance_list(airport_dataframe, airport_code):
     """Return every other airport, sorted by distance from ``airport_code``.

     Parameters
     ----------
     airport_dataframe : polars.DataFrame
         Table with 'Airport Code', 'Lat' and 'Long' columns. The coordinates
         are passed to ``haversine_distance`` with ``degrees=True``.
     airport_code
         Value of 'Airport Code' identifying the origin airport.

     Returns
     -------
     polars.DataFrame
         The input rows except the origin airport, with an added 'Distance'
         column (as computed by ``haversine_distance``), sorted by that
         column. The input frame is not modified.

     Raises
     ------
     ValueError
         If ``airport_code`` does not match exactly one row.
     """
     # selects the row from our airport code input
     origin = airport_dataframe.filter(pl.col('Airport Code') == airport_code)
     if origin.height != 1:
         # Fail with a message that names the code instead of the generic
         # shape error that `.item()` would raise further down.
         raise ValueError(
             f"expected exactly one row with Airport Code {airport_code!r}, "
             f"found {origin.height}"
         )
     lat = origin.select("Lat").item()  # get latitude
     long = origin.select("Long").item()  # get longitude

     # filter out our airport, implement haversine distance
     others = airport_dataframe.filter(pl.col('Airport Code') != airport_code)
     with_distance = others.with_columns(
         Distance=pl.struct(['Lat', 'Long']).map_batches(
             # The struct bundles both coordinate columns so one batch call
             # can hand them to the vectorised haversine implementation.
             lambda coords: haversine_distance(
                 lat1=lat,
                 long1=long,
                 lat2=coords.struct.field("Lat"),
                 long2=coords.struct.field("Long"),
                 degrees=True,
             )
         )
     )
     return with_distance.sort(by="Distance")  # return values sorted


In [None]:
# Rank all other airports by their distance from the airport coded 'CDG'.
distance_airports = get_distance_list(airport_distance_df, 'CDG')

# Print the airport codes in distance order, then show the full table as the
# cell's output.
print(distance_airports.get_column('Airport Code').to_list())
distance_airports