In [22]:
import pandas as pd
import plotly.express as px
import datetime as dt
import re
import geopandas as gpd

file_path = '37478eng_UntypedDataSet_09102023_105711.csv'

data = pd.read_csv(file_path, delimiter=';')

data.loc[ data['Airports'] == 'A045844', 'Airports'] = 'TOTAL' #The names are changed to make it more usefull
data.loc[ data['Airports'] == 'A043590', 'Airports'] = 'Schiphol'
data.loc[ data['Airports'] == 'A043596', 'Airports'] = 'Rotterdam The Hague'
data.loc[ data['Airports'] == 'A043591', 'Airports'] = 'Eindhoven'
data.loc[ data['Airports'] == 'A043595', 'Airports'] = 'Maastricht Aachen'
data.loc[ data['Airports'] == 'A043593', 'Airports'] = 'Groningen'

data.set_index('ID')

data.rename(columns={"TotalFlights_3": "Total Flights", 
                    "TotalPassengers_12": "Total Passengers",
                    "TotalCargo_43": "Total Cargo [tons]",
                    "TotalMail_74":
                    "Total Mail [tons]"},
            inplace=True)


# Define a function to filter the data for the right cargo type and start year:
def clean_scatter_data(df, start_year, cols): 
    # Filter monthly data only and convert to datetime.
    df = df[df['Periods'].str.contains('MM')]
    df['Periods']= pd.to_datetime(df['Periods'], format='%YMM%m')

    # Select a start year
    df = df[df['Periods'].dt.year >= start_year]
    df = df[cols]

    keys = list(df.keys()[3:])

    for key in keys:
        wordlist = re.findall('[A-Z][^A-Z]*', key.split('_')[0])
        new_key = " ".join(wordlist)
        df = df.rename(columns = {key: new_key})
    
    return df


# Define a function to create an animation based on a dataset and a start year
def scatter_animation(df, title=''):


    # Convert DataFrame to long form data for use with Plotly Express
    df = df.melt(id_vars=['Airports', 'Periods'])
    df = df[df['Airports'] == 'TOTAL']

    # Load Geometry
    geo_df = gpd.read_file('map_v2.geojson')

    # Create a list of geometry objects to assign to the data:
    loclist = []
    for i in range(len(df)):
        region = df.iloc[i, 2]
        location = geo_df[geo_df['id'] == region].iloc[0, 1]
        loclist.append(location)

    # Reset the index of df to ensure index of loclist matches dataset, and convert to a GeoDataframe
    df = df.reset_index()
    df['geometry'] = loclist
    gdf = gpd.GeoDataFrame(df)

    # Deal with missing datapoints by forcing the value column to be numeric
    gdf['value'] = pd.to_numeric(gdf['value'], errors='coerce')

    # Create the animation:
    fig = px.scatter_geo(gdf,
                        lat=gdf.geometry.y,
                        lon=gdf.geometry.x,
                        hover_name='variable',
                        size='value',
                        animation_frame='Periods',
                        title=title,
                        labels={'variable': 'Origin/Destination'})
    return fig


# Selecting and cleaning PASSENGER data
# Select the relevant columns for regional data.
passengers = clean_scatter_data(data, 2019, ['Airports', 'Periods', 'EUCountries_23', 'OtherEurope_24', 'NorthAfrica_27', 'WestAfrica_28', 'CentralAfrica_29', 'EastAfrica_30', 'SouthAfrica_31', 'NorthAmerica_33', 'CentralAmerica_34', 'SouthAmerica_35', 'WestAsia_37', 'SouthEastAsia_38', 'NorthEastAsia_39', 'Oceania_40'])
passengers = passengers.rename(columns={'EUCountries_23': 'EU Countries'})

# Selecting and cleaning CARGO data
# Select the relevant columns for regional data.
cargo = clean_scatter_data(data, 2019, ['Airports', 'Periods', 'EUCountries_54', 'OtherEurope_55', 'NorthAfrica_58', 'WestAfrica_59', 'CentralAfrica_60', 'EastAfrica_61', 'SouthAfrica_62', 'NorthAmerica_64', 'CentralAmerica_65', 'SouthAmerica_66', 'WestAsia_68', 'SouthEastAsia_69', 'NorthEastAsia_70', 'Oceania_71'])
cargo = cargo.rename(columns = {'EUCountries_54': 'EU Countries'})


# # Selecting and cleaning MAIL data
# # Select the relevant columns for regional data.
mail = clean_scatter_data(data, 2019, ['Airports', 'Periods', "EUCountries_85","OtherEurope_86","NorthAfrica_89","WestAfrica_90","CentralAfrica_91","EastAfrica_92","SouthAfrica_93","NorthAmerica_95","CentralAmerica_96","SouthAmerica_97","WestAsia_99","SouthEastAsia_100","NorthEastAsia_101","Oceania_102"])
mail = mail.rename(columns={'EUCountries_85': 'EU Countries'})

# # Plot both datasets
scatter_animation(passengers, title='Passenger Volumes from Dutch Airports').show()
scatter_animation(cargo, title='Cargo Volumes from Dutch Airports').show()
scatter_animation(mail, title='Mail Volumes from Dutch Airports').show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

