In [None]:
import pandas as pd
from geopy.distance import geodesic
# Load the geo_df DataFrame from the saved CSV. The path is relative, so it is
# resolved against the kernel's current working directory.
geo_df = pd.read_csv('geo_data.csv')

# Take the last column of `df` (by position) as the passenger counts and name
# the resulting Series "2024 Q2 Total Passengers".
# NOTE(review): `df` is not created in this cell; it must already exist in the
# kernel before this cell runs — TODO confirm which cell is meant to define it.
last_column = df.iloc[:, -1].rename("2024 Q2 Total Passengers")

# Pair each route name with a passenger count. pandas aligns the two Series on
# their index labels when building the frame.
# NOTE(review): assumes geo_df and df share the same row index/order — TODO confirm.
final_df = pd.DataFrame({
    'Flight Route': geo_df['Flight Route'],
    '2024 Q2 Total Passengers': last_column
})

# Left-join the coordinates and land code onto each route by 'Flight Route'.
# NOTE(review): if geo_df contains repeated 'Flight Route' values this join
# produces more than one row per route — verify.
final_df = pd.merge(final_df, geo_df[['Flight Route', 'Latitude', 'Longitude', 'Land Code']], on='Flight Route', how='left')

# (latitude, longitude) of the reference point used for all distances:
# Amersfoort, taken here as the most central point in the Netherlands.
central_netherlands_coords = (52.1561, 5.3878)

# Function to calculate the distance from the central point in the Netherlands to each airport
def calculate_distance_to_netherlands(lat, lon):
    """Return the geodesic distance in km from the reference point to (lat, lon).

    Returns None when either coordinate is missing (None/NaN).
    """
    # Bail out early so geodesic() is never called with a missing coordinate.
    if pd.isna(lat) or pd.isna(lon):
        return None
    destination = (lat, lon)
    return geodesic(central_netherlands_coords, destination).kilometers

# Compute the distance for every row from its 'Latitude'/'Longitude' values.
# Rows with a missing coordinate get a missing distance, because the helper
# returns None for them.
final_df['Distance to Netherlands (km)'] = final_df.apply(
    lambda row: calculate_distance_to_netherlands(row['Latitude'], row['Longitude']), axis=1
)

# Fallback distances (km) to Amersfoort, keyed by the exact 'Flight Route' text.
# Some keys are a single airport name, others are "origin - destination" pairs;
# a row only matches if its 'Flight Route' equals the key character for character.
# NOTE(review): the values are round numbers and look hand-entered — TODO confirm their source.
known_distances = {
    "Montreal/Pierre Elliot Trudeau Intl, Qc Airport": 5560,
    "Toronto/Lester B. Pearson Intl, On Airport": 6000,
    "Hannover Uir Airport": 300,
    "Zagreb/Franjo Tudjman Airport": 1100,
    "Bangalore International Airport, Devenahalli, Bangalore Airport": 7800,
    "Tehran/Imam Khomaini Intl Airport": 4400,
    "Nairobi Acc,Fic,Rcc,Com Airport": 6500,
    "Luftfartstilsynet Civil Aviation Authority Airport": 1100,
    "Enfidha Zine El Abidine Ben Ali Airport": 1900,
    "Taibei City/Taibei Intl Ap Airport": 9500,
    "Kilimanjaro App, Twr, Ais, Met, Civil Airlines Airport": 7000,
    "Boston/General Edward Lawrence Logan International, Ma. Airport": 5600,
    "Washington Dulles International, Dc. Airport": 6200,
    "Maiquetia, Intl, Simon Bolivar, Vargas Airport": 7600,
    "Maastricht/Aachen Airport - Kos/Ippokratis Airport": 2100,
    "Maastricht/Aachen Airport - Zakinthos/Dionisios Solomos Airport": 2300,
    "Eindhoven Airport - Athinai/Eleftherios Venizelos Airport": 2200,
    "Eindhoven Airport - Kos/Ippokratis Airport": 2100,
    "Eindhoven Airport - Zagreb/Franjo Tudjman Airport": 1100,
    "Eindhoven Airport - Oujda/Angads Airport": 1900,
    "Eindhoven Airport - Nador/El Aroui Airport": 1900,
    "Groningen/Eelde Airport - Kos/Ippokratis Airport": 2100,
    "Rotterdam Airport - Kos/Ippokratis Airport": 2100,
    "Rotterdam Airport - Bergerac-Roumaniere Airport": 800,
    "Rotterdam Airport - Nador/El Aroui Airport": 1900,
}

# Function to update the distance in the DataFrame if it is NaN and has a known value in the dictionary
def update_distance(row):
    """Return the row's distance, falling back to known_distances when it is missing.

    `row` must support lookup by 'Distance to Netherlands (km)' and
    'Flight Route'. If the distance is missing and the route has no entry in
    known_distances, the missing value is returned unchanged.
    """
    current = row['Distance to Netherlands (km)']
    # A computed distance always wins; the lookup table is only a fallback.
    if not pd.isna(current):
        return current
    return known_distances.get(row['Flight Route'], current)

# Fill missing distances row by row from the fallback table.
final_df['Distance to Netherlands (km)'] = final_df.apply(update_distance, axis=1)

# Drop rows that are exact duplicates of an earlier row.
final_df = final_df.drop_duplicates()

# Passenger counts: strip commas, parse as numbers (unparseable values become
# NaN), and store the column as float64.
final_df['2024 Q2 Total Passengers'] = pd.to_numeric(
    final_df['2024 Q2 Total Passengers'].replace(',', '', regex=True),
    errors='coerce',
).astype('float64')

# Print the whole table as plain text, without the index.
print(
    final_df[
        [
            'Flight Route',
            '2024 Q2 Total Passengers',
            'Latitude',
            'Longitude',
            'Land Code',
            'Distance to Netherlands (km)',
        ]
    ].to_string(index=False)
)

# Write the table to 'final_data.csv' (relative path), without the index.
final_df.to_csv('final_data.csv', index=False)

import pandas as pd

# Drop the first row of the frame (the row whose index label comes first).
# NOTE(review): presumably a header/units row carried over from the raw data — TODO confirm.
final_df = final_df.drop(index=final_df.index[0])

# Make sure both inputs of the product are numeric; anything that cannot be
# parsed becomes NaN.
final_df['2024 Q2 Total Passengers'] = pd.to_numeric(final_df['2024 Q2 Total Passengers'], errors='coerce')
final_df['Distance to Netherlands (km)'] = pd.to_numeric(final_df['Distance to Netherlands (km)'], errors='coerce')

# Passengers multiplied by distance, per route. Element-wise multiplication
# yields NaN wherever either input is missing, so the column stays float
# instead of mixing numbers with 'N/A' strings.
final_df['Passenger Distance Product'] = (
    final_df['2024 Q2 Total Passengers'] * final_df['Distance to Netherlands (km)']
)

# Create a new column for distance categories based on the distance to the Netherlands
def categorize_distance(distance):
    """Classify a distance in km as 'Short', 'Middle', 'Long' or 'N/A'.

    Missing values give 'N/A'; below 1000 is 'Short'; 1000 up to and
    including 5000 is 'Middle'; anything larger is 'Long'.
    """
    if pd.isna(distance):
        return 'N/A'
    if distance < 1000:
        return 'Short'
    # At this point the distance is at least 1000, so only the upper bound matters.
    return 'Middle' if distance <= 5000 else 'Long'

# Label every row 'Short', 'Middle', 'Long' or 'N/A' from its distance.
final_df['distancecategory'] = final_df['Distance to Netherlands (km)'].apply(categorize_distance)

# Absolute path to the emission-factor CSV on the author's Windows machine.
# NOTE(review): this will not resolve on any other machine — point it at a
# location that exists in your checkout before running.
emission_factors_path = r'C:\Users\omniq\Documents\GitHub\TIL6022-LabAssignments\Flytax-Project-Group24\CO2-effect\Data\carbon-footprint-travel-mode.csv'  # Update this to the correct CSV file path

# Read the emission factors into a DataFrame.
emission_df = pd.read_csv(emission_factors_path)

# Pick the three factors by position: always the last column, and a fixed row each.
# NOTE(review): the rows are selected purely by position; which travel mode each
# row holds cannot be seen from here — verify against the CSV, and re-check if
# the file is ever re-sorted or extended.
short_flight_factor = emission_df.iloc[-2, -1]  # second-to-last row, last column
middle_flight_factor = emission_df.iloc[5, -1]  # 6th row, last column
long_flight_factor = emission_df.iloc[10, -1]  # 11th row, last column

# Lookup from distance category to emission factor. There is deliberately no
# 'N/A' key, so rows without a distance category get a missing factor below.
emission_factors = {
    'Short': short_flight_factor,
    'Middle': middle_flight_factor,
    'Long': long_flight_factor
}

# Attach the factor that matches each row's distance category.
final_df['Emission Factor (g CO2/pax-km)'] = final_df['distancecategory'].map(emission_factors)

# Force both inputs of the CO2 calculation to numeric; non-numeric entries
# (such as an 'N/A' string) become NaN.
final_df['Passenger Distance Product'] = pd.to_numeric(final_df['Passenger Distance Product'], errors='coerce')
final_df['Emission Factor (g CO2/pax-km)'] = pd.to_numeric(final_df['Emission Factor (g CO2/pax-km)'], errors='coerce')

# Total CO2 per route: (passengers * km) * (g CO2 per passenger-km) gives grams,
# and dividing by 1_000_000 converts grams to tonnes.
# NOTE(review): rows with a missing input receive the string 'N/A', so this
# column mixes floats and strings; coerce it with pd.to_numeric before summing
# or plotting.
final_df['Total CO2 Emissions (tonnes, all passengers)'] = final_df.apply(
    lambda row: (row['Passenger Distance Product'] * row['Emission Factor (g CO2/pax-km)'] / 1_000_000) 
    if pd.notna(row['Passenger Distance Product']) and pd.notna(row['Emission Factor (g CO2/pax-km)']) 
    else 'N/A', 
    axis=1
)

# Print the whole table, including the derived columns, without the index.
print(final_df[['Flight Route', '2024 Q2 Total Passengers', 'Latitude', 'Longitude', 'Distance to Netherlands (km)', 'Passenger Distance Product', 'distancecategory', 'Emission Factor (g CO2/pax-km)', 'Total CO2 Emissions (tonnes, all passengers)']].to_string(index=False))

# Report how many rows remain at this point.
print(f"Number of rows after processing: {len(final_df)}")

import pandas as pd

# The flight-time step needs the distance column; report and stop if it is absent.
if 'Distance to Netherlands (km)' not in final_df.columns:
    print("The 'Distance to Netherlands (km)' column was not found.")
else:
    # Rows whose distance is below 2500 km. This subset is only printed; the
    # flight-time column further down is computed on the full table.
    filtered_final_df = final_df.loc[final_df['Distance to Netherlands (km)'] < 2500]

    print(
        filtered_final_df[
            ['Flight Route', '2024 Q2 Total Passengers', 'Latitude', 'Longitude', 'Distance to Netherlands (km)']
        ].to_string(index=False)
    )

    def calculate_flight_time(distance, avg_speed, pre_flight_time):
        """Return distance / avg_speed + pre_flight_time, in hours.

        Returns None when the distance is missing or not positive, or when
        avg_speed is not positive.
        """
        usable = pd.notna(distance) and distance > 0 and avg_speed > 0
        if not usable:
            return None
        return distance / avg_speed + pre_flight_time

    # Both parameters are typed in by the user and parsed as floats.
    avg_speed = float(input("Please enter the average speed of the travel in km/h: "))
    pre_flight_time = float(input("Please enter the pre-flight time in hours (e.g., 1.5 for 1.5 hours): "))

    # Travel time for every row of the full table, in hours.
    final_df['Flight Time (hours)'] = final_df['Distance to Netherlands (km)'].apply(
        lambda dist_km: calculate_flight_time(dist_km, avg_speed, pre_flight_time)
    )

    print(
        final_df[
            ['Flight Route', '2024 Q2 Total Passengers', 'Distance to Netherlands (km)', 'Flight Time (hours)']
        ].to_string(index=False)
    )


In [None]:
import pandas as pd
from geopy.distance import geodesic

# geo_df is expected to exist in the kernel already; this cell does not load it.
# The line below is disabled code that would read it from 'geo_data.csv'.
# geo_df = pd.read_csv('geo_data.csv')  # Uncomment if loading from a saved file

# Take the last column of `df` (by position) as the passenger counts and name
# the resulting Series "2024 Q2 Total Passengers".
# NOTE(review): `df` is not created in this cell either — TODO confirm which
# cell must run first.
last_column = df.iloc[:, -1].rename("2024 Q2 Total Passengers")

# Pair each route name with a passenger count. pandas aligns the two Series on
# their index labels when building the frame.
# NOTE(review): assumes geo_df and df share the same row index/order — TODO confirm.
final_df = pd.DataFrame({
    'Flight Route': geo_df['Flight Route'],
    '2024 Q2 Total Passengers': last_column
})

# Left-join the coordinates and land code onto each route by 'Flight Route'.
# NOTE(review): if geo_df contains repeated 'Flight Route' values this join
# produces more than one row per route — verify.
final_df = pd.merge(final_df, geo_df[['Flight Route', 'Latitude', 'Longitude', 'Land Code']], on='Flight Route', how='left')

# (latitude, longitude) of the reference point used for all distances:
# Amersfoort, taken here as the most central point in the Netherlands.
central_netherlands_coords = (52.1561, 5.3878)

# Function to calculate the distance from the central point in the Netherlands to each airport
def calculate_distance_to_netherlands(lat, lon):
    """Geodesic distance in km between the reference point and (lat, lon).

    A missing latitude or longitude (None/NaN) yields None.
    """
    coordinates_missing = pd.isna(lat) or pd.isna(lon)
    if coordinates_missing:
        return None
    return geodesic(central_netherlands_coords, (lat, lon)).kilometers

# Compute the distance for every row from its 'Latitude'/'Longitude' values.
# Rows with a missing coordinate get a missing distance, because the helper
# returns None for them.
final_df['Distance to Netherlands (km)'] = final_df.apply(
    lambda row: calculate_distance_to_netherlands(row['Latitude'], row['Longitude']), axis=1
)

# Fallback distances (km) to Amersfoort, keyed by the exact 'Flight Route' text.
# Some keys are a single airport name, others are "origin - destination" pairs;
# a row only matches if its 'Flight Route' equals the key character for character.
# NOTE(review): the values are round numbers and look hand-entered — TODO confirm their source.
known_distances = {
    "Montreal/Pierre Elliot Trudeau Intl, Qc Airport": 5560,
    "Toronto/Lester B. Pearson Intl, On Airport": 6000,
    "Hannover Uir Airport": 300,
    "Zagreb/Franjo Tudjman Airport": 1100,
    "Bangalore International Airport, Devenahalli, Bangalore Airport": 7800,
    "Tehran/Imam Khomaini Intl Airport": 4400,
    "Nairobi Acc,Fic,Rcc,Com Airport": 6500,
    "Luftfartstilsynet Civil Aviation Authority Airport": 1100,
    "Enfidha Zine El Abidine Ben Ali Airport": 1900,
    "Taibei City/Taibei Intl Ap Airport": 9500,
    "Kilimanjaro App, Twr, Ais, Met, Civil Airlines Airport": 7000,
    "Boston/General Edward Lawrence Logan International, Ma. Airport": 5600,
    "Washington Dulles International, Dc. Airport": 6200,
    "Maiquetia, Intl, Simon Bolivar, Vargas Airport": 7600,
    "Maastricht/Aachen Airport - Kos/Ippokratis Airport": 2100,
    "Maastricht/Aachen Airport - Zakinthos/Dionisios Solomos Airport": 2300,
    "Eindhoven Airport - Athinai/Eleftherios Venizelos Airport": 2200,
    "Eindhoven Airport - Kos/Ippokratis Airport": 2100,
    "Eindhoven Airport - Zagreb/Franjo Tudjman Airport": 1100,
    "Eindhoven Airport - Oujda/Angads Airport": 1900,
    "Eindhoven Airport - Nador/El Aroui Airport": 1900,
    "Groningen/Eelde Airport - Kos/Ippokratis Airport": 2100,
    "Rotterdam Airport - Kos/Ippokratis Airport": 2100,
    "Rotterdam Airport - Bergerac-Roumaniere Airport": 800,
    "Rotterdam Airport - Nador/El Aroui Airport": 1900,
}

# Fill in a fallback distance only where the computed one is missing AND the
# route has an entry in known_distances; every other row keeps its current value.
final_df['Distance to Netherlands (km)'] = final_df.apply(
    lambda row: known_distances[row['Flight Route']] if pd.isna(row['Distance to Netherlands (km)']) and row['Flight Route'] in known_distances else row['Distance to Netherlands (km)'],
    axis=1
)

# Drop rows that are exact duplicates of an earlier row.
final_df = final_df.drop_duplicates()

# Passenger counts: strip commas, then parse as numbers (unparseable values
# become NaN).
final_df['2024 Q2 Total Passengers'] = final_df['2024 Q2 Total Passengers'].replace(',', '', regex=True)
final_df['2024 Q2 Total Passengers'] = pd.to_numeric(final_df['2024 Q2 Total Passengers'], errors='coerce')

# Passengers multiplied by distance, per route. The distance is coerced to
# numeric first because nothing earlier in this cell guarantees its dtype.
# Element-wise multiplication yields NaN wherever either input is missing, so
# the column stays float instead of mixing numbers with 'N/A' strings.
final_df['Passenger Distance Product'] = (
    final_df['2024 Q2 Total Passengers']
    * pd.to_numeric(final_df['Distance to Netherlands (km)'], errors='coerce')
)

# Create a new column for distance categories based on the distance to the Netherlands
def categorize_distance(distance):
    """Map a distance in km to 'Short', 'Middle', 'Long', or 'N/A' if missing.

    Thresholds: under 1000 is 'Short', 1000 through 5000 inclusive is
    'Middle', above 5000 is 'Long'.
    """
    if pd.isna(distance):
        return 'N/A'
    if distance < 1000:
        return 'Short'
    if distance <= 5000:
        return 'Middle'
    return 'Long'

# Label every row 'Short', 'Middle', 'Long' or 'N/A' from its distance.
final_df['distancecategory'] = final_df['Distance to Netherlands (km)'].apply(categorize_distance)

# Absolute path to the emission-factor CSV on the author's Windows machine.
# NOTE(review): this will not resolve on any other machine — point it at a
# location that exists in your checkout before running.
emission_factors_path = r'C:\Users\omniq\Documents\GitHub\TIL6022-LabAssignments\Flytax-Project-Group24\CO2-effect\Data\carbon-footprint-travel-mode.csv'
emission_df = pd.read_csv(emission_factors_path)

# Pick the three factors by position: always the last column, and a fixed row each.
# NOTE(review): the rows are selected purely by position; which travel mode each
# row holds cannot be seen from here — verify against the CSV.
short_flight_factor = emission_df.iloc[-2, -1]  # second-to-last row, last column
middle_flight_factor = emission_df.iloc[5, -1]  # 6th row, last column
long_flight_factor = emission_df.iloc[10, -1]  # 11th row, last column

# Attach the factor that matches each row's distance category. There is no
# 'N/A' key, so rows without a distance category get a missing factor.
emission_factors = {'Short': short_flight_factor, 'Middle': middle_flight_factor, 'Long': long_flight_factor}
final_df['Emission Factor (g CO2/pax-km)'] = final_df['distancecategory'].map(emission_factors)

# Force both inputs of the CO2 calculation to numeric; non-numeric entries
# (such as an 'N/A' string) become NaN.
final_df['Passenger Distance Product'] = pd.to_numeric(final_df['Passenger Distance Product'], errors='coerce')
final_df['Emission Factor (g CO2/pax-km)'] = pd.to_numeric(final_df['Emission Factor (g CO2/pax-km)'], errors='coerce')

# Total CO2 per route: (passengers * km) * (g CO2 per passenger-km) gives grams,
# and dividing by 1_000_000 converts grams to tonnes.
# NOTE(review): rows with a missing input receive the string 'N/A', so this
# column mixes floats and strings; coerce it with pd.to_numeric before summing
# or plotting.
final_df['Total CO2 Emissions (tonnes, all passengers)'] = final_df.apply(
    lambda row: (row['Passenger Distance Product'] * row['Emission Factor (g CO2/pax-km)'] / 1_000_000)
    if pd.notna(row['Passenger Distance Product']) and pd.notna(row['Emission Factor (g CO2/pax-km)'])
    else 'N/A',
    axis=1
)

# Function to calculate flight time
def calculate_flight_time(distance, avg_speed, pre_flight_time):
    """Return the total travel time in hours: distance / avg_speed + pre_flight_time.

    Returns None when the distance is missing or not positive, or when
    avg_speed is not positive.
    """
    usable = pd.notna(distance) and distance > 0 and avg_speed > 0
    if not usable:
        return None
    # Time in the air, plus the fixed overhead before departure.
    return distance / avg_speed + pre_flight_time

# Fixed example parameters: average speed in km/h and pre-flight time in hours.
avg_speed = 800
pre_flight_time = 1.5

# Travel time for every row; the two parameters are forwarded to the helper
# after each distance value.
final_df['Flight Time (hours)'] = final_df['Distance to Netherlands (km)'].apply(
    calculate_flight_time, args=(avg_speed, pre_flight_time)
)

# Print the whole table with every derived column, without the index.
print(
    final_df[
        [
            'Flight Route',
            '2024 Q2 Total Passengers',
            'Latitude',
            'Longitude',
            'Distance to Netherlands (km)',
            'Passenger Distance Product',
            'distancecategory',
            'Emission Factor (g CO2/pax-km)',
            'Total CO2 Emissions (tonnes, all passengers)',
            'Flight Time (hours)',
        ]
    ].to_string(index=False)
)



In [None]:
import pandas as pd

# Absolute location of the semicolon-separated flight data export.
file_path = r'C:\Users\omniq\Documents\GitHub\TIL6022-LabAssignments\Flytax-Project-Group24\CO2-effect\Data\Flight Netherlands\avia_par_nl__custom_13280147_spreadsheet.csv'

# Read the file, skipping its first 10 lines so parsing starts at the data table.
df = pd.read_csv(file_path, skiprows=10, sep=';')

# Show the column labels as read from the file.
print(df.columns)

# Replace the labels with 29 fixed names: the route, then a value column and an
# 'empty_<year>' column for each year 2010-2022, then '2023' and 'extra column'.
# The assignment fails if the file does not have exactly that many columns.
df.columns = (
    ['Flight Route']
    + [label for year in range(2010, 2023) for label in (str(year), f'empty_{year}')]
    + ['2023', 'extra column']
)

# Keep only the route and its 2022 value.
df_2022 = df.loc[:, ['Flight Route', '2022']]

# Drop every row that has a missing value in either column.
df_2022_cleaned = df_2022.dropna()

print(df_2022_cleaned)