In [319]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [320]:
# Load the ferry trips data (path is relative to the notebook's working directory)
ferry_data = pd.read_csv('data/ferry_trips_data_large.csv')

# Preview the first rows to understand the structure; only comments follow,
# so this expression is what the cell renders
ferry_data.head()
# Optional: uncomment the next line for a per-column summary of the dataset
#ferry_data.info()

Unnamed: 0,route_id,route_name,ferry_name,ferry_id,terminal_departure,terminal_arrival,time_departure,cars_outbound,trucks_outbound,trucks_with_trailer_outbound,...,passenger_car_equivalent_outbound,passenger_car_equivalent_inbound,fuelcons_outbound_l,distance_outbound_nm,start_time_outbound,end_time_outbound,fuelcons_inbound_l,distance_inbound_nm,start_time_inbound,end_time_inbound
0,17,Oxdjupsleden,Fragancia,289,Rindö ...,Värmdö ...,2023-03-01 05:44:00,6,0,0,...,6.0,2.0,,,,,,,,
1,17,Oxdjupsleden,Fragancia,289,Rindö ...,Värmdö ...,2023-03-01 06:04:00,4,0,0,...,4.0,8.5,,,,,,,,
2,17,Oxdjupsleden,Fragancia,289,Rindö ...,Värmdö ...,2023-03-01 06:24:00,8,1,0,...,10.5,14.0,,,,,,,,
3,17,Oxdjupsleden,Fragancia,289,Rindö ...,Värmdö ...,2023-03-01 06:44:00,5,1,0,...,7.5,9.0,,,,,,,,
4,17,Oxdjupsleden,Fragancia,289,Rindö ...,Värmdö ...,2023-03-01 07:04:00,7,0,0,...,7.0,8.0,,,,,,,,


## Q1 What is the total fuel cost for ferry Jupiter in January 2024 assuming that the price per liter is 20 sek?

In [321]:
# Parse departure timestamps so the .dt accessors below can be used for filtering
ferry_data['time_departure'] = pd.to_datetime(ferry_data['time_departure'])

# Keep only the trips made by ferry 'Jupiter' in January 2024
jupiter_jan_data = ferry_data[
    (ferry_data['ferry_name'] == 'Jupiter') &
    (ferry_data['time_departure'].dt.year == 2024) &
    (ferry_data['time_departure'].dt.month == 1)
]

# Total fuel burned in both directions. Legs without a fuel reading are counted
# as 0 liters, so the total is a lower bound when readings are missing.
total_fuel_consumption = (
    jupiter_jan_data['fuelcons_outbound_l'].fillna(0).sum() +
    jupiter_jan_data['fuelcons_inbound_l'].fillna(0).sum()
)

# Fuel price stated in the Q1 question: 20 SEK per liter
fuel_cost_per_liter = 20

# Total cost = liters consumed * price per liter
total_fuel_cost = total_fuel_consumption * fuel_cost_per_liter

print(f"Total fuel consumption for ferry Jupiter in January 2024: {total_fuel_consumption:.2f} liters")
print(f"Total fuel cost for ferry Jupiter in January 2024: {total_fuel_cost:.2f} SEK")

Total fuel consumption for ferry Jupiter in January 2024: 12709.34 liters
Total fuel cost for ferry Jupiter in January 2024: 254186.70 SEK


## Q2 What is the average speed of ferry Jupiter? (in km/h)


In [322]:
# Parse the per-leg start/end timestamps so they can be subtracted from each other
ferry_data['start_time_outbound'] = pd.to_datetime(ferry_data['start_time_outbound'])
ferry_data['end_time_outbound'] = pd.to_datetime(ferry_data['end_time_outbound'])
ferry_data['start_time_inbound'] = pd.to_datetime(ferry_data['start_time_inbound'])
ferry_data['end_time_inbound'] = pd.to_datetime(ferry_data['end_time_inbound'])

# Work on a private copy of the Jupiter rows so the derived columns stay out of ferry_data
jupiter_data = ferry_data[ferry_data['ferry_name'] == 'Jupiter'].copy()


def calculate_duration(start_time, end_time):
    """Return the time elapsed between two timestamps, in hours.

    Scalar helper retained for any caller that still uses it; the column-wise
    computation below does the same arithmetic without a Python-level loop.

    Args:
        start_time: timestamp at which the leg started (may be missing).
        end_time: timestamp at which the leg ended (may be missing).

    Returns:
        float number of hours, or None when either timestamp is missing.
    """
    if pd.isna(start_time) or pd.isna(end_time):
        return None
    return (end_time - start_time).total_seconds() / 3600


# Leg durations in hours, computed column-wise; a missing timestamp yields NaN
jupiter_data['duration_outbound'] = (
    jupiter_data['end_time_outbound'] - jupiter_data['start_time_outbound']
).dt.total_seconds() / 3600
jupiter_data['duration_inbound'] = (
    jupiter_data['end_time_inbound'] - jupiter_data['start_time_inbound']
).dt.total_seconds() / 3600

# Speed in knots = nautical miles / hours. Zero-length legs are masked to NaN
# first so they are excluded instead of producing a division by zero.
jupiter_data['speed_outbound_knots'] = (
    jupiter_data['distance_outbound_nm']
    / jupiter_data['duration_outbound'].where(jupiter_data['duration_outbound'] != 0)
)
jupiter_data['speed_inbound_knots'] = (
    jupiter_data['distance_inbound_nm']
    / jupiter_data['duration_inbound'].where(jupiter_data['duration_inbound'] != 0)
)

# Pool every leg (either direction) that has a usable speed value
valid_speeds = pd.concat([jupiter_data['speed_outbound_knots'].dropna(), jupiter_data['speed_inbound_knots'].dropna()])

if not valid_speeds.empty:
    # Unweighted mean of the per-leg speeds
    average_speed_knots = valid_speeds.mean()
    print(f"The average speed of ferry Jupiter is: {average_speed_knots:.2f} knots")

    # 1 knot = 1.852 km/h = 1.15078 mph; only the km/h figure is reported
    average_speed_kmh = average_speed_knots * 1.852
    average_speed_mph = average_speed_knots * 1.15078
    print(f"The average speed of ferry Jupiter in km/h is: {average_speed_kmh:.2f} km/h")
else:
    print("Could not calculate the average speed for ferry Jupiter due to missing distance or time information.")

The average speed of ferry Jupiter is: 6.23 knots
The average speed of ferry Jupiter in km/h is: 11.55 km/h


## Q3 How many vehicles are on average left at the terminal for inbound trips? 

In [323]:
import pandas as pd

def calculate_avg_vehicles_left_inbound(df):
    """
    Report the mean of the 'vehicles_left_at_terminal_inbound' column.

    The mean is taken over every row of ``df`` (all routes and ferries);
    missing values are skipped by pandas.

    Args:
        df: pandas DataFrame with a 'vehicles_left_at_terminal_inbound' column

    Returns:
        float: mean number of vehicles left at the terminal per inbound trip
    """
    mean_left_behind = df['vehicles_left_at_terminal_inbound'].mean()

    # Emit the report: heading, the figure itself, then the formatted Q3 answer
    report_lines = (
        "Analysis of Vehicles Left at Terminal for Inbound Trips:",
        f"Average vehicles left at terminal per inbound trip: {mean_left_behind:.4f}",
        f"\nQ3 Answer: On average, {mean_left_behind:.4f} vehicles are left at the terminal for inbound trips.",
    )
    for line in report_lines:
        print(line)

    return mean_left_behind

# Run the Q3 analysis on the already-loaded ferry_data.
# Only a missing column is reported as a message; any other failure is left to
# surface with its traceback instead of being hidden behind a one-line print.
try:
    calculate_avg_vehicles_left_inbound(ferry_data)
except KeyError as e:
    print(f"Error: {e}")

Analysis of Vehicles Left at Terminal for Inbound Trips:
Average vehicles left at terminal per inbound trip: 0.3270

Q3 Answer: On average, 0.3270 vehicles are left at the terminal for inbound trips.


## Q4 How many vehicles are on average unable to board (left behind at the terminal) per trip for ferry Jupiter during summer months (6, 7, 8)?

In [324]:
# ferry_data is already in memory; re-parse the departure column defensively so
# that any unparseable value becomes NaT instead of raising
ferry_data['time_departure'] = pd.to_datetime(ferry_data['time_departure'], errors='coerce')

# Summer is defined by the question as months 6, 7 and 8
summer_months = [6, 7, 8]

# Every trip operated by ferry 'Jupiter'
jupiter_data = ferry_data[ferry_data['ferry_name'] == 'Jupiter'].copy()

# Summer departures only; rows whose departure is NaT never qualify
departure_month = jupiter_data['time_departure'].dt.month
jupiter_summer_data = jupiter_data[
    jupiter_data['time_departure'].notna() & departure_month.isin(summer_months)
]

# Vehicles that could not board, counted in both directions
left_at_terminal_cols = [
    'vehicles_left_at_terminal_outbound',
    'vehicles_left_at_terminal_inbound',
]
total_left_at_terminal = jupiter_summer_data[left_at_terminal_cols].fillna(0).sum().sum()

# One row of the table is one trip
total_trips_summer_jupiter = len(jupiter_summer_data)

# Average per trip, falling back to 0 when there were no summer trips at all
if total_trips_summer_jupiter > 0:
    avg_left_at_terminal = total_left_at_terminal / total_trips_summer_jupiter
else:
    avg_left_at_terminal = 0

# Trip counts per summer month
monthly_trips = jupiter_summer_data.groupby(jupiter_summer_data['time_departure'].dt.month).size()
for month, count in monthly_trips.items():
    month_name = {6: 'June', 7: 'July', 8: 'August'}[month]
    print(f"Month {month} ({month_name}): {count} trips")

print(f"\nTotal vehicles left at terminal (unable to board) for ferry Jupiter during months 6, 7, 8: {total_left_at_terminal}")
print(f"Total number of trips for ferry Jupiter during these months: {total_trips_summer_jupiter}")
print(f"The average number of vehicles left at the terminal per trip is: {avg_left_at_terminal:.2f}")


Month 6 (June): 1512 trips
Month 7 (July): 1556 trips
Month 8 (August): 1543 trips

Total vehicles left at terminal (unable to board) for ferry Jupiter during months 6, 7, 8: 14594
Total number of trips for ferry Jupiter during these months: 4611
The average number of vehicles left at the terminal per trip is: 3.17


## Q5 What is the highest average daily PCE (Passenger Car Equivalent) value for the Furusundsleden route (outbound and inbound combined)?

In [325]:
def analyze_furusundsleden_highest_daily_pce(df):
    """
    Find the weekday with the highest average per-trip PCE on Furusundsleden.

    For every trip the outbound and inbound PCE are added (a missing value
    counts as 0). The trips are then grouped by day of week and averaged, and
    the largest of those weekday averages is reported. Note that this is an
    average per trip for each weekday, not a total per calendar day.

    Args:
        df: pandas DataFrame with 'route_name', 'route_id', 'time_departure',
            'passenger_car_equivalent_outbound' and
            'passenger_car_equivalent_inbound' columns

    Returns:
        float: highest weekday average of combined PCE per trip, or None when
        the frame has no Furusundsleden rows
    """
    # Filter for Furusundsleden route
    furusund_data = df[df['route_name'] == 'Furusundsleden'].copy()

    if furusund_data.empty:
        print("No data found for Furusundsleden route")
        return None

    # Convert departure time to datetime
    furusund_data['time_departure'] = pd.to_datetime(furusund_data['time_departure'])

    # Extract day of week (0=Monday, 6=Sunday)
    furusund_data['day_of_week'] = furusund_data['time_departure'].dt.dayofweek

    # Map numeric day to name for better readability
    day_names = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday',
                 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
    furusund_data['day_name'] = furusund_data['day_of_week'].map(day_names)

    # Combined PCE per trip (outbound + inbound); a missing direction counts as 0
    furusund_data['total_pce'] = (
        furusund_data['passenger_car_equivalent_outbound'].fillna(0) +
        furusund_data['passenger_car_equivalent_inbound'].fillna(0)
    )

    # Average combined PCE and number of trips for each weekday
    daily_pce = furusund_data.groupby(['day_of_week', 'day_name']).agg(
        avg_pce=('total_pce', 'mean'),
        trip_count=('route_id', 'count')
    ).reset_index()

    # Monday-to-Sunday order for the printed table
    daily_pce = daily_pce.sort_values('day_of_week')

    # Weekday row with the largest average
    highest_pce_idx = daily_pce['avg_pce'].idxmax()
    highest_pce_row = daily_pce.loc[highest_pce_idx]
    highest_pce_value = highest_pce_row['avg_pce']

    # Print detailed analysis
    print("\nFurusundsleden Daily PCE Analysis:")
    print("-" * 50)
    print("Average PCE by day of week:")

    for _, row in daily_pce.iterrows():
        print(f"{row['day_name']}: {row['avg_pce']:.2f} PCE ({row['trip_count']} trips)")

    # This cell answers Q5 of the notebook (the label previously read "Q21")
    print(f"\nQ5 Answer: The highest average daily PCE value for Furusundsleden route is {highest_pce_value:.2f}.")

    return highest_pce_value

# Run the Furusundsleden weekday analysis on the full trips table and keep the
# returned peak average (None when the route has no rows)
highest_daily_pce = analyze_furusundsleden_highest_daily_pce(ferry_data)


Furusundsleden Daily PCE Analysis:
--------------------------------------------------
Average PCE by day of week:
Monday: 37.05 PCE (2184 trips)
Tuesday: 36.49 PCE (2173 trips)
Wednesday: 37.27 PCE (2198 trips)
Thursday: 38.42 PCE (2251 trips)
Friday: 40.33 PCE (2308 trips)
Saturday: 30.23 PCE (2161 trips)
Sunday: 31.78 PCE (2267 trips)

Q21 Answer: The highest average daily PCE value for Furusundsleden route is 40.33.


## Q6 What is the average distance travelled on Ljusteröleden (in nautical miles)?

In [326]:
def average_distance_on_ljusteroleden(df):
    """
    Report the average leg distance (nautical miles) on Ljusteröleden.

    For each trip the outbound and inbound distances are averaged; trips where
    either distance is missing produce NaN and are therefore left out of the
    overall mean.

    Args:
        df: pandas DataFrame with 'route_name', 'distance_outbound_nm' and
            'distance_inbound_nm' columns

    Returns:
        float: mean of the per-trip average distance, or None when the route
        is absent or has no trip with both distances recorded
    """
    ljusteroleden_data = df[df['route_name'] == 'Ljusteröleden'].copy()

    if ljusteroleden_data.empty:
        print("Data for Ljusteröleden not found in the provided DataFrame.")
        return None

    # Per-trip average of the two legs
    ljusteroleden_data['average_trip_distance_nm'] = (
        ljusteroleden_data['distance_outbound_nm'] + ljusteroleden_data['distance_inbound_nm']
    ) / 2

    # Overall average across trips (NaN rows are skipped by mean())
    average_distance = ljusteroleden_data['average_trip_distance_nm'].mean()

    if pd.isna(average_distance):
        # Every trip lacked at least one distance, so there is nothing to average
        print("No distance measurements available for Ljusteröleden.")
        return None

    print(f"The average distance travelled on Ljusteröleden is {average_distance:.2f} nautical miles.")
    return average_distance


In [327]:
# Print the average leg distance for Ljusteröleden using the already-loaded trips table
average_distance_on_ljusteroleden(ferry_data)

The average distance travelled on Ljusteröleden is 0.57 nautical miles.


## Q7 What is the total Passenger Car Equivalent (PCE) during the peak hour for route Aspöleden in February, combining both outbound and inbound directions?

In [328]:
# Q7 What is the total PCE during the peak hour for route Aspöleden in February?

def peak_hour_aspo_february(df):
    """
    Find the busiest departure hour on Aspöleden in February and its total PCE.

    PCE is summed per hour of day over every February departure in ``df``
    (all years present in the data). The peak hour is the hour whose combined
    outbound + inbound sum is largest.

    Args:
        df: pandas DataFrame with 'route_name', 'time_departure',
            'passenger_car_equivalent_outbound' and
            'passenger_car_equivalent_inbound' columns

    Returns:
        int: hour of day (0-23) with the highest combined PCE, or None when
        there are no February departures for Aspöleden
    """
    pce_columns = ['passenger_car_equivalent_outbound', 'passenger_car_equivalent_inbound']

    # Parse departures without touching the caller's frame; bad values become NaT
    departures = pd.to_datetime(df['time_departure'], errors='coerce')

    # Aspöleden trips departing in February (NaT rows never match)
    in_scope = (df['route_name'] == 'Aspöleden') & (departures.dt.month == 2)
    if not in_scope.any():
        print("No February departures found for Aspöleden")
        return None

    aspo_february = df.loc[in_scope, pce_columns].copy()
    aspo_february['hour'] = departures[in_scope].dt.hour

    # Total PCE per hour of day, per direction and combined
    hourly_demand = aspo_february.groupby('hour')[pce_columns].sum().reset_index()
    hourly_demand['total_pce'] = (
        hourly_demand['passenger_car_equivalent_outbound'] +
        hourly_demand['passenger_car_equivalent_inbound']
    )

    # Peak rows per direction and overall
    outbound_peak = hourly_demand.loc[hourly_demand['passenger_car_equivalent_outbound'].idxmax()]
    inbound_peak = hourly_demand.loc[hourly_demand['passenger_car_equivalent_inbound'].idxmax()]
    overall_peak = hourly_demand.loc[hourly_demand['total_pce'].idxmax()]

    # Selecting a row upcasts the hour to float, so cast back before formatting
    outbound_hour = int(outbound_peak['hour'])
    inbound_hour = int(inbound_peak['hour'])
    overall_hour = int(overall_peak['hour'])

    print("Peak hour analysis for Aspöleden in February:")
    print(f"Outbound peak: Hour {outbound_hour}:00 with {outbound_peak['passenger_car_equivalent_outbound']:.2f} PCE")
    print(f"Inbound peak: Hour {inbound_hour}:00 with {inbound_peak['passenger_car_equivalent_inbound']:.2f} PCE")
    print(f"Overall peak: Hour {overall_hour}:00 with {overall_peak['total_pce']:.2f} PCE (outbound + inbound)")

    print(f"\nThe peak hour has a total PCE of {overall_peak['total_pce']:.2f}")

    return overall_hour

# Run the February peak-hour analysis for Aspöleden; peak_hour keeps the hour of
# day whose combined outbound + inbound PCE is largest
peak_hour = peak_hour_aspo_february(ferry_data)

Peak hour analysis for Aspöleden in February:
Outbound peak: Hour 12.0:00 with 701.00 PCE
Inbound peak: Hour 12.0:00 with 791.00 PCE
The average PCE for the peak hour is: 1492.00

The peak hour has a total PCE of 1492.00


## Q8 What is the average fuel consumption on the route Ljusterö to Östanå, combining both outbound and inbound?

In [329]:

def fuel_consumption_ljustero_ostana(df):
    """Report the mean per-trip fuel use (outbound + inbound) on Ljusteröleden.

    Only rows where both 'fuelcons_outbound_l' and 'fuelcons_inbound_l' are
    present take part in the averages.

    Args:
        df: pandas DataFrame with 'route_name', 'fuelcons_outbound_l' and
            'fuelcons_inbound_l' columns.

    Returns:
        Mean of the per-trip outbound + inbound fuel sum in liters, or None
        when the route has no rows or no row with both readings.
    """
    route_rows = df[df['route_name'] == 'Ljusteröleden']
    if route_rows.empty:
        print("Data for Ljusteröleden not found in the provided DataFrame")
        return None

    # Keep only trips that have a reading for both directions
    complete_pairs = route_rows[['fuelcons_outbound_l', 'fuelcons_inbound_l']].dropna()
    if complete_pairs.empty:
        print("No valid fuel consumption data found for the route between Ljusterö and Östanå")
        return None

    outbound_l = complete_pairs['fuelcons_outbound_l']
    inbound_l = complete_pairs['fuelcons_inbound_l']

    # Per-trip total, then its mean across trips
    average_consumption = (outbound_l + inbound_l).mean()

    # Direction breakdown first, combined figure last
    print(f"  - Average outbound (Ljusterö to Östanå): {outbound_l.mean():.2f} liters")
    print(f"  - Average inbound (Östanå to Ljusterö): {inbound_l.mean():.2f} liters")
    print(f"  - Average total fuel consumption per trip: {average_consumption:.2f} liters")

    print(f"\nThe average combined fuel consumption on the route Ljusterö to Östanå: {average_consumption:.2f} liters per trip")

    return average_consumption

# Run the Ljusteröleden fuel analysis; the result is the mean outbound + inbound
# liters per trip, or None when no usable readings exist
average_fuel_consumption = fuel_consumption_ljustero_ostana(ferry_data)

  - Average outbound (Ljusterö to Östanå): 7.00 liters
  - Average inbound (Östanå to Ljusterö): 6.98 liters
  - Average total fuel consumption per trip: 13.98 liters

The average combined fuel consumption on the route Ljusterö to Östanå: 13.98 liters per trip


# Q9: What is the average trip duration for Oxdjupsleden (in minutes)?

In [330]:
# Q9: What is the average trip duration for Oxdjupsleden?

def calculate_oxdjupsleden_duration(df):
    """Report the average crossing time on Oxdjupsleden, in minutes.

    The outbound and inbound means are computed separately from the
    start/end timestamps of each leg, and the reported overall figure is the
    plain average of those two means.

    Args:
        df: pandas DataFrame with 'route_name' and the four
            'start_time_*' / 'end_time_*' columns for outbound and inbound.

    Returns:
        float: average of the outbound and inbound mean durations in minutes.
    """
    route_trips = df[df['route_name'] == 'Oxdjupsleden']

    def _mean_leg_minutes(direction):
        # Mean end-minus-start duration for one direction; missing legs are skipped
        departed = pd.to_datetime(route_trips[f'start_time_{direction}'])
        arrived = pd.to_datetime(route_trips[f'end_time_{direction}'])
        elapsed_minutes = (arrived - departed).dt.total_seconds() / 60
        return elapsed_minutes.mean()

    avg_outbound = _mean_leg_minutes('outbound')
    avg_inbound = _mean_leg_minutes('inbound')

    # Unweighted average of the two directional means
    avg_total = (avg_outbound + avg_inbound) / 2

    print("Average Trip Duration for Oxdjupsleden:")
    print(f"  - Outbound: {avg_outbound:.2f} minutes")
    print(f"  - Inbound: {avg_inbound:.2f} minutes")
    print(f"\nThe average trip duration is {avg_total:.2f} minutes")

    return avg_total

# Run the Oxdjupsleden duration analysis; avg_duration is the mean of the
# outbound and inbound average leg times, in minutes
avg_duration = calculate_oxdjupsleden_duration(ferry_data)

Average Trip Duration for Oxdjupsleden:
  - Outbound: 3.16 minutes
  - Inbound: 3.09 minutes

The average trip duration is 3.12 minutes


# Q10: What is the fuel consumption (liters per nautical mile) for ferry Merkurius?

In [331]:
# Q10: Fuel consumption in liters per nautical mile for every ferry (Merkurius is read from the resulting table)

def analyze_fuel_efficiency(df):
    """
    Rank ferries by fuel used per nautical mile.

    Only trips with fuel and distance recorded in both directions are used.
    For each ferry, total liters (outbound + inbound) are divided by total
    nautical miles (outbound + inbound).

    Args:
        df: pandas DataFrame with 'ferry_name', the two 'fuelcons_*_l'
            columns and the two 'distance_*_nm' columns

    Returns:
        DataFrame indexed by ferry name, sorted from lowest to highest
        'efficiency_l_per_nm', or None when no complete trip exists
    """
    measure_columns = [
        'fuelcons_outbound_l',
        'fuelcons_inbound_l',
        'distance_outbound_nm',
        'distance_inbound_nm',
    ]

    # Trips with every measurement present
    complete_trips = df[['ferry_name'] + measure_columns].dropna()
    if complete_trips.empty:
        print("No valid data found for fuel efficiency analysis")
        return None

    # Per-ferry sums of fuel and distance in each direction
    ferry_efficiency = complete_trips.groupby('ferry_name')[measure_columns].sum()

    # Both directions combined
    ferry_efficiency['total_fuel_l'] = (
        ferry_efficiency['fuelcons_outbound_l'] + ferry_efficiency['fuelcons_inbound_l']
    )
    ferry_efficiency['total_distance_nm'] = (
        ferry_efficiency['distance_outbound_nm'] + ferry_efficiency['distance_inbound_nm']
    )

    # Liters per nautical mile; the most economical ferry ends up first
    ferry_efficiency['efficiency_l_per_nm'] = (
        ferry_efficiency['total_fuel_l'] / ferry_efficiency['total_distance_nm']
    )
    ferry_efficiency = ferry_efficiency.sort_values('efficiency_l_per_nm')

    print("\nFuel Efficiency Analysis (liters per nautical mile):")
    print("Lower values indicate better fuel efficiency")
    print("-" * 50)
    for ferry, liters_per_nm in ferry_efficiency['efficiency_l_per_nm'].items():
        print(f"{ferry}: {liters_per_nm:.2f} L/NM")

    # First row after sorting is the lowest consumption
    most_efficient = ferry_efficiency.index[0]
    print(f"\nThe most fuel-efficient ferry is {most_efficient} with a consumption of {ferry_efficiency['efficiency_l_per_nm'].iloc[0]:.2f} L/NM")

    return ferry_efficiency

# Build the per-ferry liters-per-nautical-mile table from the full trips data;
# the answer for a specific ferry is read from the printed ranking
ferry_efficiency = analyze_fuel_efficiency(ferry_data)


Fuel Efficiency Analysis (liters per nautical mile):
Lower values indicate better fuel efficiency
--------------------------------------------------
Merkurius: 6.08 L/NM
Nina: 8.37 L/NM
Yxlan: 10.82 L/NM
Fragancia: 11.92 L/NM
Jupiter: 12.30 L/NM

The most fuel-efficient ferry is Merkurius with a consumption of 6.08 L/NM


# Q11: What is the average utilization percentage for Oxdjupsleden (average PCE vs the ferry's maximum capacity)?

In [332]:
import json
import pandas as pd

def calculate_oxdjupsleden_utilization(df, ferries_data):
    """
    Express Oxdjupsleden's average load as a percentage of ferry capacity.

    The average PCE is the plain mean of the outbound and inbound column
    means for the route. Capacity is read from
    ``ferries_data['fragancia']['capacity_passenger_car_equivalent']``.

    Args:
        df: DataFrame containing ferry trips data
        ferries_data: Dictionary containing ferry capacity information

    Returns:
        float: utilization percentage, or None when the route has no rows or
        the capacity entry is missing or falsy
    """
    route_trips = df[df['route_name'] == 'Oxdjupsleden']
    if route_trips.empty:
        print("No data found for Oxdjupsleden route")
        return None

    # Directional means, then their unweighted average
    avg_outbound_pce = route_trips['passenger_car_equivalent_outbound'].mean()
    avg_inbound_pce = route_trips['passenger_car_equivalent_inbound'].mean()
    avg_pce = (avg_outbound_pce + avg_inbound_pce) / 2

    # Capacity lookup for the 'fragancia' entry
    fragancia_entry = ferries_data.get('fragancia', {})
    fragancia_capacity = fragancia_entry.get('capacity_passenger_car_equivalent')
    if not fragancia_capacity:
        print("Could not find capacity for Fragancia ferry")
        return None

    utilization_percentage = (avg_pce / fragancia_capacity) * 100

    # Detailed report followed by the formatted answer
    report_lines = [
        "\nOxdjupsleden Route Utilization Analysis:",
        "-" * 50,
        "Ferry: Fragancia",
        f"Ferry capacity: {fragancia_capacity} PCE",
        f"Average outbound PCE: {avg_outbound_pce:.2f}",
        f"Average inbound PCE: {avg_inbound_pce:.2f}",
        f"Average PCE per trip: {avg_pce:.2f}",
        f"Number of trips analyzed: {len(route_trips)}",
        f"Utilization percentage: {utilization_percentage:.2f}%",
        f"\nQ11 Answer: The average utilization percentage for Oxdjupsleden is {utilization_percentage:.2f}%.",
    ]
    for line in report_lines:
        print(line)

    return utilization_percentage

# Load ferry capacity data. The utilization function looks up
# ferries_data['fragancia']['capacity_passenger_car_equivalent'], so the JSON
# must contain that entry.
with open("data/ferries.json", "r") as file:
    ferries_data = json.load(file)

# Answer Q11: utilization percentage for Oxdjupsleden (None when data or capacity is missing)
oxdjupsleden_utilization = calculate_oxdjupsleden_utilization(ferry_data, ferries_data)


Oxdjupsleden Route Utilization Analysis:
--------------------------------------------------
Ferry: Fragancia
Ferry capacity: 34 PCE
Average outbound PCE: 9.95
Average inbound PCE: 9.94
Average PCE per trip: 9.94
Number of trips analyzed: 17505
Utilization percentage: 29.25%

Q11 Answer: The average utilization percentage for Oxdjupsleden is 29.25%.


# Q12: What's the average number of vehicles left at terminals in June for Furusundsleden (combine outbound and inbound)?

In [333]:
def average_vehicles_left_per_month(df):
    """
    Report the average number of vehicles left behind per trip on
    Furusundsleden in June (outbound + inbound combined).

    The printed answer is the mean over every June trip in ``df``. When the
    data covers more than one year, all June months contribute; previously
    only the first June found was reported.

    Args:
        df: pandas DataFrame with 'route_name', 'time_departure',
            'vehicles_left_at_terminal_outbound' and
            'vehicles_left_at_terminal_inbound' columns

    Returns:
        DataFrame with one row per June month ('YYYY-06') for Furusundsleden
        and columns 'route_name', 'month', 'avg_vehicles_left'; empty when
        there is no such data
    """
    # Work on a copy so the caller's frame is not modified
    df = df.copy()

    # 1. Vehicles left per trip in both directions; a missing count is treated as 0
    df['total_vehicles_left'] = df['vehicles_left_at_terminal_outbound'].fillna(0) + df['vehicles_left_at_terminal_inbound'].fillna(0)

    # 2. Year-month label of each departure (unparseable timestamps become NaT)
    df['time_departure'] = pd.to_datetime(df['time_departure'], errors='coerce')
    df['month'] = df['time_departure'].dt.strftime('%Y-%m')

    # 3. Per-route, per-month average of vehicles left per trip
    route_avg = df.groupby(['route_name', 'month'])['total_vehicles_left'].mean().reset_index()
    route_avg = route_avg.rename(columns={'total_vehicles_left': 'avg_vehicles_left'})

    # 4. The June rows for Furusundsleden (one per year present in the data)
    furusund_june = route_avg[(route_avg['route_name'] == 'Furusundsleden') &
                              (route_avg['month'].str.contains('-06'))]

    if furusund_june.empty:
        print("\nNo data available for Furusundsleden in June")
        return furusund_june

    # Average over all June trips rather than picking the first June row, so
    # multi-year data is not silently truncated to one year
    june_trips = df[(df['route_name'] == 'Furusundsleden') &
                    (df['time_departure'].dt.month == 6)]
    furusund_avg = june_trips['total_vehicles_left'].mean()

    print("\nAverage Vehicles Left at Terminals in June for Furusundsleden:")
    print(f"Furusundsleden (June): {furusund_avg:.2f} vehicles left per trip")
    print(f"\nQ12 Answer: The average number of vehicles left at terminals in June for Furusundsleden is {furusund_avg:.2f}")

    return furusund_june

# Print the Q12 answer; furusund_data receives the per-month June rows for
# Furusundsleden that the function returns (an empty frame when there are none)
furusund_data = average_vehicles_left_per_month(ferry_data)


Average Vehicles Left at Terminals in June for Furusundsleden:
Furusundsleden (June): 5.01 vehicles left per trip

Q12 Answer: The average number of vehicles left at terminals in June for Furusundsleden is 5.01


# Q13: What is the average daily vehicle throughput for Vaxholmsleden (outbound only)?

In [334]:
def calculate_vaxholmsleden_avg_throughput(df):
    """
    Report the mean number of outbound vehicles carried per day on Vaxholmsleden.

    Outbound cars, trucks and trucks with trailer are added per trip (missing
    counts are treated as 0), summed per calendar date of departure, and the
    daily sums are averaged over the dates that appear in the data.

    Args:
        df: pandas DataFrame containing ferry operation data.

    Returns:
        float: average daily outbound vehicle count, or None when the route
        has no rows
    """
    route_trips = df[df['route_name'] == 'Vaxholmsleden'].copy()
    if route_trips.empty:
        print("No data found for Vaxholmsleden route")
        return None

    # Vehicles per trip across the three outbound categories
    outbound_columns = ['cars_outbound', 'trucks_outbound', 'trucks_with_trailer_outbound']
    vehicles_per_trip = route_trips[outbound_columns].fillna(0).sum(axis=1)

    # Calendar date of each departure, used as the grouping key
    trip_dates = pd.to_datetime(route_trips['time_departure']).dt.date

    # One total per date, then the mean of those totals
    daily_totals = vehicles_per_trip.groupby(trip_dates).sum()
    avg_daily_throughput = daily_totals.mean()

    print("\nVaxholmsleden Average Daily Vehicle Throughput Analysis:")
    print("-" * 60)
    print(f"Number of days analyzed: {len(daily_totals)}")
    print(f"Average vehicles per day: {avg_daily_throughput:.2f}")
    print(f"Minimum vehicles in a day: {daily_totals.min():.0f}")
    print(f"Maximum vehicles in a day: {daily_totals.max():.0f}")

    print(f"\nQ13 Answer: The average vehicle throughput per day for Vaxholmsleden is {avg_daily_throughput:.2f} vehicles.")

    return avg_daily_throughput

# Answer Q13 with the already-loaded ferry_data; the result is the mean of the
# per-date outbound vehicle totals (None when the route has no rows)
vaxholmsleden_avg_throughput = calculate_vaxholmsleden_avg_throughput(ferry_data)


Vaxholmsleden Average Daily Vehicle Throughput Analysis:
------------------------------------------------------------
Number of days analyzed: 366
Average vehicles per day: 743.98
Minimum vehicles in a day: 0
Maximum vehicles in a day: 1387

Q13 Answer: The average vehicle throughput per day for Vaxholmsleden is 743.98 vehicles.


Q: What is the maximum Passenger Car Equivalent (PCE) during the peak hour of the Aspöleden route in February?


# Q14: What's the average PCE for Oxdjupsleden on Thursdays (calculated as the mean of the average inbound PCE and average outbound PCE)?

In [335]:
def calculate_thursday_pce_oxdjupsleden(df):
    """
    Report the average PCE on Thursdays for route Oxdjupsleden.

    The figure is the plain mean of the Thursday outbound average and the
    Thursday inbound average.

    Args:
        df: pandas DataFrame containing ferry operation data

    Returns:
        float: average Thursday PCE, or None when the route has no rows or
        none of them departs on a Thursday
    """
    route_trips = df[df['route_name'] == 'Oxdjupsleden']
    if route_trips.empty:
        print("No data available for Oxdjupsleden route")
        return None

    # pandas numbers weekdays from Monday=0, so Thursday is 3
    departure_weekday = pd.to_datetime(route_trips['time_departure']).dt.dayofweek
    thursday_data = route_trips[departure_weekday == 3]
    if thursday_data.empty:
        print("No Thursday data available for Oxdjupsleden route")
        return None

    # Directional means on Thursdays and their unweighted average
    avg_outbound_pce = thursday_data['passenger_car_equivalent_outbound'].mean()
    avg_inbound_pce = thursday_data['passenger_car_equivalent_inbound'].mean()
    avg_pce = (avg_outbound_pce + avg_inbound_pce) / 2

    report_lines = [
        "\nThursday PCE Analysis for Oxdjupsleden:",
        "-" * 50,
        f"Number of Thursday trips analyzed: {len(thursday_data)}",
        f"Average outbound PCE on Thursdays: {avg_outbound_pce:.2f}",
        f"Average inbound PCE on Thursdays: {avg_inbound_pce:.2f}",
        f"Average PCE per trip on Thursdays: {avg_pce:.2f}",
        f"\nQ14 Answer: The average PCE for Thursdays for route Oxdjupsleden is {avg_pce:.2f}.",
    ]
    for line in report_lines:
        print(line)

    return avg_pce

# Answer Q14: mean of the Thursday outbound and inbound PCE averages for
# Oxdjupsleden (None when there is no matching data)
thursday_pce = calculate_thursday_pce_oxdjupsleden(ferry_data)


Thursday PCE Analysis for Oxdjupsleden:
--------------------------------------------------
Number of Thursday trips analyzed: 2771
Average outbound PCE on Thursdays: 10.14
Average inbound PCE on Thursdays: 9.94
Average PCE per trip on Thursdays: 10.04

Q14 Answer: The average PCE for Thursdays for route Oxdjupsleden is 10.04.



# Q15: Which hour of the day has the highest average passenger car equivalent (PCE) for route Ljusteröleden (with inbound and outbound trips combined)?

In [336]:
def analyze_pce_variation_ljusteroleden(df):
    """
    Analyzes how the passenger car equivalent (PCE) varies on average throughout the day
    for route Ljusteröleden.
    
    Args:
        df: pandas DataFrame containing ferry operation data
        
    Returns:
        DataFrame: Hourly statistics of PCE for Ljusteröleden route
    """
    # Filter for Ljusteröleden route
    ljustero_data = df[df['route_name'] == 'Ljusteröleden'].copy()
    
    if ljustero_data.empty:
        print("No data available for Ljusteröleden route")
        return pd.DataFrame()
    
    # Convert departure time to datetime
    ljustero_data['time_departure'] = pd.to_datetime(ljustero_data['time_departure'])
    
    # Extract hour of day
    ljustero_data['hour_of_day'] = ljustero_data['time_departure'].dt.hour
    
    # Calculate total PCE for each trip (outbound + inbound)
    ljustero_data['total_pce'] = (
        ljustero_data['passenger_car_equivalent_outbound'].fillna(0) + 
        ljustero_data['passenger_car_equivalent_inbound'].fillna(0)
    )
    
    # Group by hour of day and calculate statistics
    hourly_stats = ljustero_data.groupby('hour_of_day').agg(
        avg_pce_outbound=('passenger_car_equivalent_outbound', 'mean'),
        avg_pce_inbound=('passenger_car_equivalent_inbound', 'mean'),
        avg_total_pce=('total_pce', 'mean'),
        trip_count=('route_id', 'count')
    ).reset_index()
    
    # Ensure we have all hours (0-23)
    full_hours = pd.DataFrame({'hour_of_day': range(0, 24)})
    hourly_stats = pd.merge(full_hours, hourly_stats, on='hour_of_day', how='left').fillna(0)
    
    # Sort by hour of day
    hourly_stats = hourly_stats.sort_values('hour_of_day')
    
    # Find the peak hours and PCE values
    peak_hour_outbound = hourly_stats.loc[hourly_stats['avg_pce_outbound'].idxmax()]
    peak_hour_inbound = hourly_stats.loc[hourly_stats['avg_pce_inbound'].idxmax()]
    
    # Find the hour with the highest combined PCE (outbound + inbound)
    max_combined_hour = hourly_stats.loc[hourly_stats['avg_total_pce'].idxmax()]
    
    print("\nPeak PCE times for Ljusteröleden:")
    print(f"Outbound: Hour {peak_hour_outbound['hour_of_day']} with average PCE of {peak_hour_outbound['avg_pce_outbound']:.2f}")
    print(f"Inbound: Hour {peak_hour_inbound['hour_of_day']} with average PCE of {peak_hour_inbound['avg_pce_inbound']:.2f}")
    print(f"Combined average: {max_combined_hour['avg_total_pce']:.2f}")
    
    print(f"\nThe hour with the highest combined average PCE is no.: {max_combined_hour['hour_of_day']}")
    
    return hourly_stats

# Run the Q15 analysis on the already loaded ferry_data and keep the returned
# per-hour statistics table (an empty DataFrame if the route has no rows)
hourly_pce_stats = analyze_pce_variation_ljusteroleden(ferry_data)


Peak PCE times for Ljusteröleden:
Outbound: Hour 15.0 with average PCE of 35.49
Inbound: Hour 15.0 with average PCE of 36.63
Combined average: 72.12

The hour with the highest combined average PCE is no.: 15.0



# Q16: What is the average passenger car equivalent (PCE) during June, July, August for Vaxholmsleden route (calculated as the mean of the average inbound PCE and average outbound PCE)?


In [337]:
# Q16: What is the average passenger car equivalent (PCE) during June, July, August for Vaxholmsleden route (calculated as the mean of the average inbound PCE and average outbound PCE)?

def calculate_summer_pce_vaxholmsleden(df):
    """
    Calculates the average passenger car equivalent (PCE) during summer months
    for the Vaxholmsleden route, as the mean of average outbound PCE and average inbound PCE.

    Rows are selected by route name and by the month of ``time_departure``
    (June, July, August of any year). Departure times that cannot be parsed are
    treated as missing and therefore never match a summer month. Missing PCE
    values are counted as 0 when averaging.

    Args:
        df: pandas DataFrame containing ferry operation data. The columns
            ``route_name``, ``time_departure``,
            ``passenger_car_equivalent_outbound`` and
            ``passenger_car_equivalent_inbound`` are read. ``df`` itself is
            not modified.

    Returns:
        float: Average PCE during summer for Vaxholmsleden, or None when the
        route has no rows or none of its rows fall in a summer month.
    """
    # Parse the departure times into a separate Series. This leaves the
    # caller's frame untouched without deep-copying the whole dataset.
    departure = pd.to_datetime(df['time_departure'], errors='coerce')

    # Filter data for Vaxholmsleden route
    is_vaxholm = df['route_name'] == 'Vaxholmsleden'

    if not is_vaxholm.any():
        print("No data found for Vaxholmsleden route")
        return None

    # Define summer months (June, July, August)
    summer_months = [6, 7, 8]

    # Filter for summer months (NaT departures have no month and drop out here)
    is_summer_trip = is_vaxholm & departure.dt.month.isin(summer_months)
    summer_data = df[is_summer_trip]

    if summer_data.empty:
        print("No summer data found for Vaxholmsleden route")
        return None

    # NOTE(review): missing PCE values are replaced by 0 before averaging, which
    # pulls the averages down compared with a plain .mean() that skips them —
    # confirm this is the intended treatment of missing counts.
    pce_columns = ['passenger_car_equivalent_outbound', 'passenger_car_equivalent_inbound']
    summer_pce = summer_data[pce_columns].fillna(0)

    # Calculate average PCE for outbound and inbound SEPARATELY
    avg_outbound_pce = summer_pce['passenger_car_equivalent_outbound'].mean()
    avg_inbound_pce = summer_pce['passenger_car_equivalent_inbound'].mean()

    # Calculate the MEAN of the two averages
    avg_summer_pce = (avg_outbound_pce + avg_inbound_pce) / 2

    # Calculate average PCE by month for context
    monthly_pce = summer_pce.groupby(departure[is_summer_trip].dt.month).mean()
    monthly_pce['total_avg_pce'] = (monthly_pce['passenger_car_equivalent_outbound'] +
                                   monthly_pce['passenger_car_equivalent_inbound']) / 2

    # Map month numbers to names for better readability
    month_names = {6: 'June', 7: 'July', 8: 'August'}
    monthly_pce.index = monthly_pce.index.map(month_names)

    print("Relevant context:")
    print("Monthly breakdown of average PCE for Vaxholmsleden during summer:")
    for month, row in monthly_pce.iterrows():
        print(f"{month}: {row['total_avg_pce']:.2f} PCE")
    print(f"Outbound average PCE: {avg_outbound_pce:.2f}")
    print(f"Inbound average PCE: {avg_inbound_pce:.2f}")
    print(f'Combined average PCE: {avg_summer_pce:.2f}.')

    print(f"\nQ16 Answer: The average PCE during June, July and August for Vaxholmsleden route is {avg_summer_pce:.2f}.")

    return avg_summer_pce

# Run the Q16 analysis on the loaded ferry_data and keep the returned summer
# average PCE (None if no Vaxholmsleden summer rows are found)
avg_summer_pce = calculate_summer_pce_vaxholmsleden(ferry_data)

Relevant context:
Monthly breakdown of average PCE for Vaxholmsleden during summer:
June: 23.21 PCE
July: 19.77 PCE
August: 20.26 PCE
Outbound average PCE: 20.71
Inbound average PCE: 21.35
Combined average PCE: 21.03.

Q16 Answer: The average PCE during June, July and August for Vaxholmsleden route is 21.03.


# Q17: What is the difference in percent of pedestrians to vehicles, comparing weekdays to weekends for Vaxholmsleden?

In [338]:
def analyze_vaxholmsleden_pedestrian_vehicle_weekday_ratio(df):
    """
    Analyze how the ratio of pedestrians to vehicles differs when comparing weekdays to weekends
    for the Vaxholmsleden ferry route.

    Pedestrians and vehicles (cars, trucks, trucks with trailer, motorcycles
    and buses) are summed over both directions, with missing counts treated as
    0. The ratio is total pedestrians / total vehicles, computed once over all
    weekday rows (Mon-Fri) and once over all weekend rows (Sat-Sun). The result
    is the weekday ratio expressed as a percentage change relative to the
    weekend ratio.

    Args:
        df: pandas DataFrame containing ferry operation data. The columns
            ``route_name``, ``time_departure`` and the ``*_outbound`` /
            ``*_inbound`` count columns for pedestrians and the five vehicle
            types are read. ``df`` itself is not modified.

    Returns:
        float: Percentage difference when comparing weekday to weekend pedestrian-to-vehicle ratios.
        None is returned (after printing the reason) when the route has no
        rows, when either day type has no vehicles, or when the weekend ratio
        is 0, because the percentage is undefined in those cases.
    """
    # Filter for Vaxholmsleden route
    vaxholm_data = df[df['route_name'] == 'Vaxholmsleden'].copy()

    if vaxholm_data.empty:
        print("No data found for Vaxholmsleden route")
        return None

    # Convert departure time to datetime
    vaxholm_data['time_departure'] = pd.to_datetime(vaxholm_data['time_departure'])

    # Create weekday/weekend classifier (0-4 are Monday to Friday, 5-6 are weekend)
    vaxholm_data['is_weekend'] = vaxholm_data['time_departure'].dt.dayofweek >= 5

    # Build the count column names instead of spelling out all twelve of them
    directions = ('outbound', 'inbound')
    vehicle_types = ('cars', 'trucks', 'trucks_with_trailer', 'motorcycles', 'buses')
    pedestrian_columns = [f'pedestrians_{direction}' for direction in directions]
    vehicle_columns = [f'{vehicle}_{direction}' for direction in directions for vehicle in vehicle_types]

    # Per-row totals over both directions; missing counts contribute 0
    vaxholm_data['total_pedestrians'] = vaxholm_data[pedestrian_columns].fillna(0).sum(axis=1)
    vaxholm_data['total_vehicles'] = vaxholm_data[vehicle_columns].fillna(0).sum(axis=1)

    # Ratio of pedestrians to vehicles per day type. Summing each subset
    # directly (rather than indexing into a groupby result) means a day type
    # with no rows is reported cleanly instead of raising IndexError.
    day_type_rows = (
        ('weekday', vaxholm_data[~vaxholm_data['is_weekend']]),
        ('weekend', vaxholm_data[vaxholm_data['is_weekend']]),
    )
    ratios = {}
    for label, rows in day_type_rows:
        vehicles = rows['total_vehicles'].sum()
        if vehicles == 0:
            print(f"No {label} vehicles recorded for Vaxholmsleden; the ratio cannot be computed")
            return None
        ratios[label] = rows['total_pedestrians'].sum() / vehicles

    weekday_ratio = ratios['weekday']
    weekend_ratio = ratios['weekend']

    # The weekend ratio is the baseline of the percentage, so it must be non-zero
    if weekend_ratio == 0:
        print("No weekend pedestrians recorded for Vaxholmsleden; the percentage difference is undefined")
        return None

    # Calculate percentage change (weekday compared to weekend)
    percent_change = ((weekday_ratio - weekend_ratio) / weekend_ratio) * 100

    # Print the results
    print("\nVaxholmsleden Pedestrian to Vehicle Ratio Analysis:")
    print("-" * 60)
    print(f"Weekday ratio (Mon-Fri): {weekday_ratio:.3f} pedestrians per vehicle")
    print(f"Weekend ratio (Sat-Sun): {weekend_ratio:.3f} pedestrians per vehicle")

    # Add interpretation of the sign (an exact tie is neither higher nor lower)
    if percent_change > 0:
        print(f"Percentage change: +{percent_change:.1f}% (higher on weekdays)")
    elif percent_change < 0:
        print(f"Percentage change: {percent_change:.1f}% (lower on weekdays)")
    else:
        print(f"Percentage change: {percent_change:.1f}% (same on weekdays and weekends)")

    print(f"\nQ17 Answer: The difference in percent of pedestrians to vehicles, comparing weekdays to weekends for Vaxholmsleden is {percent_change:.1f}%.")

    return percent_change

# Run the Q17 analysis on the loaded ferry_data and keep the returned
# weekday-vs-weekend percentage change (None if the route has no rows)
vaxholm_weekday_weekend_ratio = analyze_vaxholmsleden_pedestrian_vehicle_weekday_ratio(ferry_data)


Vaxholmsleden Pedestrian to Vehicle Ratio Analysis:
------------------------------------------------------------
Weekday ratio (Mon-Fri): 0.185 pedestrians per vehicle
Weekend ratio (Sat-Sun): 0.184 pedestrians per vehicle
Percentage change: +0.9% (higher on weekdays)

Q17 Answer: The difference in percent of pedestrians to vehicles, comparing weekdays to weekends for Vaxholmsleden is 0.9%.


# Q18: At which hour does Oxdjupsleden on average have the highest passenger car equivalent (PCE)?

In [339]:
def analyze_oxdjupsleden_highest_pce_hour(df):
    """
    Determine the hour of day when Oxdjupsleden has the highest average passenger car equivalent (PCE).

    Each row is assigned to the hour of its ``time_departure`` and given a
    combined PCE (outbound + inbound, a missing direction counting as 0). The
    combined PCE is averaged per hour and the hour with the largest average is
    reported, together with a printed top-5 table whose columns are: combined
    average, outbound average, inbound average, number of rows.

    Args:
        df: pandas DataFrame containing ferry operation data. The columns
            ``route_name``, ``route_id``, ``time_departure``,
            ``passenger_car_equivalent_outbound`` and
            ``passenger_car_equivalent_inbound`` are read. ``df`` itself is
            not modified.

    Returns:
        int: Hour of day when Oxdjupsleden has the highest average PCE, or None
        when the route has no rows or none of its rows has a departure time.
    """
    # Filter for Oxdjupsleden route only
    oxdjup_data = df[df['route_name'] == 'Oxdjupsleden'].copy()

    if oxdjup_data.empty:
        print("No data found for Oxdjupsleden route")
        return None

    # Convert time_departure to datetime and extract hour
    oxdjup_data['time_departure'] = pd.to_datetime(oxdjup_data['time_departure'])
    oxdjup_data['hour_of_day'] = oxdjup_data['time_departure'].dt.hour

    # Calculate total PCE for each trip (outbound + inbound)
    oxdjup_data['total_pce'] = (
        oxdjup_data['passenger_car_equivalent_outbound'].fillna(0) +
        oxdjup_data['passenger_car_equivalent_inbound'].fillna(0)
    )

    # Group by hour and calculate average PCE
    hourly_pce = oxdjup_data.groupby('hour_of_day').agg(
        avg_pce=('total_pce', 'mean'),
        outbound_avg=('passenger_car_equivalent_outbound', 'mean'),
        inbound_avg=('passenger_car_equivalent_inbound', 'mean'),
        trip_count=('route_id', 'count')
    ).reset_index()

    # Rows without a departure time have no hour and are dropped by groupby;
    # if that leaves nothing there is no peak to report
    if hourly_pce.empty:
        print("No departure hours available for Oxdjupsleden route")
        return None

    # Sort by average PCE in descending order; a stable sort makes the order of
    # hours with identical averages deterministic
    hourly_pce = hourly_pce.sort_values('avg_pce', ascending=False, kind='stable')

    # Get the hour with highest average PCE. Selecting a row yields a float
    # Series (the columns mix int and float), so convert back to the int that
    # the docstring promises.
    peak_hour = int(hourly_pce.iloc[0]['hour_of_day'])

    # Print detailed analysis
    print("\nOxdjupsleden Hourly PCE Analysis:")
    print("-" * 50)

    print("\nTop 5 Hours by Average PCE:")
    for _, row in hourly_pce.head(5).iterrows():
        hour = int(row['hour_of_day'])
        print(f"{hour:02d}:00: {row['avg_pce']:.2f}, {row['outbound_avg']:.2f}, {row['inbound_avg']:.2f}, {int(row['trip_count'])}")

    print(f"\nQ18 Answer: Oxdjupsleden has the highest average PCE at hour {peak_hour:02d}:00")

    return peak_hour

# Run the Q18 analysis on the loaded ferry_data and keep the returned peak
# hour (None if the route has no rows)
highest_pce_hour = analyze_oxdjupsleden_highest_pce_hour(ferry_data)


Oxdjupsleden Hourly PCE Analysis:
--------------------------------------------------

Top 5 Hours by Average PCE:
15:00: 31.81, 15.46, 16.35, 1139
16:00: 29.77, 15.43, 14.34, 1331
17:00: 24.63, 13.18, 11.45, 1271
14:00: 23.67, 11.76, 11.91, 1340
11:00: 23.46, 11.49, 11.97, 1060

Q18 Answer: Oxdjupsleden has the highest average PCE at hour 15:00


# Q19: What was Aspöleden's ratio of pedestrians to vehicles in 2023?

In [340]:
def calculate_aspoleden_pedestrian_vehicle_ratio(df):
    """
    Compute the pedestrians-per-vehicle ratio for route Aspöleden over 2023.

    Pedestrians and vehicles (cars, trucks, trucks with trailer, motorcycles,
    buses) are added up over both directions and over every 2023 row of the
    route, with missing counts treated as 0. A per-month breakdown is printed
    alongside the overall figure.

    Args:
        df: pandas DataFrame containing ferry operation data; it is read only,
            never modified.

    Returns:
        float: total pedestrians divided by total vehicles. None when the
        route has no 2023 rows; float('inf') when no vehicles were recorded.
    """
    # Parse the timestamps on the side so the input frame keeps its own column
    departures = pd.to_datetime(df['time_departure'])

    # Rows belonging to Aspöleden with a departure in 2023
    in_scope = (df['route_name'] == 'Aspöleden') & (departures.dt.year == 2023)
    trips_2023 = df[in_scope].copy()

    if len(trips_2023) == 0:
        print("No data found for Aspöleden in 2023")
        return None

    def _add_up(column_names):
        # Left-to-right sum of the named columns, missing values counted as 0
        running = trips_2023[column_names[0]].fillna(0)
        for column_name in column_names[1:]:
            running = running + trips_2023[column_name].fillna(0)
        return running

    pedestrian_columns = ['pedestrians_outbound', 'pedestrians_inbound']
    vehicle_columns = [
        'cars_outbound', 'trucks_outbound', 'trucks_with_trailer_outbound',
        'motorcycles_outbound', 'buses_outbound',
        'cars_inbound', 'trucks_inbound', 'trucks_with_trailer_inbound',
        'motorcycles_inbound', 'buses_inbound',
    ]

    # Per-trip totals, both directions combined
    trips_2023['total_pedestrians'] = _add_up(pedestrian_columns)
    trips_2023['total_vehicles'] = _add_up(vehicle_columns)

    # Grand totals for the year
    pedestrians_sum = trips_2023['total_pedestrians'].sum()
    vehicles_sum = trips_2023['total_vehicles'].sum()

    # Guard the division: without vehicles the ratio is reported as infinite
    if vehicles_sum == 0:
        print("No vehicles recorded for Aspöleden in 2023")
        return float('inf')

    overall_ratio = pedestrians_sum / vehicles_sum

    # Headline figures
    print("\nAspöleden Pedestrian to Vehicle Ratio Analysis (2023):")
    print("-" * 60)
    print(f"Total trips analyzed: {len(trips_2023)}")
    print(f"Total pedestrians: {pedestrians_sum:,}")
    print(f"Total vehicles: {vehicles_sum:,}")
    print(f"Pedestrian to vehicle ratio: {overall_ratio:.3f}")

    # Month-by-month context; a month with zero vehicles gets a NaN ratio
    trip_month = departures[in_scope].dt.month
    per_month = trips_2023.groupby(trip_month)[['total_pedestrians', 'total_vehicles']].sum()
    per_month['ratio'] = per_month['total_pedestrians'] / per_month['total_vehicles'].replace(0, float('nan'))

    print("\nMonthly Breakdown:")
    month_names = dict(enumerate(
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        start=1,
    ))

    monthly_rows = zip(
        per_month.index,
        per_month['ratio'],
        per_month['total_pedestrians'],
        per_month['total_vehicles'],
    )
    for month, month_ratio, month_pedestrians, month_vehicles in monthly_rows:
        month_name = month_names.get(month, str(month))
        print(f"{month_name}: {month_ratio:.3f} ({int(month_pedestrians):,} pedestrians / {int(month_vehicles):,} vehicles)")

    print(f"\nQ19 Answer: Aspöleden's ratio of pedestrians to vehicles in 2023 was {overall_ratio:.3f}.")

    return overall_ratio

# Run the Q19 analysis on the loaded ferry_data and keep the returned ratio
# (None if there are no 2023 rows, inf if no vehicles were recorded)
aspoleden_ped_vehicle_ratio = calculate_aspoleden_pedestrian_vehicle_ratio(ferry_data)


Aspöleden Pedestrian to Vehicle Ratio Analysis (2023):
------------------------------------------------------------
Total trips analyzed: 4631
Total pedestrians: 76,522
Total vehicles: 124,694
Pedestrian to vehicle ratio: 0.614

Monthly Breakdown:
Mar: 0.425 (4,445 pedestrians / 10,458 vehicles)
Apr: 0.473 (5,785 pedestrians / 12,224 vehicles)
May: 0.575 (8,085 pedestrians / 14,061 vehicles)
Jun: 0.751 (9,761 pedestrians / 12,994 vehicles)
Jul: 0.861 (15,350 pedestrians / 17,830 vehicles)
Aug: 0.899 (13,016 pedestrians / 14,472 vehicles)
Sep: 0.617 (6,910 pedestrians / 11,208 vehicles)
Oct: 0.433 (5,122 pedestrians / 11,835 vehicles)
Nov: 0.413 (4,139 pedestrians / 10,029 vehicles)
Dec: 0.408 (3,909 pedestrians / 9,583 vehicles)

Q19 Answer: Aspöleden's ratio of pedestrians to vehicles in 2023 was 0.614.


# Q20: What is the total CO2 emission from Ljusteröleden in February 2024, assuming 2.68 kg CO2 per liter of fuel?

In [341]:
def calculate_ljustero_co2_emissions_feb2024(df, co2_factor=2.68):
    """
    Calculates the total CO2 emissions from Ljusteröleden ferry route in February 2024,
    assuming 2.68 kg CO2 per liter of fuel unless another factor is given.

    The fuel consumption of both directions (``fuelcons_outbound_l`` and
    ``fuelcons_inbound_l``) is summed over all matching rows; missing fuel
    values are skipped by the sum. When no row matches, a notice is printed
    and the reported emissions are 0.

    Args:
        df: pandas DataFrame containing ferry operation data. The columns
            ``route_name``, ``time_departure``, ``fuelcons_outbound_l`` and
            ``fuelcons_inbound_l`` are read. ``df`` itself is not modified.
        co2_factor: kg of CO2 emitted per liter of fuel (default 2.68).

    Returns:
        float: total CO2 emissions in kg
    """
    # Parse the departure times into a separate Series; this leaves the
    # caller's frame untouched without deep-copying the whole dataset
    departure = pd.to_datetime(df['time_departure'])

    # Filter data for Ljusteröleden in February 2024
    filtered_data = df[
        (df['route_name'] == 'Ljusteröleden') &
        (departure.dt.year == 2024) &
        (departure.dt.month == 2)
    ]

    # A total of 0 below then means "no data", not "no emissions"
    if filtered_data.empty:
        print("No data found for Ljusteröleden in February 2024")

    # Sum up fuel consumption (both outbound and inbound)
    total_fuel_outbound = filtered_data['fuelcons_outbound_l'].sum()
    total_fuel_inbound = filtered_data['fuelcons_inbound_l'].sum()
    total_fuel_consumption = total_fuel_outbound + total_fuel_inbound

    # Liters of fuel -> kg of CO2
    total_co2_emissions = total_fuel_consumption * co2_factor

    # Convert to tons for easier readability if emissions are large
    total_co2_emissions_tons = total_co2_emissions / 1000

    # Print the results
    print("CO2 Emissions Analysis for Ljusteröleden in February 2024:")
    print(f"Total fuel consumption: {total_fuel_consumption:,.2f} liters")
    print(f"CO2 emission factor: {co2_factor} kg CO2 per liter")
    print(f"Total CO2 emissions: {total_co2_emissions:,.2f} kg CO2 ({total_co2_emissions_tons:,.2f} tons CO2)")

    # Print the direct answer for Q20
    print(f"\nQ20 Answer: The total CO2 emission from Ljusteröleden in February 2024 is {total_co2_emissions:,.2f} kg")

    return total_co2_emissions

# Execute the function and keep the result (kg CO2). Assigning it, like the
# other question cells do, also stops the cell from echoing the raw float
# underneath the formatted answer.
ljustero_co2_feb2024_kg = calculate_ljustero_co2_emissions_feb2024(ferry_data)

CO2 Emissions Analysis for Ljusteröleden in February 2024:
Total fuel consumption: 16,359.66 liters
CO2 emission factor: 2.68 kg CO2 per liter
Total CO2 emissions: 43,843.88 kg CO2 (43.84 tons CO2)

Q20 Answer: The total CO2 emission from Ljusteröleden in February 2024 is 43,843.88 kg


43843.88434754202