# In [1]:
import pandas as pd


def calculate_distance_matrix(df) -> pd.DataFrame:
    """
    Calculate a symmetric distance matrix from a table of toll-to-toll legs.

    Every row of ``df`` contributes its ``distance`` to both the (A, B) and
    the (B, A) cell; repeated pairs are summed. Pairs that never appear in
    ``df`` (including the diagonal) are left at 0.

    Args:
        df (pandas.DataFrame): Frame with the columns 'toll_id_A',
            'toll_id_B' and 'distance'.

    Returns:
        pandas.DataFrame: Square matrix whose index and columns are the
        sorted unique toll IDs found in either ID column.
    """
    toll_ids = sorted(set(df['toll_id_A']) | set(df['toll_id_B']))

    # Start from zeros so distances can be accumulated directly; starting
    # from an empty (NaN) frame would turn every running sum into NaN.
    distance_matrix = pd.DataFrame(0.0, index=toll_ids, columns=toll_ids)

    for toll_a, toll_b, distance in zip(df['toll_id_A'], df['toll_id_B'], df['distance']):
        distance_matrix.at[toll_a, toll_b] += distance
        # Mirror the leg to keep the matrix symmetric, but do not count a
        # self-referencing row twice on the diagonal.
        if toll_a != toll_b:
            distance_matrix.at[toll_b, toll_a] += distance

    return distance_matrix

# In [2]:
def unroll_distance_matrix(df) -> pd.DataFrame:
    """
    Unroll a distance matrix to a DataFrame in the style of the initial dataset.

    Each off-diagonal cell of the matrix becomes one row; cells whose row
    label equals their column label are skipped.

    Args:
        df (pandas.DataFrame): Distance matrix whose index holds the start
            IDs and whose columns hold the end IDs.

    Returns:
        pandas.DataFrame: Unrolled DataFrame containing columns 'id_start', 'id_end', and 'distance'.
    """
    rows = [
        (id_start, id_end, df.at[id_start, id_end])
        for id_start in df.index
        for id_end in df.columns
        if id_start != id_end
    ]

    # Passing the column names explicitly keeps the schema intact even when
    # the matrix has no off-diagonal cells.
    return pd.DataFrame(rows, columns=['id_start', 'id_end', 'distance'])

# In [3]:
def find_ids_within_ten_percentage_threshold(df, reference_id) -> pd.DataFrame:
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.

    The average is taken over the 'distance' values of all rows sharing the
    same 'id_start'. The reference ID itself always satisfies the condition
    and is therefore part of the result.

    Args:
        df (pandas.DataFrame): Frame with the columns 'id_start' and
            'distance'.
        reference_id (int): 'id_start' value whose average distance defines
            the 10% band.

    Returns:
        pandas.DataFrame: One row per matching ID, sorted by 'id_start', with
        the columns 'id_start' and 'average_distance'. Empty when
        ``reference_id`` does not occur in 'id_start'.
    """
    # groupby sorts its keys, so the result comes out ordered by id_start.
    average_by_id = df.groupby('id_start')['distance'].mean().rename('average_distance')

    if reference_id not in average_by_id.index:
        # Unknown reference: return an empty frame with the usual columns.
        return average_by_id.iloc[0:0].reset_index()

    reference_average = average_by_id.loc[reference_id]

    # Compare the absolute deviation against 10% of the reference average;
    # this is the same band as [0.9 * ref, 1.1 * ref] for non-negative values.
    tolerance = 0.1 * abs(reference_average)
    within_band = (average_by_id - reference_average).abs() <= tolerance

    return average_by_id[within_band].reset_index()

# In [4]:
def calculate_toll_rate(df) -> pd.DataFrame:
    """
    Calculate toll rates for each vehicle type based on the unrolled DataFrame.

    One column per vehicle type ('moto', 'car', 'rv', 'bus', 'truck') is
    added, holding ``distance * rate coefficient`` for that type.

    Args:
        df (pandas.DataFrame): Frame with a numeric 'distance' column.

    Returns:
        pandas.DataFrame: Copy of ``df`` with the five rate columns added;
        the frame passed in is left unmodified.
    """
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    # Work on a copy so the caller's frame is not changed as a side effect.
    result = df.copy()
    for vehicle_type, rate_coefficient in rate_coefficients.items():
        result[vehicle_type] = result['distance'] * rate_coefficient

    return result

# In [5]:
def calculate_time_based_toll_rates(df) -> pd.DataFrame:
    """
    Calculate time-based toll rates for different time intervals within a day.

    For every row the day type (weekday/weekend) is taken from the start
    datetime. The first time band of that day type that contains both the
    start time and the end time supplies the factor by which the vehicle
    rate columns are multiplied:

    * weekday 00:00:00-10:00:00 -> 0.8
    * weekday 10:00:00-18:00:00 -> 1.2
    * weekday 18:00:00-23:59:59 -> 0.8
    * weekend 00:00:00-23:59:59 -> 0.7

    Rows whose start and end time do not fall into one common band keep
    their rates unchanged (factor 1.0).

    Args:
        df (pandas.DataFrame): Frame with datetime columns 'start_datetime'
            and 'end_datetime' and numeric rate columns 'moto', 'car', 'rv',
            'bus' and 'truck'.

    Returns:
        pandas.DataFrame: Copy of ``df`` with the adjusted rate columns and
        the helper columns 'start_day', 'start_time', 'end_day' and
        'end_time' added; the frame passed in is left unmodified.
    """
    # Imported locally because the module does not import datetime.time.
    from datetime import time

    # (band start, band end, factor); bands are checked in order, so a time
    # lying exactly on a shared boundary is matched by the earlier band.
    weekday_bands = [
        (time(0, 0, 0), time(10, 0, 0), 0.8),
        (time(10, 0, 0), time(18, 0, 0), 1.2),
        (time(18, 0, 0), time(23, 59, 59), 0.8),
    ]
    weekend_bands = [(time(0, 0, 0), time(23, 59, 59), 0.7)]
    weekdays = {'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'}
    vehicle_types = ['moto', 'car', 'rv', 'bus', 'truck']

    result = df.copy()
    result['start_day'] = result['start_datetime'].dt.day_name()
    result['start_time'] = result['start_datetime'].dt.time
    result['end_day'] = result['end_datetime'].dt.day_name()
    result['end_time'] = result['end_datetime'].dt.time

    # Determine one factor per row, in row order.
    factors = []
    for start_day, start_time, end_time in zip(
        result['start_day'], result['start_time'], result['end_time']
    ):
        bands = weekday_bands if start_day in weekdays else weekend_bands
        factor = 1.0  # applied when no single band covers the whole trip
        for band_start, band_end, band_factor in bands:
            if band_start <= start_time <= band_end and band_start <= end_time <= band_end:
                factor = band_factor
                break
        factors.append(factor)

    # Multiply positionally so the result does not depend on the index labels.
    for vehicle_type in vehicle_types:
        result[vehicle_type] = result[vehicle_type] * factors

    return result