Python Q.1 Distance Matrix Calculation

In [14]:
import pandas as pd
import networkx as nx

def calculate_distance_matrix(csv_file):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(csv_file)

    # Check the column names in the DataFrame
    expected_columns = ['id_start', 'id_end', 'distance']
    if not all(col in df.columns for col in expected_columns):
        raise ValueError("CSV file must have columns 'id_start', 'id_end', and 'distance'.")

    # Create a graph to represent toll locations and distances
    G = nx.Graph()

    # Add edges and distances to the graph
    for index, row in df.iterrows():
        G.add_edge(row['id_start'], row['id_end'], distance=row['distance'])

    # Calculate all-pairs shortest paths
    all_pairs_shortest_paths = dict(nx.all_pairs_dijkstra_path_length(G))

    # Create a DataFrame for the distance matrix
    tollbooths = sorted(list(G.nodes))
    distance_matrix_data = [[0] * len(tollbooths) for _ in range(len(tollbooths))]

    for i, source in enumerate(tollbooths):
        for j, target in enumerate(tollbooths):
            if i != j:
                distance_matrix_data[i][j] = all_pairs_shortest_paths[source][target]

    # Create a DataFrame with the distance matrix
    distance_matrix = pd.DataFrame(distance_matrix_data, index=tollbooths, columns=tollbooths)

    return distance_matrix

Python Q.2 Unroll Distance Matrix

In [16]:
import pandas as pd

def unroll_distance_matrix(distance_matrix):
    # Ensure the input is a DataFrame
    if not isinstance(distance_matrix, pd.DataFrame):
        raise ValueError("Input must be a DataFrame.")

    # Extract the column and index names from the distance matrix
    tollbooths = distance_matrix.index

    # Initialize lists to store the unrolled data
    id_start_list = []
    id_end_list = []
    distance_list = []

    # Iterate over the distance matrix to extract non-diagonal elements
    for i, start_toll in enumerate(tollbooths):
        for j, end_toll in enumerate(tollbooths):
            if i != j:
                id_start_list.append(start_toll)
                id_end_list.append(end_toll)
                distance_list.append(distance_matrix.loc[start_toll, end_toll])

    # Create a DataFrame from the unrolled data
    unrolled_df = pd.DataFrame({'id_start': id_start_list, 'id_end': id_end_list, 'distance': distance_list})

    return unrolled_df


Python Q.3 Finding Ids within Percentage Threshold

In [18]:
import pandas as pd

def unroll_distance_matrix(distance_matrix):
    # Ensure the input is a DataFrame
    if not isinstance(distance_matrix, pd.DataFrame):
        raise ValueError("Input must be a DataFrame.")

    # Extract the column and index names from the distance matrix
    tollbooths = distance_matrix.index

    # Initialize lists to store the unrolled data
    id_start_list = []
    id_end_list = []
    distance_list = []

    # Iterate over the distance matrix to extract non-diagonal elements
    for i, start_toll in enumerate(tollbooths):
        for j, end_toll in enumerate(tollbooths):
            if i != j:
                id_start_list.append(start_toll)
                id_end_list.append(end_toll)
                distance_list.append(distance_matrix.loc[start_toll, end_toll])

    # Create a DataFrame from the unrolled data
    unrolled_df = pd.DataFrame({'id_start': id_start_list, 'id_end': id_end_list, 'distance': distance_list})

    return unrolled_df

def find_ids_within_ten_percentage_threshold(distance_df, reference_value):
    # Ensure the input is a DataFrame
    if not isinstance(distance_df, pd.DataFrame):
        raise ValueError("Input must be a DataFrame.")

    # Check the column names in the DataFrame
    expected_columns = ['id_start', 'id_end', 'distance']
    if not all(col in distance_df.columns for col in expected_columns):
        raise ValueError("DataFrame must have columns 'id_start', 'id_end', and 'distance'.")

    # Filter the DataFrame for rows with the given reference value as 'id_start'
    reference_rows = distance_df[distance_df['id_start'] == reference_value]

    # Check if the reference value exists in the DataFrame
    if reference_rows.empty:
        raise ValueError(f"Reference value {reference_value} not found in the DataFrame.")

    # Calculate the average distance for the reference value
    average_distance = reference_rows['distance'].mean()

    # Calculate the threshold for 10% above and below the average distance
    threshold_min = average_distance * 0.9
    threshold_max = average_distance * 1.1

    # Filter rows where 'distance' is within the threshold range
    filtered_rows = distance_df[(distance_df['distance'] >= threshold_min) & (distance_df['distance'] <= threshold_max)]

    # Get unique values from the 'id_start' column and sort them
    result_ids = sorted(filtered_rows['id_start'].unique())

    return result_ids

# Example usage:
# Assuming result_matrix is the distance matrix from Question 1
result_unrolled = unroll_distance_matrix(result_matrix)

# Assuming result_unrolled is the unrolled DataFrame from the previous step
unique_ids = result_unrolled['id_start'].unique()

if len(unique_ids) > 0:
    reference_value = unique_ids[0]
    result_within_threshold = find_ids_within_ten_percentage_threshold(result_unrolled, reference_value)
    print(result_within_threshold)
else:
    print("No unique 'id_start' values found in the DataFrame.")


[1001400.0, 1001402.0, 1001404.0, 1001406.0, 1001408.0, 1001410.0, 1001412.0, 1001414.0, 1001416.0, 1001418.0, 1001420.0, 1001422.0, 1001424.0, 1001426.0, 1001428.0, 1001430.0, 1001432.0, 1001434.0, 1001436.0, 1001437.0, 1001438.0, 1001440.0, 1001442.0, 1001444.0, 1001446.0, 1001448.0, 1001450.0, 1001452.0, 1001454.0, 1001456.0, 1001458.0, 1001460.0, 1001461.0, 1001462.0, 1001464.0, 1001466.0, 1001468.0, 1001470.0, 1001472.0, 1001488.0, 1004354.0, 1004355.0, 1004356.0]


Pyhton Q.4 Calculate Toll Rate

In [19]:
import pandas as pd

def calculate_toll_rate(distance_df):
    # Ensure the input is a DataFrame
    if not isinstance(distance_df, pd.DataFrame):
        raise ValueError("Input must be a DataFrame.")

    # Check the column names in the DataFrame
    expected_columns = ['id_start', 'id_end', 'distance']
    if not all(col in distance_df.columns for col in expected_columns):
        raise ValueError("DataFrame must have columns 'id_start', 'id_end', and 'distance'.")

    # Define rate coefficients for each vehicle type
    rate_coefficients = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6
    }

    # Calculate toll rates for each vehicle type
    for vehicle_type, rate_coefficient in rate_coefficients.items():
        column_name = f"{vehicle_type}_toll"
        distance_df[column_name] = distance_df['distance'] * rate_coefficient

    return distance_df

# Example usage:
# Assuming result_unrolled is the unrolled DataFrame from Question 3
result_with_toll_rates = calculate_toll_rate(result_unrolled)
print(result_with_toll_rates)


       id_start     id_end  distance  moto_toll  car_toll  rv_toll  bus_toll  \
0     1001400.0  1001402.0         1        0.8       1.2      1.5       2.2   
1     1001400.0  1001404.0         2        1.6       2.4      3.0       4.4   
2     1001400.0  1001406.0         3        2.4       3.6      4.5       6.6   
3     1001400.0  1001408.0         4        3.2       4.8      6.0       8.8   
4     1001400.0  1001410.0         5        4.0       6.0      7.5      11.0   
...         ...        ...       ...        ...       ...      ...       ...   
1801  1004356.0  1001470.0        16       12.8      19.2     24.0      35.2   
1802  1004356.0  1001472.0        17       13.6      20.4     25.5      37.4   
1803  1004356.0  1001488.0         1        0.8       1.2      1.5       2.2   
1804  1004356.0  1004354.0         1        0.8       1.2      1.5       2.2   
1805  1004356.0  1004355.0         2        1.6       2.4      3.0       4.4   

      truck_toll  
0            3.6  
1

Python Q.5 