In [7]:
import pandas as pd
import numpy as np

def calculate_distance_matrix(df) -> pd.DataFrame:
   
    toll_locations = pd.concat([df['id_start'], df['id_end']]).unique()
    toll_locations.sort()  
      
    distance_matrix = pd.DataFrame(np.inf, index=toll_locations, columns=toll_locations)
     
    for _, row in df.iterrows():
        start, end, distance = row['id_start'], row['id_end'], row['distance']
        distance_matrix.at[start, end] = distance
        distance_matrix.at[end, start] = distance  
    
    np.fill_diagonal(distance_matrix.values, 0)

    for k in toll_locations:
        for i in toll_locations:
            for j in toll_locations:
                if distance_matrix.at[i, j] > distance_matrix.at[i, k] + distance_matrix.at[k, j]:
                    distance_matrix.at[i, j] = distance_matrix.at[i, k] + distance_matrix.at[k, j]

    return distance_matrix

file_path = r'C:\Users\kartik\Desktop\dataset-2.csv'
df = pd.read_csv(file_path)
distance_matrix = calculate_distance_matrix(df)
print(distance_matrix)


         1001400  1001402  1001404  1001406  1001408  1001410  1001412  \
1001400      0.0      9.7     29.9     45.9     67.6     78.7     94.3   
1001402      9.7      0.0     20.2     36.2     57.9     69.0     84.6   
1001404     29.9     20.2      0.0     16.0     37.7     48.8     64.4   
1001406     45.9     36.2     16.0      0.0     21.7     32.8     48.4   
1001408     67.6     57.9     37.7     21.7      0.0     11.1     26.7   
1001410     78.7     69.0     48.8     32.8     11.1      0.0     15.6   
1001412     94.3     84.6     64.4     48.4     26.7     15.6      0.0   
1001414    112.5    102.8     82.6     66.6     44.9     33.8     18.2   
1001416    125.7    116.0     95.8     79.8     58.1     47.0     31.4   
1001418    139.3    129.6    109.4     93.4     71.7     60.6     45.0   
1001420    152.2    142.5    122.3    106.3     84.6     73.5     57.9   
1001422    161.8    152.1    131.9    115.9     94.2     83.1     67.5   
1001424    173.2    163.5    143.3    

In [17]:
import pandas as pd

def unroll_distance_matrix(df) -> pd.DataFrame:
    rows = []  # Create a  blank list to hold the DataFrame
    for id_start in df.index:  # use loop for the distance matrix to collect id_start, id_end, and distance
        for id_end in df.columns:
            if id_start != id_end:  # Exclude the same id_start and id_end
                distance = df.at[id_start, id_end]
                rows.append({'id_start': id_start, 'id_end': id_end, 'distance': distance})
    

    unrolled_df = pd.DataFrame(rows)    # Convert the list of rows into a DataFrame
    return unrolled_df
file_path = r'C:\Users\kartik\Desktop\dataset-2.csv'
df = pd.read_csv(file_path)
distance_matrix = calculate_distance_matrix(df)


unrolled_df = unroll_distance_matrix(distance_matrix)
print(unrolled_df.head(10))



   id_start   id_end  distance
0   1001400  1001402       9.7
1   1001400  1001404      29.9
2   1001400  1001406      45.9
3   1001400  1001408      67.6
4   1001400  1001410      78.7
5   1001400  1001412      94.3
6   1001400  1001414     112.5
7   1001400  1001416     125.7
8   1001400  1001418     139.3
9   1001400  1001420     152.2


In [36]:
import pandas as pd
import numpy as np

def find_ids_within_ten_percentage_threshold(df: pd.DataFrame, reference_id: int) -> pd.DataFrame:
    
    
    reference_distances = df[df['id_start'] == reference_id]['distance']   #distances associated with the reference ID

    if reference_distances.empty:
        print(f"No distances found for reference ID: {reference_id}")
        return pd.DataFrame(columns=['id_start'])  # Return empty DataFrame with the appropriate column name

    average_distance = reference_distances.mean()
 
    threshold_range = (average_distance * 0.9, average_distance * 1.1)   #10% threshold range
    print(f"Reference ID: {reference_id}, Average Distance: {average_distance:.2f}, "   # average and threshold information
          f"Threshold Range: {threshold_range[0]:.2f} to {threshold_range[1]:.2f}")

    within_threshold_ids = df[(df['distance'] >= threshold_range[0]) & 
                               (df['distance'] <= threshold_range[1])]['id_start']
     
    result_df = pd.DataFrame(within_threshold_ids.unique(), columns=['id_start'])# Create a DataFrame of unique IDs

    result_df.sort_values(by='id_start', inplace=True)    # Sort the DataFrame by id_start for consistency


    return result_df.reset_index(drop=True)  # Reset index for a clean DataFrame

unrolled_df = unroll_distance_matrix(distance_matrix)   #unrolled_df is the DataFrame generated from the distance matrix

reference_id = 1001400  # reference ID taken from data
result_df = find_ids_within_ten_percentage_threshold(unrolled_df, reference_id)

print(result_df)


Reference ID: 1001400, Average Distance: 244.00, Threshold Range: 219.60 to 268.39
    id_start
0    1001400
1    1001402
2    1001404
3    1001406
4    1001408
5    1001410
6    1001412
7    1001414
8    1001416
9    1001418
10   1001420
11   1001422
12   1001424
13   1001426
14   1001428
15   1001430
16   1001432
17   1001434
18   1001436
19   1001437
20   1001438
21   1001440
22   1001442
23   1001444
24   1001446
25   1001448
26   1001450
27   1001452
28   1001454
29   1001456
30   1001458
31   1001460
32   1001461
33   1001462
34   1001464
35   1001466
36   1001468
37   1001470
38   1001472
39   1001488
40   1004354
41   1004355
42   1004356


In [38]:
import pandas as pd

def calculate_toll_rate(df: pd.DataFrame) -> pd.DataFrame:

    # Rate coefficients for different vehicle types
    rates = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6 }
    for vehicle, rate in rates.items():
        df[vehicle] = df['distance'] * rate

    return df

unrolled_df = unroll_distance_matrix(distance_matrix)   # The DataFrame generated from unroll_distance_matrix

toll_rates_df = calculate_toll_rate(unrolled_df)
print(toll_rates_df.head(10))


   id_start   id_end  distance    moto     car      rv     bus   truck
0   1001400  1001402       9.7    7.76   11.64   14.55   21.34   34.92
1   1001400  1001404      29.9   23.92   35.88   44.85   65.78  107.64
2   1001400  1001406      45.9   36.72   55.08   68.85  100.98  165.24
3   1001400  1001408      67.6   54.08   81.12  101.40  148.72  243.36
4   1001400  1001410      78.7   62.96   94.44  118.05  173.14  283.32
5   1001400  1001412      94.3   75.44  113.16  141.45  207.46  339.48
6   1001400  1001414     112.5   90.00  135.00  168.75  247.50  405.00
7   1001400  1001416     125.7  100.56  150.84  188.55  276.54  452.52
8   1001400  1001418     139.3  111.44  167.16  208.95  306.46  501.48
9   1001400  1001420     152.2  121.76  182.64  228.30  334.84  547.92


In [59]:
import pandas as pd
import numpy as np
from datetime import time

def calculate_time_based_toll_rates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate time-based toll rates for different vehicle types, modifying values based on time intervals 
    and day of the week.

    Args:
        df (pandas.DataFrame): Input DataFrame with toll rates for different vehicles.

    Returns:
        pandas.DataFrame: DataFrame with additional time-based toll rates for all vehicles.
    """
    # Define discount factors based on time intervals and day types
    def get_discount_factor(day, current_time):
        # Weekday logic (Monday - Friday)
        if day in ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']:
            if time(0, 0) <= current_time <= time(10, 0):
                return 0.8
            elif time(10, 0) <= current_time <= time(18, 0):
                return 1.2
            else:
                return 0.8
        # Weekend logic (Saturday, Sunday)
        else:  # Saturday, Sunday
            return 0.7

    # Create an empty list to store rows for time-based toll rates
    rows_list = []

    # Days of the week and time intervals (24-hour time periods split for a full day)
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    time_intervals = [
        (time(0, 0), time(10, 0)),
        (time(10, 0), time(18, 0)),
        (time(18, 0), time(23, 59, 59))
    ]

    # Loop over each row in the original DataFrame
    for _, row in df.iterrows():
        id_start = row['id_start']
        id_end = row['id_end']
        base_rates = {  # Extract vehicle base rates
            'moto': row['moto'],
            'car': row['car'],
            'rv': row['rv'],
            'bus': row['bus'],
            'truck': row['truck']
        }

        # Generate rows for each day of the week and time interval
        for day in days:
            for start_time, end_time in time_intervals:
                # Calculate discount factor based on day and time interval
                discount_factor = get_discount_factor(day, start_time)

                # Apply discount factor to each vehicle rate
                adjusted_rates = {vehicle: rate * discount_factor for vehicle, rate in base_rates.items()}

                # Add the new row to the list
                rows_list.append({
                    'id_start': id_start,
                    'id_end': id_end,
                    'start_day': day,
                    'start_time': start_time,
                    'end_day': day,
                    'end_time': end_time,
                    **adjusted_rates  # Add the adjusted rates for moto, car, rv, bus, truck
                })

    # Convert the list of rows into a DataFrame directly from the list of dictionaries
    time_based_df = pd.DataFrame(rows_list)

    return time_based_df

# Example usage:
# Assuming toll_rates_df is the DataFrame created with toll rates for different vehicles
# Generate time-based toll rates for all days and time intervals
time_based_toll_rates_df = calculate_time_based_toll_rates(toll_rates_df)

# Display the result
print(time_based_toll_rates_df)


        id_start     id_end start_day start_time   end_day  end_time   moto  \
0      1001400.0  1001402.0    Monday   00:00:00    Monday  10:00:00  6.208   
1      1001400.0  1001402.0    Monday   10:00:00    Monday  18:00:00  6.208   
2      1001400.0  1001402.0    Monday   18:00:00    Monday  23:59:59  9.312   
3      1001400.0  1001402.0   Tuesday   00:00:00   Tuesday  10:00:00  6.208   
4      1001400.0  1001402.0   Tuesday   10:00:00   Tuesday  18:00:00  6.208   
...          ...        ...       ...        ...       ...       ...    ...   
37921  1004356.0  1004355.0  Saturday   10:00:00  Saturday  18:00:00  2.240   
37922  1004356.0  1004355.0  Saturday   18:00:00  Saturday  23:59:59  2.240   
37923  1004356.0  1004355.0    Sunday   00:00:00    Sunday  10:00:00  2.240   
37924  1004356.0  1004355.0    Sunday   10:00:00    Sunday  18:00:00  2.240   
37925  1004356.0  1004355.0    Sunday   18:00:00    Sunday  23:59:59  2.240   

          car     rv     bus   truck  
0       9.31