# Task2

## Question 1: Distance Matrix Calculation

In [4]:
import pandas as pd
import numpy as np
import networkx as nx

In [5]:
df2=pd.read_csv(r"C:\Users\HP\Downloads\dataset-3.csv")

In [7]:
def calculate_distance_matrix(df2):
    
    # Create a directed graph using networkx
    G = nx.DiGraph()

    # Add edges with distances to the graph
    for _, row in df2.iterrows():
        G.add_edge(row['id_start'], row['id_end'], weight=row['distance'])

    # Calculate the shortest paths between nodes
    all_pairs_shortest_paths = dict(nx.all_pairs_dijkstra_path_length(G))

    # Create a DataFrame to store distances between IDs
    ids = sorted(set(df2['id_start'].unique()) | set(df2['id_end'].unique()))
    distance_matrix = pd.DataFrame(index=ids, columns=ids)

    # Fill the distance matrix with cumulative distances
    for source in ids:
        for destination in ids:
            if source == destination:
                distance_matrix.at[source, destination] = 0
            elif destination in all_pairs_shortest_paths[source]:
                distance_matrix.at[source, destination] = all_pairs_shortest_paths[source][destination]
            else:
                # If there is no direct route, set distance to NaN
                distance_matrix.at[source, destination] = float('nan')

    return distance_matrix

result_distance_matrix = calculate_distance_matrix(df2)
result_distance_matrix.fillna(0, inplace=True)
# Display the resulting distance matrix
print(result_distance_matrix)

         1001400  1001402  1001404  1001406  1001408  1001410  1001412  \
1001400        0      9.7     29.9     45.9     67.6     78.7     94.3   
1001402        0      0.0     20.2     36.2     57.9     69.0     84.6   
1001404        0      0.0      0.0     16.0     37.7     48.8     64.4   
1001406        0      0.0      0.0      0.0     21.7     32.8     48.4   
1001408        0      0.0      0.0      0.0      0.0     11.1     26.7   
1001410        0      0.0      0.0      0.0      0.0      0.0     15.6   
1001412        0      0.0      0.0      0.0      0.0      0.0      0.0   
1001414        0      0.0      0.0      0.0      0.0      0.0      0.0   
1001416        0      0.0      0.0      0.0      0.0      0.0      0.0   
1001418        0      0.0      0.0      0.0      0.0      0.0      0.0   
1001420        0      0.0      0.0      0.0      0.0      0.0      0.0   
1001422        0      0.0      0.0      0.0      0.0      0.0      0.0   
1001424        0      0.0      0.0    

## Question 2: Unroll Distance Matrix

In [8]:
import pandas as pd

def unroll_distance_matrix(distance_matrix):
    # Create lists to store unrolled data
    id_start_list = []
    id_end_list = []
    distance_list = []

    # Iterate over the distance matrix
    for id_start in distance_matrix.index:
        for id_end in distance_matrix.columns:
            if id_start != id_end:
                # Append values to lists
                id_start_list.append(id_start)
                id_end_list.append(id_end)
                distance_list.append(distance_matrix.at[id_start, id_end])

    # Create a DataFrame from the lists
    unrolled_df = pd.DataFrame({'id_start': id_start_list, 'id_end': id_end_list, 'distance': distance_list})

    return unrolled_df

result_unrolled_df = unroll_distance_matrix(result_distance_matrix)


print(result_unrolled_df)

      id_start   id_end  distance
0      1001400  1001402       9.7
1      1001400  1001404      29.9
2      1001400  1001406      45.9
3      1001400  1001408      67.6
4      1001400  1001410      78.7
...        ...      ...       ...
1801   1004356  1001470     159.8
1802   1004356  1001472     175.8
1803   1004356  1001488       0.0
1804   1004356  1004354       2.0
1805   1004356  1004355       4.0

[1806 rows x 3 columns]


## Question 3: Finding IDs within Percentage Threshold

In [9]:
def find_ids_within_ten_percentage_threshold(result_unrolled_df, reference_value):
    # Filter the DataFrame based on the reference value
    reference_df = df2[df2['id_start'] == reference_value]

    # Calculate the average distance for the reference value
    reference_average_distance = reference_df['distance'].mean()

    # Calculate the lower and upper thresholds (within 10%)
    lower_threshold = reference_average_distance * 0.9
    upper_threshold = reference_average_distance * 1.1

    # Filter the DataFrame based on the thresholds
    filtered_df = df2[(df2['distance'] >= lower_threshold) & (df2['distance'] <= upper_threshold)]

    # Get the sorted list of unique id_start values within the threshold
    result_ids = sorted(filtered_df['id_start'].unique())

    return result_ids

reference_value = df2['id_start'] 
result_within_threshold = find_ids_within_ten_percentage_threshold(df2, reference_value)

print(result_within_threshold)


[1001400, 1001408, 1001422, 1001438, 1001450, 1001452, 1001466, 1001468]


## Question 4: Calculate Toll Rate

In [13]:
import pandas as pd

def calculate_toll_rate(result_within_threshold):
    # Define rate coefficients for each vehicle type
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    # Iterate over vehicle types and calculate toll rates
    for vehicle_type, rate_coefficient in rate_coefficients.items():
        column_name = f'{vehicle_type}_toll'  # Define the column name
        df2[column_name] = df2['distance'] * rate_coefficient  # Calculate toll rate

    return df2

result_with_toll_rates = calculate_toll_rate(result_unrolled_df)

# Display the resulting DataFrame with toll rates
print(result_with_toll_rates)


    id_start   id_end  distance  moto_toll  car_toll  rv_toll  bus_toll  \
0    1001400  1001402       9.7       7.76     11.64    14.55     21.34   
1    1001402  1001404      20.2      16.16     24.24    30.30     44.44   
2    1001404  1001406      16.0      12.80     19.20    24.00     35.20   
3    1001406  1001408      21.7      17.36     26.04    32.55     47.74   
4    1001408  1001410      11.1       8.88     13.32    16.65     24.42   
5    1001410  1001412      15.6      12.48     18.72    23.40     34.32   
6    1001412  1001414      18.2      14.56     21.84    27.30     40.04   
7    1001414  1001416      13.2      10.56     15.84    19.80     29.04   
8    1001416  1001418      13.6      10.88     16.32    20.40     29.92   
9    1001418  1001420      12.9      10.32     15.48    19.35     28.38   
10   1001420  1001422       9.6       7.68     11.52    14.40     21.12   
11   1001422  1001424      11.4       9.12     13.68    17.10     25.08   
12   1001424  1001426    

## Question 5: Calculate Time-Based Toll Rates

In [15]:
def calculate_time_based_toll_rates(df2):
    df2['start_time'] = pd.to_datetime(df2['start_time'])
    df2['end_time'] = pd.to_datetime(df2['end_time'])
    weekday_morning = pd.to_datetime('10:00:00').time()
    weekday_evening = pd.to_datetime('18:00:00').time()
    def apply_discount(row):
        if row['start_time'].weekday() < 5:  # Weekdays (Monday - Friday)
            if row['start_time'].time() < weekday_morning:
                return row * 0.8
            elif row['start_time'].time() < weekday_evening:
                return row * 1.2
            else:
                return row * 0.8
        else: 
            return row * 0.7
    vehicles = ['moto', 'car', 'rv', 'bus', 'truck']
    for vehicle in vehicles:
        df2[vehicle] = df2[vehicle].apply(apply_discount)
    days_of_week = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
    df2['start_day'] = df2['start_time'].dt.weekday.map(days_of_week)
    df2['end_day'] = df2['end_time'].dt.weekday.map(days_of_week)
    df2['start_time'] = df2['start_time'].dt.time
    df2['end_time'] = df2['end_time'].dt.time
    return df2