In [None]:
## question 1

import pandas as pd

def calculate_distance_matrix(dataframe):
    """Build a symmetric matrix of cumulative distances between ids.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Edge list with the columns ``'id'``, ``'id_2'`` and ``'distance'``.
        Each row is a known direct route between two ids.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed (rows and columns) by every id that appears in
        either ``'id'`` or ``'id_2'``, sorted. The diagonal is 0, the matrix
        is symmetric, and each cell holds the shortest cumulative distance
        along known routes (so if A-B and B-C are known, A-C is their sum).
        Pairs with no connecting route are NaN.

    Notes
    -----
    Routes are treated as bidirectional. When the same pair is listed more
    than once (in either direction) the smallest distance is kept. Rows whose
    distance is NaN are ignored.
    """
    # Every id from either column gets a row AND a column; a plain pivot is
    # not guaranteed to be square, which breaks reverse lookups.
    ids = pd.Index(dataframe['id'].unique()).union(pd.Index(dataframe['id_2'].unique()))
    size = len(ids)
    position = {node: offset for offset, node in enumerate(ids)}

    unknown = float('inf')
    seed = [[unknown] * size for _ in range(size)]
    for offset in range(size):
        seed[offset][offset] = 0.0

    for start, end, distance in zip(dataframe['id'], dataframe['id_2'], dataframe['distance']):
        row, col = position[start], position[end]
        # NaN distances fail this comparison and are therefore skipped.
        if row != col and distance < seed[row][col]:
            seed[row][col] = seed[col][row] = float(distance)

    grid = pd.DataFrame(seed, dtype=float).to_numpy(copy=True)

    # Floyd-Warshall relaxation: allow each id in turn as an intermediate stop.
    for stop in range(size):
        via_stop = grid[:, stop:stop + 1] + grid[stop:stop + 1, :]
        shorter = via_stop < grid
        grid[shorter] = via_stop[shorter]

    routes = pd.DataFrame(grid, index=ids.rename('id'), columns=ids.rename('id_2'))
    # Unreachable pairs are reported as NaN rather than infinity.
    return routes.where(routes != unknown)
# Load the raw route list and build the id-by-id distance matrix.
# NOTE(review): calculate_distance_matrix reads the 'id', 'id_2' and 'distance'
# columns — confirm dataset-3.csv uses exactly these header names.
dataset_3 = pd.read_csv('dataset-3.csv')
# distance_matrix is reused by the "question 2" cell below.
distance_matrix = calculate_distance_matrix(dataset_3)
print(distance_matrix)


In [None]:
## question 2

import pandas as pd
import itertools

def unroll_distance_matrix(distance_df):
    """Flatten a distance matrix into one row per ordered (start, end) pair.

    Parameters
    ----------
    distance_df : pandas.DataFrame
        Matrix whose row labels are start ids and whose column labels are
        end ids; each cell is the distance between the two.

    Returns
    -------
    pandas.DataFrame
        Columns ``'id_start'``, ``'id_end'`` and ``'distance'``. Pairs are
        emitted row label by row label, then column label by column label;
        pairs where the start and end labels are equal are omitted.
    """
    # Collect all records first and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and was quadratic when called in a loop.
    records = [
        {'id_start': start, 'id_end': end, 'distance': distance_df.at[start, end]}
        for start in distance_df.index
        for end in distance_df.columns
        if start != end
    ]
    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(records, columns=['id_start', 'id_end', 'distance'])

# Convert the matrix from the "question 1" cell into long form
# (id_start, id_end, distance); depends on distance_matrix being defined above.
result_unrolled_distances = unroll_distance_matrix(distance_matrix)
print(result_unrolled_distances)


In [None]:
## question 3

import pandas as pd

def find_ids_within_ten_percentage_threshold(dataframe, reference_value):
    """Return sorted ``id_start`` values with a distance near the reference mean.

    The mean ``distance`` over rows whose ``id_start`` equals
    ``reference_value`` is computed, and a band of plus/minus 10% of that mean
    (both ends inclusive) is formed. Every row of ``dataframe`` whose own
    ``distance`` lies in the band contributes its ``id_start``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``'id_start'`` and ``'distance'`` columns.
    reference_value
        The ``id_start`` value whose mean distance defines the band.

    Returns
    -------
    list
        Unique matching ``id_start`` values in ascending order; empty when
        ``reference_value`` has no rows (the mean is then NaN).
    """
    # NOTE(review): rows are matched on their individual distance, not on a
    # per-id average — confirm this is the intended comparison.
    distances = dataframe['distance']
    reference_mean = distances[dataframe['id_start'] == reference_value].mean()

    tolerance = 0.1 * reference_mean
    lower_bound = reference_mean - tolerance
    upper_bound = reference_mean + tolerance

    # Series.between is inclusive on both ends by default.
    in_band = distances.between(lower_bound, upper_bound)

    matching_ids = list(dataframe.loc[in_band, 'id_start'].unique())
    matching_ids.sort()
    return matching_ids

# Example run: id_start value whose mean distance defines the 10% band.
reference_id = 5  
# Uses the long-form frame produced in the "question 2" cell.
ids_within_threshold = find_ids_within_ten_percentage_threshold(result_unrolled_distances, reference_id)
print(ids_within_threshold)


In [None]:
## question 4

def calculate_toll_rate(dataframe):
    """Add one toll column per vehicle type, derived from ``distance``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'distance'`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns plus ``'moto'``, ``'car'``,
        ``'rv'``, ``'bus'`` and ``'truck'``, each equal to ``distance``
        multiplied by that vehicle's rate coefficient. The frame passed in
        is left unmodified.
    """
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    toll_columns = {
        vehicle_type: dataframe['distance'] * rate_coefficient
        for vehicle_type, rate_coefficient in rate_coefficients.items()
    }
    # assign() returns a new frame, so re-running earlier cells that use the
    # caller's frame still sees it without the toll columns.
    return dataframe.assign(**toll_columns)

# Derive per-vehicle toll columns from the long-form distances of "question 2".
result_with_toll_rates = calculate_toll_rate(result_unrolled_distances)
print(result_with_toll_rates)


In [None]:
## question 5

import pandas as pd

def calculate_time_based_toll_rates(dataframe):
    """Scale vehicle toll columns by a factor chosen from each row's start time.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``'start_time'`` and ``'end_time'`` columns that
        ``pd.to_datetime`` can parse. Any of the columns ``'moto'``,
        ``'car'``, ``'rv'``, ``'bus'``, ``'truck'`` that are present are
        scaled.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) in which:

        * ``'start_time'`` and ``'end_time'`` are datetimes;
        * ``'start_day'`` is the day name of ``start_time``, or
          ``'Invalid'`` where ``start_time`` is missing;
        * the vehicle columns are multiplied by 0.8 (weekday, 00:00-10:00),
          1.2 (weekday, 10:00-18:00), 0.8 (weekday, 18:00-23:59:59) or
          0.7 (Saturday/Sunday, whole day).

    Notes
    -----
    Range ends are inclusive and the first matching range wins, so exactly
    10:00:00 uses 0.8 and exactly 18:00:00 uses 1.2. Matching is done on
    whole seconds. Rows with a missing ``start_time`` keep their rates.
    """
    vehicle_columns = ['moto', 'car', 'rv', 'bus', 'truck']

    # (range start, range end, factor) as (hour, minute, second) tuples.
    weekday_time_ranges = [
        ((0, 0, 0), (10, 0, 0), 0.8),
        ((10, 0, 0), (18, 0, 0), 1.2),
        ((18, 0, 0), (23, 59, 59), 0.8),
    ]
    weekend_time_ranges = [
        ((0, 0, 0), (23, 59, 59), 0.7),
    ]

    def seconds_since_midnight(clock):
        hours, minutes, seconds = clock
        return hours * 3600 + minutes * 60 + seconds

    result = dataframe.copy()
    result['start_time'] = pd.to_datetime(result['start_time'])
    result['end_time'] = pd.to_datetime(result['end_time'])

    start = result['start_time']
    # day_name() yields NaN for missing timestamps; label those 'Invalid'.
    result['start_day'] = start.dt.day_name().fillna('Invalid')

    second_of_day = start.dt.hour * 3600 + start.dt.minute * 60 + start.dt.second
    day_number = start.dt.dayofweek  # Monday == 0 ... Sunday == 6

    factor = pd.Series(1.0, index=result.index)
    unmatched = pd.Series(True, index=result.index)
    schedules = (
        (day_number < 5, weekday_time_ranges),
        (day_number >= 5, weekend_time_ranges),
    )
    for on_these_days, time_ranges in schedules:
        for range_start, range_end, range_factor in time_ranges:
            hit = (
                on_these_days
                & unmatched
                & (second_of_day >= seconds_since_midnight(range_start))
                & (second_of_day <= seconds_since_midnight(range_end))
            )
            factor = factor.mask(hit, range_factor)
            # First matching range wins; later ranges must not override it.
            unmatched = unmatched & ~hit

    present = [column for column in vehicle_columns if column in result.columns]
    if present:
        result[present] = result[present].mul(factor, axis=0)

    return result

# Apply time-of-day / day-of-week handling to the toll table from "question 4".
# NOTE(review): calculate_time_based_toll_rates reads 'start_time' and
# 'end_time' columns, and nothing earlier in this notebook adds them to
# result_with_toll_rates — confirm they are added before this call,
# otherwise it raises KeyError.
result_with_time_based_rates = calculate_time_based_toll_rates(result_with_toll_rates)
print(result_with_time_based_rates)
