In [8]:
import pandas as pd

# Build one per-station table combining trip counts/communities, the
# fullness-deviation and utilisation metrics at hours 8 and 17, and the
# average trip duration, then write it to Data/Results/Results.csv.

# Read the three input tables.
metrics_stations_over_day = pd.read_csv('Data/Results/metrics_stations_over_day.csv')
station_trip_counts_with_communities = pd.read_csv('Data/Results/station_trip_counts_with_communities.csv')
average_trip_duration_per_station = pd.read_csv('Data/Results/average_trip_duration_per_station.csv')

# Keep only the rows recorded for hours 8 and 17.
hour_mask = metrics_stations_over_day['hour'].isin([8, 17])
fullness_utilization = metrics_stations_over_day.loc[hour_mask]

# Reshape to one row per station with one column per (metric, hour) pair.
# pivot() raises if a (station, hour) combination occurs more than once.
fullness_utilization_pivot = fullness_utilization.pivot(
    index='station',
    columns='hour',
    values=['fullness_deviation', 'Utilisation_avg'],
)

# Collapse the two-level (metric, hour) column labels into flat names such as
# "<metric>_at_<hour>".
fullness_utilization_pivot.columns = [
    f'{metric}_at_{hour}'
    for metric, hour in fullness_utilization_pivot.columns
]

# Join the three tables on the station name, drop the columns that are not
# wanted in the output, and give the remaining ones clearer names.
# NOTE(review): both merges use the default inner join, so a station missing
# from any one of the inputs is silently left out of the result -- confirm
# this is intended.
merged_df = (
    station_trip_counts_with_communities
    .merge(fullness_utilization_pivot, left_on='Station', right_index=True)
    .merge(average_trip_duration_per_station, left_on='Station', right_on='Station Name')
    .drop(columns=['Total Trips', 'Station Name'])
    .rename(columns={
        'Station': 'Station Name',
        'Start/End': 'Start/End Ratio',
        # Maps the column to its own name, i.e. a no-op kept from the original.
        'Average Trip Duration (minutes)': 'Average Trip Duration (minutes)',
    })
)

# Persist the combined table (without the positional index).
merged_df.to_csv('Data/Results/Results.csv', index=False)




In [9]:
# Score and rank stations from the combined per-station table in
# Data/Results/Results.csv, then write the ranked table to
# Data/Results/Results_ranked.csv.
results_df = pd.read_csv('Data/Results/Results.csv')

# Each normalised column is the source column divided by its own maximum.
# NOTE(review): this yields values in [0, 1] only when the source column is
# non-negative with a positive maximum; a deviation metric may contain
# negative values -- confirm the intended scaling.
normalization_sources = {
    'normalized_fullness_8': 'fullness_deviation_at_8',
    'normalized_fullness_17': 'fullness_deviation_at_17',
    'normalized_popularity': 'Start/End Ratio',
    'normalized_utilization_8': 'Utilisation_avg_at_8',
    'normalized_utilization_17': 'Utilisation_avg_at_17',
    'normalized_trip_duration': 'Average Trip Duration (minutes)',
}
for normalized_name, source_name in normalization_sources.items():
    source_values = results_df[source_name]
    results_df[normalized_name] = source_values / source_values.max()

# Weight applied to each normalised metric in the combined score:
#   fullness deviation at 8 and at 17 -> 0.4 each
#   start/end ratio ("popularity")    -> 0.2
#   utilisation at 8 and at 17        -> 0.2 each
#   average trip duration             -> 0.1
# NOTE(review): the weights add up to 1.5 rather than 1; this does not affect
# the ordering, only the scale of the score.
score_weights = {
    'normalized_fullness_8': 0.4,
    'normalized_fullness_17': 0.4,
    'normalized_popularity': 0.2,
    'normalized_utilization_8': 0.2,
    'normalized_utilization_17': 0.2,
    'normalized_trip_duration': 0.1,
}

# Accumulate the weighted terms left to right so the floating-point result
# matches a plain "a + b + c + ..." expression.
weighted_total = None
for normalized_name, weight in score_weights.items():
    weighted_term = weight * results_df[normalized_name]
    if weighted_total is None:
        weighted_total = weighted_term
    else:
        weighted_total = weighted_total + weighted_term
results_df['weighted_score'] = weighted_total

# Highest score gets rank 1. rank() uses its default tie handling, so tied
# scores share an averaged (possibly fractional) rank.
results_df['rank'] = results_df['weighted_score'].rank(ascending=False)

# Order the table from best to worst rank.
ranked_stations = results_df.sort_values('rank')

# Persist the ranked table (without the positional index).
output_ranked_file_path = 'Data/Results/Results_ranked.csv'
ranked_stations.to_csv(output_ranked_file_path, index=False)

