In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the station entries dataset (path is relative to the working directory)
file_path = "everyday_2024_w_metro_station.csv"
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. Chunked inference can leave a column with mixed
# types and emits a DtypeWarning when it does.
# NOTE(review): the previous run's stderr looks like that warning — confirm,
# and consider passing an explicit dtype= mapping once the offending columns are known.
df = pd.read_csv(file_path, low_memory=False)

# Display first few rows to inspect data
print(df.head())

   Station Name        Date    Month  Season  Entries (AM Peak (Open-9:30am))  \
0  Addison Road  2024-01-01  January  Winter                              100   
1  Addison Road  2024-01-02  January  Winter                              642   
2  Addison Road  2024-01-03  January  Winter                              864   
3  Addison Road  2024-01-04  January  Winter                              858   
4  Addison Road  2024-01-05  January  Winter                              722   

   Entries (Evening (7pm-12am))  Entries (Late Night (12am-Close))  \
0                            66                                  2   
1                            96                                 15   
2                            84                                 10   
3                            76                                 31   
4                            82                                  1   

   Entries (Midday (9:30am-3pm))  Entries (PM Peak (3pm-7pm))  \
0                          

  df = pd.read_csv(file_path)


In [5]:
# Entry-count columns, one per time-of-day period
traffic_columns = [
    "Entries (AM Peak (Open-9:30am))",
    "Entries (Midday (9:30am-3pm))",
    "Entries (PM Peak (3pm-7pm))",
    "Entries (Evening (7pm-12am))",
    "Entries (Late Night (12am-Close))",
]

# Coerce every entry column to a numeric dtype in one pass;
# values that cannot be parsed as numbers become NaN.
df[traffic_columns] = df[traffic_columns].apply(pd.to_numeric, errors="coerce")

# Row-wise total of entries across all time periods (NaN cells are skipped by sum)
df["Total_Foot_Traffic"] = df[traffic_columns].sum(axis="columns")

In [6]:
# Keep only the rows (one station on one date) that recorded at least one entry;
# rows whose Total_Foot_Traffic is 0 (e.g. the station was closed) are removed.
has_entries = df["Total_Foot_Traffic"].gt(0)
df = df.loc[has_entries]

In [7]:
# Wide -> long: one row per (station, date, time period) with its entry count
df_melted = pd.melt(
    df,
    id_vars=["Station Name", "Date"],
    value_vars=traffic_columns,
    var_name="Time Period",
    value_name="Foot_Traffic",
)

# Baseline per station: mean entries over all of that station's rows,
# regardless of date or time period, broadcast back onto every row.
df_melted["Station_Avg"] = (
    df_melted
    .groupby("Station Name")["Foot_Traffic"]
    .transform("mean")
)

# Dynamic pricing percentage: relative deviation of each slot from its
# station-wide baseline, expressed in percent.
df_melted["Dynamic_Price_Percentage"] = (
    df_melted["Foot_Traffic"]
    .sub(df_melted["Station_Avg"])
    .div(df_melted["Station_Avg"])
    .mul(100)
)

# Order by raw foot traffic (busiest first) rather than by price percentage
df_sorted = df_melted.sort_values("Foot_Traffic", ascending=False)

# Show the 10 busiest station/time slots together with their price adjustment
df_sorted.head(10)[["Station Name", "Date", "Time Period", "Foot_Traffic", "Dynamic_Price_Percentage"]]


Unnamed: 0,Station Name,Date,Time Period,Foot_Traffic,Dynamic_Price_Percentage
99641,Smithsonian,2024-03-24,Entries (PM Peak (3pm-7pm)),19275,1661.959566
62743,Rosslyn,2024-10-27,Entries (Midday (9:30am-3pm)),19233,1359.930746
99647,Smithsonian,2024-03-30,Entries (PM Peak (3pm-7pm)),16869,1442.023135
135483,Smithsonian,2024-07-04,Entries (Evening (7pm-12am)),16405,1399.608129
129365,Navy Yard-Ballpark,2024-07-19,Entries (Evening (7pm-12am)),16222,860.989092
129323,Navy Yard-Ballpark,2024-06-07,Entries (Evening (7pm-12am)),15788,835.278991
129359,Navy Yard-Ballpark,2024-07-13,Entries (Evening (7pm-12am)),15085,793.633365
129366,Navy Yard-Ballpark,2024-07-20,Entries (Evening (7pm-12am)),14450,756.016051
125692,L'Enfant Plaza,2024-07-04,Entries (Evening (7pm-12am)),13448,631.985195
129349,Navy Yard-Ballpark,2024-07-03,Entries (Evening (7pm-12am)),13359,691.385358


In [8]:
# The 10 station/time slots with the largest price adjustment relative to
# their station-wide average.
most_expensive = (
    df_melted
    .sort_values("Dynamic_Price_Percentage", ascending=False)
    .iloc[:10]
)
most_expensive.loc[:, ["Station Name", "Date", "Time Period", "Foot_Traffic", "Dynamic_Price_Percentage"]]

Unnamed: 0,Station Name,Date,Time Period,Foot_Traffic,Dynamic_Price_Percentage
37186,Arlington Cemetery,2024-12-14,Entries (Midday (9:30am-3pm)),6636,3088.165028
128592,Morgan Boulevard,2024-06-08,Entries (Evening (7pm-12am)),5432,2644.877617
93007,Morgan Boulevard,2024-11-10,Entries (PM Peak (3pm-7pm)),5289,2572.617399
128733,Morgan Boulevard,2024-10-27,Entries (Evening (7pm-12am)),5256,2555.941965
92972,Morgan Boulevard,2024-10-06,Entries (PM Peak (3pm-7pm)),4808,2329.560306
128781,Morgan Boulevard,2024-12-14,Entries (Evening (7pm-12am)),4783,2316.927401
93021,Morgan Boulevard,2024-11-24,Entries (PM Peak (3pm-7pm)),3960,1901.052165
128726,Morgan Boulevard,2024-10-20,Entries (Evening (7pm-12am)),3779,1809.589932
92951,Morgan Boulevard,2024-09-15,Entries (PM Peak (3pm-7pm)),3737,1788.366652
93028,Morgan Boulevard,2024-12-01,Entries (PM Peak (3pm-7pm)),3655,1746.930723
