In [64]:
import os

import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
# `~` expands to the current user's home directory, so the notebook no longer
# depends on one specific account name being present in the path.
file_path = os.path.expanduser("~/Desktop/everyday_2024.csv")
df = pd.read_csv(file_path)

# Preview the first rows; a bare last expression uses the notebook's rich table
# rendering instead of the plain-text dump that print() produces.
df.head()

   Station Name        Date    Month  Season  Entries (AM Peak (Open-9:30am))  \
0  Addison Road  2024-01-01  January  Winter                              100   
1  Addison Road  2024-01-02  January  Winter                              642   
2  Addison Road  2024-01-03  January  Winter                              864   
3  Addison Road  2024-01-04  January  Winter                              858   
4  Addison Road  2024-01-05  January  Winter                              722   

   Entries (Evening (7pm-12am))  Entries (Late Night (12am-Close))  \
0                            66                                  2   
1                            96                                 15   
2                            84                                 10   
3                            76                                 31   
4                            82                                  1   

   Entries (Midday (9:30am-3pm))  Entries (PM Peak (3pm-7pm))  \
0                          

  df = pd.read_csv(file_path)


In [66]:
# Entry-count columns, one per service period of the day
traffic_columns = [
    "Entries (AM Peak (Open-9:30am))",
    "Entries (Midday (9:30am-3pm))",
    "Entries (PM Peak (3pm-7pm))",
    "Entries (Evening (7pm-12am))",
    "Entries (Late Night (12am-Close))",
]

# Coerce all period columns to numeric in one column-wise pass;
# values that cannot be parsed become NaN rather than raising.
df[traffic_columns] = df[traffic_columns].apply(pd.to_numeric, errors="coerce")

# Row-wise total across the period columns (NaN entries are skipped by sum)
df["Total_Foot_Traffic"] = df.loc[:, traffic_columns].sum(axis=1)

In [67]:
# Keep only rows whose daily total is positive. A zero total is treated as a
# closed station; note this removes individual station-day rows, not whole stations.
df = df.loc[df["Total_Foot_Traffic"].gt(0)]

In [76]:
# Wide -> long: one row per (station, date, time period) carrying that slot's entries,
# then attach the station baseline and each slot's deviation from it.
df_melted = df.melt(
    id_vars=["Station Name", "Date"],
    value_vars=traffic_columns,
    var_name="Time Period",
    value_name="Foot_Traffic",
).assign(
    # Baseline: mean entries per station across all of its dates and time periods
    Station_Avg=lambda long_df: (
        long_df.groupby("Station Name")["Foot_Traffic"].transform("mean")
    ),
    # Percentage by which a slot sits above (+) or below (-) its station baseline
    Dynamic_Price_Percentage=lambda long_df: (
        (long_df["Foot_Traffic"] - long_df["Station_Avg"]) / long_df["Station_Avg"] * 100
    ),
)

# Order slots by raw foot traffic, busiest first (not by price percentage)
df_sorted = df_melted.sort_values("Foot_Traffic", ascending=False)

# Show the 10 busiest station-time slots together with their pricing adjustment
df_sorted.head(10)[["Station Name", "Date", "Time Period", "Foot_Traffic", "Dynamic_Price_Percentage"]]


Unnamed: 0,Station Name,Date,Time Period,Foot_Traffic,Dynamic_Price_Percentage
99641,Smithsonian,2024-03-24,Entries (PM Peak (3pm-7pm)),19275,514.196612
62743,Rosslyn,2024-10-27,Entries (Midday (9:30am-3pm)),19233,955.872166
99647,Smithsonian,2024-03-30,Entries (PM Peak (3pm-7pm)),16869,437.52958
135483,Smithsonian,2024-07-04,Entries (Evening (7pm-12am)),16405,2296.008667
129365,Navy Yard-Ballpark,2024-07-19,Entries (Evening (7pm-12am)),16222,539.906621
129323,Navy Yard-Ballpark,2024-06-07,Entries (Evening (7pm-12am)),15788,522.786693
129359,Navy Yard-Ballpark,2024-07-13,Entries (Evening (7pm-12am)),15085,495.055565
129366,Navy Yard-Ballpark,2024-07-20,Entries (Evening (7pm-12am)),14450,470.006822
125692,L'Enfant Plaza,2024-07-04,Entries (Evening (7pm-12am)),13448,1008.483944
129349,Navy Yard-Ballpark,2024-07-03,Entries (Evening (7pm-12am)),13359,426.970321


In [75]:
# NOTE(review): this cell's recorded execution count (75) is lower than that of the
# cell defining df_melted (76); re-run the notebook top to bottom to refresh this output.
# The 10 station-time slots with the largest percentage above their station average.
most_expensive = (
    df_melted
    .sort_values("Dynamic_Price_Percentage", ascending=False)
    .iloc[:10]
)
most_expensive.loc[:, ["Station Name", "Date", "Time Period", "Foot_Traffic", "Dynamic_Price_Percentage"]]

Unnamed: 0,Station Name,Date,Time Period,Foot_Traffic,Dynamic_Price_Percentage
164536,Morgan Boulevard,2024-12-29,Entries (Late Night (12am-Close)),2122,14760.514198
108503,Arlington Cemetery,2024-07-04,Entries (Evening (7pm-12am)),3715,11006.763601
161246,King St-Old Town,2024-12-31,Entries (Late Night (12am-Close)),1409,6032.643596
144789,Ashburn,2024-12-31,Entries (Late Night (12am-Close)),75,4784.341637
128592,Morgan Boulevard,2024-06-08,Entries (Evening (7pm-12am)),5432,3605.389848
128733,Morgan Boulevard,2024-10-27,Entries (Evening (7pm-12am)),5256,3485.333034
146252,Braddock Road,2024-12-31,Entries (Late Night (12am-Close)),382,3373.590062
128781,Morgan Boulevard,2024-12-14,Entries (Evening (7pm-12am)),4783,3162.680347
160514,Innovation Center,2024-12-31,Entries (Late Night (12am-Close)),46,2563.924051
136513,Stadium-Armory,2024-04-27,Entries (Evening (7pm-12am)),6015,2510.999099
