In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
from pathlib import Path

# Resolve data/incidents_final.csv underneath the current working directory.
csv_path = Path.cwd().joinpath('data', 'incidents_final.csv')

# Read the CSV into a DataFrame and show its first five rows as the cell output.
incidents_df = pd.read_csv(csv_path)
incidents_df.head(n=5)

Unnamed: 0,Incident_ID,Customer_ID,Age,Gender,Season,Weather,Activity,Slope_Name,Slope_Difficulty,Skill_Level,Protective_Gear,Injuries,Response_Time,Hospital_Transport,Hospital_Stay_Length,Severity_Category
0,0,2679,33,Male,Winter,Foggy,Skiing,Bunny Hill,Easy,Beginner,Helmet,Sprain,15,No,0,Medium
1,1,4257,36,Female,Summer,Sunny,Hiking,Devil’s Drop,Expert,Beginner,Helmet,Fracture,23,No,0,Medium
2,2,7924,47,Female,Winter,Foggy,Snowboarding,Thunder Run,Advanced,Beginner,Helmet,Sprain,13,Yes,1,Critical
3,3,7224,56,Male,Summer,Sunny,Hiking,Thunder Run,Advanced,Intermediate,Helmet,Fracture,9,No,0,Medium
4,4,9785,17,Male,Summer,Windy,Climbing,Blue Ridge,Intermediate,Beginner,Unknown,Head Injury,10,Yes,4,Critical


In [10]:
# Split the incidents on the Hospital_Transport flag: 'Yes' rows go to
# hospital_df, 'No' rows go to resort_df. Rows holding any other value
# (or a missing value) land in neither frame.
hospital_df = incidents_df.loc[incidents_df['Hospital_Transport'].eq('Yes')]
resort_df = incidents_df.loc[incidents_df['Hospital_Transport'].eq('No')]

In [12]:
# Preview the first five rows of the hospital-transport subset.
hospital_df.head(n=5)

Unnamed: 0,Incident_ID,Customer_ID,Age,Gender,Season,Weather,Activity,Slope_Name,Slope_Difficulty,Skill_Level,Protective_Gear,Injuries,Response_Time,Hospital_Transport,Hospital_Stay_Length,Severity_Category
2,2,7924,47,Female,Winter,Foggy,Snowboarding,Thunder Run,Advanced,Beginner,Helmet,Sprain,13,Yes,1,Critical
4,4,9785,17,Male,Summer,Windy,Climbing,Blue Ridge,Intermediate,Beginner,Unknown,Head Injury,10,Yes,4,Critical
12,12,3621,37,Female,Winter,Windy,Skiing,Devil’s Drop,Expert,Beginner,Helmet,Fracture,19,Yes,2,Critical
16,16,6310,49,Female,Winter,Snowy,Snowboarding,Rocky Pass,Intermediate,Advanced,Helmet,Sprain,13,Yes,3,Critical
20,20,2049,49,Female,Summer,Windy,Climbing,Summit Peak,Advanced,Beginner,Helmet,Fracture,15,Yes,3,Critical


In [13]:
# List the distinct Severity_Category values present in the hospital-transport subset.
pd.unique(hospital_df['Severity_Category'])

array(['Critical'], dtype=object)

In [11]:
# For every (Slope_Name, Severity_Category) pair in hospital_df, summarise
# Response_Time with count / mean / std / min / max. reset_index() turns the
# group keys back into ordinary columns.
hospital_response_stats = hospital_df.groupby(
    by=["Slope_Name", "Severity_Category"]
)["Response_Time"].aggregate(["count", "mean", "std", "min", "max"]).reset_index()

print("=== HOSPITAL TRANSPORT: Response Time by Slope & Severity ===")
print(hospital_response_stats)

=== HOSPITAL TRANSPORT: Response Time by Slope & Severity ===
      Slope_Name Severity_Category  count       mean       std  min  max
0     Blue Ridge          Critical     18  13.222222  2.712836   10   19
1     Bunny Hill          Critical     14  13.357143  2.373156    9   17
2   Devil’s Drop          Critical     27  19.814815  3.363115   15   27
3     Rocky Pass          Critical     22  14.181818  3.762425    9   22
4  Shadow Valley          Critical     20  13.200000  3.286335    9   21
5    Summit Peak          Critical     24  14.250000  2.048329   10   18
6    Thunder Run          Critical     14  13.785714  2.547354   10   19


In [14]:
# Same Response_Time summary as a four-level breakdown of hospital_df:
# one row per (Slope_Name, Season, Weather, Severity_Category) combination.
multi_factors_hospital = hospital_df.groupby(
    by=["Slope_Name", "Season", "Weather", "Severity_Category"]
)["Response_Time"].aggregate(["count", "mean", "std", "min", "max"]).reset_index()

print("=== Hospital Transport: Response Time by Slope, Season, Weather, Severity ===")
print(multi_factors_hospital)

=== Hospital Transport: Response Time by Slope, Season, Weather, Severity ===
       Slope_Name  Season Weather Severity_Category  count       mean  \
0      Blue Ridge  Summer   Rainy          Critical      3  11.666667   
1      Blue Ridge  Summer   Sunny          Critical      2  13.500000   
2      Blue Ridge  Summer   Windy          Critical      3  12.666667   
3      Blue Ridge  Winter   Foggy          Critical      2  13.500000   
4      Blue Ridge  Winter   Snowy          Critical      4  14.000000   
5      Blue Ridge  Winter   Sunny          Critical      2  16.000000   
6      Blue Ridge  Winter   Windy          Critical      2  11.500000   
7      Bunny Hill  Summer   Rainy          Critical      2  15.000000   
8      Bunny Hill  Summer   Sunny          Critical      3  13.333333   
9      Bunny Hill  Winter   Foggy          Critical      3  13.333333   
10     Bunny Hill  Winter   Snowy          Critical      1  12.000000   
11     Bunny Hill  Winter   Sunny          Cri

In [16]:
# Within each (Slope_Name, Season) group, idxmax() returns the row label whose
# "mean" column is largest. Those labels then select the matching rows of
# multi_factors_hospital, and the result is renumbered from zero.
idx = multi_factors_hospital.groupby(by=["Slope_Name", "Season"])["mean"].idxmax()
max_weather_df = multi_factors_hospital.loc[idx, :].reset_index(drop=True)

print("=== For each slope & season, the weather with the highest average response time ===")
print(max_weather_df)

=== For each slope & season, the weather with the highest average response time ===
       Slope_Name  Season Weather Severity_Category  count       mean  \
0      Blue Ridge  Summer   Sunny          Critical      2  13.500000   
1      Blue Ridge  Winter   Sunny          Critical      2  16.000000   
2      Bunny Hill  Summer   Rainy          Critical      2  15.000000   
3      Bunny Hill  Winter   Windy          Critical      3  14.333333   
4    Devil’s Drop  Summer   Rainy          Critical      4  20.000000   
5    Devil’s Drop  Winter   Foggy          Critical      4  23.000000   
6      Rocky Pass  Summer   Sunny          Critical      4  14.000000   
7      Rocky Pass  Winter   Foggy          Critical      3  17.000000   
8   Shadow Valley  Summer   Windy          Critical      4  13.500000   
9   Shadow Valley  Winter   Snowy          Critical      3  15.333333   
10    Summit Peak  Summer   Windy          Critical      3  16.333333   
11    Summit Peak  Winter   Foggy       

#### 1. Devil's Drop Consistently Shows the Highest Response Times
**Top-Level Averages (All Conditions):**
- Devil's Drop has a mean of 19.8 minutes - noticeably higher than all other slopes (which cluster around 13-14 minutes)
- This gap is significant (roughly 5+ minutes longer on average)

**Detailed Breakdown (Slope, Season, Weather):**
- Winter + Foggy stands out with a mean of 23.0 minutes for Devil's Drop
- Even in other winter weather (Snowy, Sunny, Windy), response times remain high (19-20+ minutes)
- In summer, Rainy conditions also reach an average of 20.0 minutes
- Minimum times never drop below 15 minutes (the slope's overall minimum), whereas other slopes see minimums of 9-10 minutes

**Insight:** Devil's Drop's remoteness likely contributes to significantly longer rescue/transport times for critical injuries, especially in **winter** and **foggy** conditions.

#### 2. Winter + Foggy has a noticeable impact on Rocky Pass
- **Rocky Pass (Winter, Foggy):** 17.0 minutes, higher than the 13-16 minutes often seen on other slopes or in other conditions

#### 3. Summary:
- Devil's Drop stands out as the most significant challenge, with consistently higher transport times for critical injuries - especially in winter.
- Rocky Pass sees a moderate spike (~17 minutes) in winter fog, but otherwise doesn't approach Devil's Drop's extremes.
- Focus resources on Devil's Drop (and, to a lesser extent, Rocky Pass in winter fog) to reduce critical rescue delays

In [18]:
# Response_Time summary (count / mean / std / min / max) for resort_df, one row
# per (Slope_Name, Season, Weather, Severity_Category) combination.
resort_factors = resort_df.groupby(
    by=["Slope_Name", "Season", "Weather", "Severity_Category"]
)["Response_Time"].aggregate(["count", "mean", "std", "min", "max"]).reset_index()

print("=== RESORT: Response Time by Slope, Season, Weather, Severity ===")
print(resort_factors)

=== RESORT: Response Time by Slope, Season, Weather, Severity ===
      Slope_Name  Season Weather Severity_Category  count       mean  \
0     Blue Ridge  Summer   Rainy             Light      3  17.000000   
1     Blue Ridge  Summer   Rainy            Medium     10  14.900000   
2     Blue Ridge  Summer   Sunny          Critical      1  15.000000   
3     Blue Ridge  Summer   Sunny             Light      6  16.000000   
4     Blue Ridge  Summer   Sunny            Medium     17  13.647059   
..           ...     ...     ...               ...    ...        ...   
125  Thunder Run  Winter   Sunny          Critical      3  10.000000   
126  Thunder Run  Winter   Sunny            Medium     11  12.363636   
127  Thunder Run  Winter   Windy          Critical      1  10.000000   
128  Thunder Run  Winter   Windy             Light      2  11.000000   
129  Thunder Run  Winter   Windy            Medium     13  13.000000   

          std  min  max  
0    5.291503   13   23  
1    3.142893   1

In [19]:
# Step 1: average Response_Time for every (Slope_Name, Season, Weather) group
# in resort_df, stored in a column called "mean_response_time".
resort_weather_means = (
    resort_df.groupby(by=["Slope_Name", "Season", "Weather"])["Response_Time"]
    .mean()
    .rename("mean_response_time")
    .reset_index()
)

# Step 2: within each (Slope_Name, Season) pair, keep only the row whose
# mean_response_time is the largest, then renumber the rows from zero.
idx = resort_weather_means.groupby(by=["Slope_Name", "Season"])["mean_response_time"].idxmax()
resort_max_weather_df = resort_weather_means.loc[idx, :].reset_index(drop=True)

print("\n=== RESORT: For each slope & season, the weather with the highest average response time ===")
print(resort_max_weather_df)



=== RESORT: For each slope & season, the weather with the highest average response time ===
       Slope_Name  Season Weather  mean_response_time
0      Blue Ridge  Summer   Rainy           15.384615
1      Blue Ridge  Winter   Windy           14.428571
2      Bunny Hill  Summer   Windy           14.000000
3      Bunny Hill  Winter   Sunny           14.230769
4    Devil’s Drop  Summer   Rainy           18.428571
5    Devil’s Drop  Winter   Windy           20.692308
6      Rocky Pass  Summer   Sunny           13.520000
7      Rocky Pass  Winter   Windy           13.470588
8   Shadow Valley  Summer   Rainy           13.333333
9   Shadow Valley  Winter   Snowy           13.555556
10    Summit Peak  Summer   Windy           13.848485
11    Summit Peak  Winter   Sunny           14.785714
12    Thunder Run  Summer   Windy           14.476190
13    Thunder Run  Winter   Snowy           13.350000


In [20]:
# Rebuild resort_factors with Slope_Difficulty added to the grouping keys:
# Response_Time count / mean / std / min / max per
# (Slope_Name, Slope_Difficulty, Season, Weather, Severity_Category).
# NOTE: this overwrites the resort_factors computed by the earlier cell.
resort_factors = resort_df.groupby(
    by=["Slope_Name", "Slope_Difficulty", "Season", "Weather", "Severity_Category"]
)["Response_Time"].aggregate(["count", "mean", "std", "min", "max"]).reset_index()

print("=== RESORT: Response Time by Slope, Difficulty, Season, Weather, Severity ===")
print(resort_factors)

=== RESORT: Response Time by Slope, Difficulty, Season, Weather, Severity ===
      Slope_Name Slope_Difficulty  Season Weather Severity_Category  count  \
0     Blue Ridge     Intermediate  Summer   Rainy             Light      3   
1     Blue Ridge     Intermediate  Summer   Rainy            Medium     10   
2     Blue Ridge     Intermediate  Summer   Sunny          Critical      1   
3     Blue Ridge     Intermediate  Summer   Sunny             Light      6   
4     Blue Ridge     Intermediate  Summer   Sunny            Medium     17   
..           ...              ...     ...     ...               ...    ...   
125  Thunder Run         Advanced  Winter   Sunny          Critical      3   
126  Thunder Run         Advanced  Winter   Sunny            Medium     11   
127  Thunder Run         Advanced  Winter   Windy          Critical      1   
128  Thunder Run         Advanced  Winter   Windy             Light      2   
129  Thunder Run         Advanced  Winter   Windy            Med

In [21]:
# Step 1: average Response_Time for every
# (Slope_Name, Slope_Difficulty, Season, Weather) group in resort_df, stored in
# a column called "mean_response_time".
# NOTE: this overwrites the resort_weather_means computed by the earlier cell.
resort_weather_means = (
    resort_df.groupby(by=["Slope_Name", "Slope_Difficulty", "Season", "Weather"])["Response_Time"]
    .mean()
    .rename("mean_response_time")
    .reset_index()
)

# Step 2: within each (Slope_Name, Slope_Difficulty, Season) group, keep only
# the row with the largest mean_response_time, then renumber the rows from zero.
idx = resort_weather_means.groupby(
    by=["Slope_Name", "Slope_Difficulty", "Season"]
)["mean_response_time"].idxmax()
resort_max_weather_df = resort_weather_means.loc[idx, :].reset_index(drop=True)

print("\n=== RESORT: For each slope, difficulty, season, the weather with the highest average response time ===")
print(resort_max_weather_df)


=== RESORT: For each slope, difficulty, season, the weather with the highest average response time ===
       Slope_Name Slope_Difficulty  Season Weather  mean_response_time
0      Blue Ridge     Intermediate  Summer   Rainy           15.384615
1      Blue Ridge     Intermediate  Winter   Windy           14.428571
2      Bunny Hill             Easy  Summer   Windy           14.000000
3      Bunny Hill             Easy  Winter   Sunny           14.230769
4    Devil’s Drop           Expert  Summer   Rainy           18.428571
5    Devil’s Drop           Expert  Winter   Windy           20.692308
6      Rocky Pass     Intermediate  Summer   Sunny           13.520000
7      Rocky Pass     Intermediate  Winter   Windy           13.470588
8   Shadow Valley           Expert  Summer   Rainy           13.333333
9   Shadow Valley           Expert  Winter   Snowy           13.555556
10    Summit Peak         Advanced  Summer   Windy           13.848485
11    Summit Peak         Advanced  Winter  

#### Key Takeaway
Devil's Drop stands out as the most problematic slope for on-site response times, reaching ~20.7 minutes in winter wind and ~18.4 minutes in summer rain - far above the 13-15 minute upper range typical of other slopes. Despite differences in slope difficulty and weather, no other location approaches those extremes, indicating Devil's Drop is a top priority for operational improvements to shorten critical rescue intervals.