In [17]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [18]:
from pathlib import Path

# Resolve incidents_final.csv inside the "data" folder of the current working
# directory, so the notebook must be launched from the project root.
csv_path = Path.cwd().joinpath('data', 'incidents_final.csv')

# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# saved row index — confirm whether it should be read with index_col=0.
incidents_df = pd.read_csv(csv_path)
incidents_df.head()

Unnamed: 0,Incident_ID,Customer_ID,Age,Gender,Season,Weather,Activity,Slope_Name,Slope_Difficulty,Skill_Level,Protective_Gear,Injuries,Response_Time,Hospital_Transport,Hospital_Stay_Length,Severity_Category
0,0,2679,33,Male,Winter,Foggy,Skiing,Bunny Hill,Easy,Beginner,Helmet,Sprain,15,No,0,Medium
1,1,4257,36,Female,Summer,Sunny,Hiking,Devil’s Drop,Expert,Beginner,Helmet,Fracture,23,No,0,Medium
2,2,7924,47,Female,Winter,Foggy,Snowboarding,Thunder Run,Advanced,Beginner,Helmet,Sprain,13,Yes,1,Critical
3,3,7224,56,Male,Summer,Sunny,Hiking,Thunder Run,Advanced,Intermediate,Helmet,Fracture,9,No,0,Medium
4,4,9785,17,Male,Summer,Windy,Climbing,Blue Ridge,Intermediate,Beginner,Unknown,Head Injury,10,Yes,4,Critical


In [19]:
# Share of all incidents falling in each severity category (proportions sum to 1).
overall_severity = incidents_df['Severity_Category']
overall_severity.value_counts(normalize=True)


Severity_Category
Medium      0.523
Critical    0.334
Light       0.143
Name: proportion, dtype: float64

In [20]:
# Number of incident rows recorded on each slope, largest first.
# Built as one chain: count -> name the columns -> sort -> renumber rows.
slope_counts = (
    incidents_df["Slope_Name"]
    .value_counts()
    .rename_axis("Slope_Name")
    .reset_index(name="Total_Injuries")
    .sort_values("Total_Injuries", ascending=False)
    .reset_index(drop=True)
)

print("=== Total Injury Counts by Slope ===")
print(slope_counts)

=== Total Injury Counts by Slope ===
      Slope_Name  Total_Injuries
0    Summit Peak             174
1    Thunder Run             150
2     Blue Ridge             143
3   Devil’s Drop             141
4     Rocky Pass             138
5  Shadow Valley             132
6     Bunny Hill             122


In [27]:
# Print each slope name via repr() so hidden whitespace or unusual characters
# (e.g. a typographic apostrophe) are visible before filtering on exact strings.
unique_slopes = incidents_df['Slope_Name'].unique()
print("Unique slope names in df:")
for slope_repr in map(repr, unique_slopes):
    print(slope_repr)

Unique slope names in df:
'Bunny Hill'
'Devil’s Drop'
'Thunder Run'
'Blue Ridge'
'Summit Peak'
'Shadow Valley'
'Rocky Pass'


In [34]:
# Restrict the analysis to the two slopes under investigation.
# The names must match the data exactly (note the typographic apostrophe).
slopes_of_interest = ["Devil’s Drop", "Summit Peak"]
is_slope_of_interest = incidents_df["Slope_Name"].isin(slopes_of_interest)
subset_df = incidents_df.loc[is_slope_of_interest]
print(subset_df.shape)

(315, 16)


In [35]:
# Sanity check: only the slopes of interest should remain after filtering.
subset_slope_names = subset_df["Slope_Name"]
subset_slope_names.unique()

array(['Devil’s Drop', 'Summit Peak'], dtype=object)

In [36]:
# Incident counts for every (slope, activity) pair, broken down by severity.
# Rows: Slope_Name x Activity; columns: Severity_Category.
sev_row_keys = [subset_df["Slope_Name"], subset_df["Activity"]]
sev_column_key = subset_df["Severity_Category"]
crosstab_slope_activity_sev = pd.crosstab(sev_row_keys, sev_column_key)

print("\n=== Crosstab: (Slope_Name, Activity) vs. Severity_Category ===")
print(crosstab_slope_activity_sev)



=== Crosstab: (Slope_Name, Activity) vs. Severity_Category ===
Severity_Category             Critical  Light  Medium
Slope_Name   Activity                                
Devil’s Drop Climbing               22      0       0
             Hiking                  6      0      13
             Mountain Biking        13      0      12
             Skiing                 30      0       8
             Snowboarding           37      0       0
Summit Peak  Climbing               17      0      12
             Hiking                  0      8      14
             Mountain Biking         0     12      22
             Skiing                 12      6      20
             Snowboarding           21      0      30


In [37]:
# Incident counts for every (slope, activity) pair, broken down by injury type.
# Rows: Slope_Name x Activity; columns: Injuries.
inj_row_keys = [subset_df["Slope_Name"], subset_df["Activity"]]
inj_column_key = subset_df["Injuries"]
crosstab_slope_activity_inj = pd.crosstab(inj_row_keys, inj_column_key)

print("\n=== Crosstab: (Slope_Name, Activity) vs. Injuries ===")
print(crosstab_slope_activity_inj)



=== Crosstab: (Slope_Name, Activity) vs. Injuries ===
Injuries                      Concussion  Fracture  Head Injury  Sprain
Slope_Name   Activity                                                  
Devil’s Drop Climbing                  0        13            9       0
             Hiking                    0         8            0      11
             Mountain Biking           0        11            0      14
             Skiing                   13        12            0      13
             Snowboarding             11        13            0      13
Summit Peak  Climbing                  0        21            8       0
             Hiking                    0        13            0       9
             Mountain Biking           0        17            0      17
             Skiing                   12        10            0      16
             Snowboarding             15        17            0      19


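#### Operational Implications (Summit Peak)
*All counts in this section are for Summit Peak; Devil’s Drop is summarized in the conclusion.*

**1. Snowboarding:**
- Highest total incidents (51), of which 21 (about 41%) were critical.
- The mix of concussions (15) and fractures (17) suggests heavy impacts/falls.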


**2. Climbing:**
- Smaller total (29) but the highest critical share: 17 of 29 incidents (about 59%).
- All were fractures (21) or head injuries (8)—possibly falls or rope mishaps.


**3. Skiing:**
- Moderate total (38), with 12 critical. Sprains (16) are the most common injury, followed by concussions (12) and fractures (10).


**4. Mountain Biking & Hiking:**
- Zero critical injuries at Summit Peak, though fractures and sprains do occur. (On Devil’s Drop these activities do record critical injuries: 13 for Mountain Biking and 6 for Hiking.)
- Lower priority for “severe outcome” reduction at Summit Peak, but overall incidents could still be reduced through better trail maintenance or beginner instruction.

#### Conclusion: 
- Devil’s Drop is a hotspot for critical injuries: every Snowboarding (37 of 37) and Climbing (22 of 22) incident was critical, as were most Skiing incidents (30 of 38).
- Summit Peak has more total incidents (174 vs. 141), with Snowboarding leading both total (51) and critical (21) counts; Skiing and Climbing show a mix of medium and critical outcomes.