# Stress Analysis: Notre Dame Hockey Season
A comprehensive analysis of game stress factors, travel impact, and performance outcomes.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Notebook-wide display defaults: seaborn "whitegrid" theme, a 14x6 default
# matplotlib figure size, and no cap on the number of DataFrame columns shown.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (14, 6)})
pd.options.display.max_columns = None

## 1. Load and Explore the Dataset

In [None]:
# Load the dataset.
# The CSV location defaults to the original hard-coded local path; set the
# STRESS_DF_PATH environment variable to run the notebook on another machine
# without editing this cell.
import os

DATA_PATH = os.environ.get('STRESS_DF_PATH', '/Users/perrygregg/Downloads/stress_df.csv')
df = pd.read_csv(DATA_PATH)

# Quick structural overview: dimensions, dtypes, a sample of rows, and the
# describe() summary statistics.
print("Dataset Shape:", df.shape)
print("\nColumn Names and Types:")
print(df.dtypes)
print("\nFirst Few Rows:")
print(df.head())
print("\nBasic Statistics:")
print(df.describe())

In [None]:
# Data-quality overview: null count per column, number of fully duplicated
# rows, and the value distribution of the stress band and win flag columns.
null_counts = df.isnull().sum()
duplicate_total = df.duplicated().sum()

print("Missing Values:")
print(null_counts)
print("\nDuplicate Rows:", duplicate_total)

for heading, column in (
    ("\nStress Band Distribution:", 'stress_band'),
    ("\nWin/Loss Distribution:", 'is_win'),
):
    print(heading)
    print(df[column].value_counts())

## 2. Statistical Summary and Key Metrics

In [None]:
# Per-stress-band performance: mean of the win flag and number of games,
# spread of the stress score, and mean goal/shot differentials. All values are
# rounded to three decimals.
band_metrics = {
    'is_win': ['mean', 'count'],
    'stress_score': ['mean', 'min', 'max'],
    'goal_diff': 'mean',
    'shot_diff': 'mean',
}
stress_performance = df.groupby('stress_band').agg(band_metrics).round(3)

print("Performance by Stress Band:")
print(stress_performance)

# Grouped by the is_flight flag: mean win flag, stress score, and travel hours.
print("\n\nTravel Impact on Performance:")
flight_metrics = dict.fromkeys(['is_win', 'stress_score', 'travel_hours'], 'mean')
travel_analysis = df.groupby('is_flight').agg(flight_metrics).round(3)
print(travel_analysis)

# Grouped by the is_home flag: mean win flag, stress score, and goal differential.
print("\n\nHome vs Away Performance:")
venue_metrics = dict.fromkeys(['is_win', 'stress_score', 'goal_diff'], 'mean')
home_away = df.groupby('is_home').agg(venue_metrics).round(3)
print(home_away)

## 3. Stress Components Analysis

In [None]:
# The four load columns examined in this cell.
stress_components = ['circadian_load', 'travel_load', 'logistics_load', 'density_load']

# Correlation of each load column with the composite stress score
# (Series.corr with its default method).
print("Stress Component Correlations with Stress Score:")
for load_col in stress_components:
    score_corr = df[load_col].corr(df['stress_score'])
    print(f"{load_col}: {score_corr:.3f}")

# Mean of each load column within every stress band, visiting bands in the
# order they first appear in the data.
print("\n\nAverage Load Values by Stress Band:")
for band_label in df['stress_band'].unique():
    band_rows = df[df['stress_band'] == band_label]
    print(f"\n{band_label}:")
    for load_col in stress_components:
        print(f"  {load_col}: {band_rows[load_col].mean():.2f}")

# Correlation of each load column with the win flag. The printed label is the
# column name with underscores replaced by spaces and title-cased
# (e.g. 'circadian_load' -> 'Circadian Load').
print("\n\nStress Factors Impact on Winning:")
for load_col in stress_components:
    pretty_name = load_col.replace('_', ' ').title()
    win_corr_value = df[load_col].corr(df['is_win'])
    print(f"{pretty_name} correlation with Win: {win_corr_value:.3f}")

## 4. Visualization 1: Win Rate by Stress Level

In [None]:
# Create a summary dataframe for visualization.
# Named aggregation ties every output column to its source column and
# statistic, instead of relying on a positional rename of flattened
# MultiIndex columns.
stress_summary = df.groupby('stress_band').agg(
    win_rate=('is_win', 'mean'),
    game_count=('is_win', 'count'),
    avg_stress_score=('stress_score', 'mean'),
    avg_goal_diff=('goal_diff', 'mean'),
).reset_index()

# Express the win rate as a percentage for display.
stress_summary['win_rate'] = stress_summary['win_rate'] * 100

# Sort by stress level (numerical order)
stress_order = ['Minimal (0–3)', 'Low (4–7)', 'Moderate (8–12)', 'High (13–16)', 'Very High (17+)']

# pd.Categorical converts any value that is not in `categories` to NaN, so a
# band label spelled differently in the data (for example with a plain hyphen
# instead of the en dash used above) would silently lose its label in the
# chart. Append such labels after the known ones so every band stays visible.
unlisted_bands = [band for band in stress_summary['stress_band'] if band not in stress_order]
stress_summary['stress_band'] = pd.Categorical(
    stress_summary['stress_band'],
    categories=stress_order + unlisted_bands,
    ordered=True,
)
stress_summary = stress_summary.sort_values('stress_band')

# Bar per stress band, coloured by win rate and labelled with one decimal.
fig = px.bar(stress_summary, x='stress_band', y='win_rate',
             text='win_rate', color='win_rate',
             title='Win Rate by Stress Level',
             labels={'stress_band': 'Stress Band', 'win_rate': 'Win Rate (%)'},
             color_continuous_scale='RdYlGn')
fig.update_traces(texttemplate='%{text:.1f}%', textposition='outside')
fig.update_layout(height=500, showlegend=False)
fig.show()

print("\nStress Band Performance Summary:")
print(stress_summary)

## 5. Visualization 2: Travel Impact on Performance

In [None]:
# Travel impact analysis.
# Work on a copy so the label column is not added to df itself.
travel_comparison = df.copy()
travel_comparison['travel_type'] = travel_comparison['is_flight'].map(
    lambda flag: 'Flight Required' if flag == 1 else 'No Flight'
)

# One row per travel type: win rate (as a percentage), mean travel hours,
# and mean stress score.
travel_perf = travel_comparison.groupby('travel_type').agg(
    win_rate=('is_win', 'mean'),
    avg_travel_hours=('travel_hours', 'mean'),
    stress_score=('stress_score', 'mean'),
).reset_index()
travel_perf['win_rate'] *= 100

travel_labels = travel_perf['travel_type']

# NOTE(review): bar colours are assigned by position. groupby sorts its keys
# by default, so with both groups present 'Flight Required' is drawn in
# '#2ecc71' and 'No Flight' in '#e74c3c' - confirm that is the intended mapping.
win_rate_bars = go.Bar(
    name='Win Rate (%)',
    x=travel_labels,
    y=travel_perf['win_rate'],
    marker_color=['#2ecc71', '#e74c3c'],
)
stress_line = go.Scatter(
    name='Stress Score',
    x=travel_labels,
    y=travel_perf['stress_score'],
    mode='markers+lines',
    marker=dict(size=12, color='#3498db'),
)

# Single panel with two y-axes: win rate on the primary axis, stress score on
# the secondary axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(win_rate_bars, secondary_y=False)
fig.add_trace(stress_line, secondary_y=True)

fig.update_layout(
    title='Travel Impact: Win Rate vs Stress Score',
    xaxis_title='Travel Requirement',
    height=500,
    hovermode='x unified'
)
for axis_title, on_secondary in (('Win Rate (%)', False), ('Stress Score', True)):
    fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)
fig.show()

print("\nTravel Impact Analysis:")
print(travel_perf)

## 6. Visualization 3: Home/Away Performance Analysis

In [None]:
# Home/Away analysis.
# Adds a human-readable 'location' column to df (any is_home value other than
# 1 is labelled 'Away').
df['location'] = df['is_home'].map(lambda flag: 'Home' if flag == 1 else 'Away')

# One row per location: win rate (as a percentage), mean stress score, mean
# goal differential, and number of non-null game_id values.
home_away_perf = df.groupby('location').agg(
    win_rate=('is_win', 'mean'),
    stress_score=('stress_score', 'mean'),
    goal_diff=('goal_diff', 'mean'),
    games_played=('game_id', 'count'),
).reset_index()
home_away_perf['win_rate'] *= 100

home_rows = home_away_perf[home_away_perf['location'] == 'Home']
away_rows = home_away_perf[home_away_perf['location'] == 'Away']

# NOTE(review): bar colours are assigned by position. groupby sorts its keys
# by default, so with both locations present 'Away' is drawn in '#3498db' and
# 'Home' in '#e74c3c' - confirm that is the intended mapping.
win_rate_bars = go.Bar(
    name='Win Rate (%)',
    x=home_away_perf['location'],
    y=home_away_perf['win_rate'],
    text=home_away_perf['win_rate'].round(1),
    textposition='auto',
    marker_color=['#3498db', '#e74c3c'],
)
fig = go.Figure(data=[win_rate_bars])

# Caption under the plot area with the sample size behind each bar.
games_caption = f"Games: {home_rows['games_played'].values[0]} Home | {away_rows['games_played'].values[0]} Away"
fig.add_annotation(text=games_caption,
                   xref="paper", yref="paper", x=0.5, y=-0.15, showarrow=False)

fig.update_layout(
    title='Home vs Away: Win Rate Comparison',
    xaxis_title='Game Location',
    yaxis_title='Win Rate (%)',
    height=500,
    showlegend=False
)
fig.show()

print("\nHome vs Away Performance:")
print(home_away_perf)

# Home advantage = home win rate minus away win rate, in percentage points.
home_wr = home_rows['win_rate'].values[0]
away_wr = away_rows['win_rate'].values[0]
print(f"\nHome Advantage: {home_wr - away_wr:.1f} percentage points")

## 7. Visualization 4: Stress Components Heatmap

In [None]:
# Pairwise correlation (DataFrame.corr defaults) between the four load
# columns, the composite stress score, and the three outcome columns.
correlation_cols = ['circadian_load', 'travel_load', 'logistics_load', 'density_load',
                    'stress_score', 'is_win', 'goal_diff', 'shot_diff']
corr_matrix = df[correlation_cols].corr()

axis_labels = corr_matrix.columns
corr_values = corr_matrix.values

# Diverging colour scale centred on zero; each cell is annotated with the
# coefficient rounded to two decimals.
heatmap = go.Heatmap(
    z=corr_values,
    x=axis_labels,
    y=axis_labels,
    colorscale='RdBu',
    zmid=0,
    text=corr_values.round(2),
    texttemplate='%{text}',
    textfont={"size": 10},
    colorbar=dict(title="Correlation"),
)
fig = go.Figure(data=heatmap)
fig.update_layout(
    title='Correlation Matrix: Stress Components & Performance',
    height=600,
    width=700
)
fig.show()

# Rank every other column by its correlation with the win flag, dropping the
# trivial self-correlation row from the printout.
print("\nKey Correlations with Win Rate:")
win_corr = corr_matrix['is_win'].sort_values(ascending=False)
print(win_corr.drop('is_win'))

## 8. Visualization 5: Stress Score Distribution by Outcome

In [None]:
# Distribution of stress by game outcome.
# Adds an 'outcome' label column to df (any is_win value other than 1 is
# labelled 'Loss').
df['outcome'] = df['is_win'].map(lambda won: 'Win' if won == 1 else 'Loss')

outcome_palette = {'Win': '#2ecc71', 'Loss': '#e74c3c'}
fig = px.violin(
    df,
    x='outcome',
    y='stress_score',
    color='outcome',
    box=True,
    points='all',
    color_discrete_map=outcome_palette,
    title='Stress Score Distribution: Wins vs Losses',
)
fig.update_layout(height=500, showlegend=False)
fig.show()

print("Stress Statistics by Outcome:")
summary_stats = ['mean', 'median', 'std', 'min', 'max']
outcome_stress = df.groupby('outcome')['stress_score'].agg(summary_stats)
print(outcome_stress)

# Two-sample t-test on the stress scores of wins vs losses.
# NOTE(review): ttest_ind is called with its defaults, which assume equal
# population variances - consider whether Welch's variant is more appropriate.
win_stress = df.loc[df['is_win'] == 1, 'stress_score']
loss_stress = df.loc[df['is_win'] == 0, 'stress_score']
ttest_result = stats.ttest_ind(win_stress, loss_stress)
t_stat, p_value = ttest_result.statistic, ttest_result.pvalue
print(f"\nT-test (Wins vs Losses): t={t_stat:.3f}, p={p_value:.4f}")

## 9. Visualization 6: Stress Over Time (Chronological Trend)

In [None]:
# Time series analysis.
# Parse game_date to datetime so that sorting and the x-axis are chronological.
df['game_date'] = pd.to_datetime(df['game_date'])
df_sorted = df.sort_values('game_date')

fig = go.Figure()

# Add stress score line
fig.add_trace(go.Scatter(
    x=df_sorted['game_date'],
    y=df_sorted['stress_score'],
    mode='lines+markers',
    name='Stress Score',
    line=dict(color='#3498db', width=2),
    marker=dict(size=6)
))

# Overlay a second marker-only trace on the same points, coloured per game:
# green where is_win == 1, red otherwise.
colors = ['#2ecc71' if win == 1 else '#e74c3c' for win in df_sorted['is_win']]
fig.add_trace(go.Scatter(
    x=df_sorted['game_date'],
    y=df_sorted['stress_score'],
    mode='markers',
    name='Game Outcome',
    marker=dict(size=8, color=colors),
    showlegend=False
))

fig.update_layout(
    title='Stress Score Over Time (Green=Win, Red=Loss)',
    xaxis_title='Game Date',
    yaxis_title='Stress Score',
    hovermode='x unified',
    height=500
)
fig.show()

print("\nStress Score Trends:")
lowest_stress = df['stress_score'].min()
highest_stress = df['stress_score'].max()

# Take the date of the first row (in df order) that attains each extreme.
# .iloc[0] returns a pandas Timestamp, which is formatted as YYYY-MM-DD;
# the previous .values[0] returned a raw numpy.datetime64 that printed as a
# full ISO timestamp including a zero time-of-day component.
lowest_date = df.loc[df['stress_score'] == lowest_stress, 'game_date'].iloc[0]
highest_date = df.loc[df['stress_score'] == highest_stress, 'game_date'].iloc[0]

print(f"Minimum Stress: {lowest_stress} (Date: {lowest_date:%Y-%m-%d})")
print(f"Maximum Stress: {highest_stress} (Date: {highest_date:%Y-%m-%d})")
print(f"Mean Stress: {df['stress_score'].mean():.2f}")

## 10. Visualization 7: Opponent Difficulty vs Stress

In [None]:
# Per-opponent averages: mean stress score, win rate (as a percentage), and
# number of non-null game_id values.
opponent_stats = df.groupby('opponent').agg(
    avg_stress=('stress_score', 'mean'),
    win_rate=('is_win', 'mean'),
    games=('game_id', 'count'),
).reset_index()
opponent_stats['win_rate'] *= 100

# Keep the ten opponents with the highest average stress.
# NOTE(review): the `>= 1` threshold only removes opponents whose game_id
# values are all null; raise it if a minimum sample size was intended.
has_games = opponent_stats['games'] >= 1
ranked_by_stress = opponent_stats[has_games].sort_values('avg_stress', ascending=False)
opponent_stats = ranked_by_stress.head(10)

# Bubble chart: x = average stress, y = win rate, bubble size = games played.
fig = px.scatter(
    opponent_stats,
    x='avg_stress',
    y='win_rate',
    size='games',
    hover_name='opponent',
    color='win_rate',
    title='Opponent Analysis: Stress vs Win Rate (Top 10 by Stress)',
    labels={'avg_stress': 'Average Stress Score', 'win_rate': 'Win Rate (%)'},
    color_continuous_scale='RdYlGn',
)
fig.update_layout(height=500)
fig.show()

print("\nTop 10 Opponents by Average Stress:")
report_columns = ['opponent', 'avg_stress', 'win_rate', 'games']
print(opponent_stats[report_columns].to_string(index=False))

## 11. Key Insights Summary

In [None]:
# Final report. The template below has eleven positional placeholders, filled
# from home_away_perf, travel_perf, and df (computed in earlier cells).
# NOTE(review): everything outside those placeholders - including the
# "sweet spot" finding, the season-trend bullets, and the named opponents - is
# static text and is not derived from the data by this cell; verify it against
# the outputs above before sharing.
insights_template = """
════════════════════════════════════════════════════════════════════════════════
                          KEY INSIGHTS & FINDINGS
════════════════════════════════════════════════════════════════════════════════

1. STRESS IMPACT ON PERFORMANCE
   • Minimal stress games (0-3): Lowest stress but acceptable win rate
   • Low stress games (4-7): Best performing games with highest win rates
   • Moderate+ stress games (8+): Declining performance as stress increases
   ➜ FINDING: There's a "sweet spot" - some stress may enhance focus, but 
      excessive stress degrades performance significantly

2. HOME/AWAY ADVANTAGE
   • Home games: {:.1f}% win rate
   • Away games: {:.1f}% win rate
   • Home advantage: {:.1f} percentage points
   ➜ FINDING: Strong home advantage exists; away games are significantly 
      more challenging

3. TRAVEL IMPACT
   • Games with flights: {:.1f}% win rate, {:.1f} avg stress
   • Games without flights: {:.1f}% win rate, {:.1f} avg stress
   ➜ FINDING: Air travel adds notable stress and correlates with reduced 
      win rates. Ground travel is preferable when possible.

4. STRESS COMPONENT ANALYSIS
   • Most impactful stress components:
     - Travel Load: Direct impact on overall stress accumulation
     - Density Load: Game scheduling intensity matters
     - Circadian Load: Jet lag and sleep disruption are significant factors
   ➜ FINDING: Manageable travel and better scheduling/recovery could boost 
      performance

5. GAME OUTCOME PATTERNS
   • Total games analyzed: {}
   • Overall win rate: {:.1f}%
   • Stress in wins: {:.2f} (average)
   • Stress in losses: {:.2f} (average)
   ➜ FINDING: Wins tend to occur at moderate stress levels; extremely low 
      stress and high stress both correlate with losses

6. SEASON TRENDS
   • Early season stress: Lower
   • Mid-season stress: Escalates due to travel/density
   • Late season patterns: Variable based on scheduling
   ➜ FINDING: Season progression affects stress accumulation; planning 
      recovery during peak stress periods is critical

════════════════════════════════════════════════════════════════════════════════
                        STRATEGIC RECOMMENDATIONS
════════════════════════════════════════════════════════════════════════════════

✓ OPTIMIZE TRAVEL: Minimize air travel; prioritize ground transportation
  for nearby opponents

✓ SCHEDULE RECOVERY: Build in rest days after high-stress games/travel
  periods (especially during density periods)

✓ MONITOR STRESS LEVELS: Target operating range appears to be stress scores
  of 4-7; implement support when approaching higher ranges

✓ HOME GAMES: Use home games strategically; prioritize wins at home with
  optimal preparation

✓ CIRCADIAN MANAGEMENT: Implement sleep protocols and recovery strategies
  for cross-timezone games (highest stress component for certain matchups)

✓ OPPONENT-SPECIFIC PREP: High-stress opponents (Minnesota, Penn State)
  require enhanced preparation protocols
════════════════════════════════════════════════════════════════════════════════
"""

# Section 2 inputs: win-rate percentages by location.
is_home_row = home_away_perf['location'] == 'Home'
is_away_row = home_away_perf['location'] == 'Away'
home_win_rate = home_away_perf.loc[is_home_row, 'win_rate'].values[0]
away_win_rate = home_away_perf.loc[is_away_row, 'win_rate'].values[0]

# Section 3 inputs: win-rate percentage and mean stress by travel type.
is_flight_row = travel_perf['travel_type'] == 'Flight Required'
is_ground_row = travel_perf['travel_type'] == 'No Flight'
flight_win_rate = travel_perf.loc[is_flight_row, 'win_rate'].values[0]
flight_stress = travel_perf.loc[is_flight_row, 'stress_score'].values[0]
ground_win_rate = travel_perf.loc[is_ground_row, 'win_rate'].values[0]
ground_stress = travel_perf.loc[is_ground_row, 'stress_score'].values[0]

# Section 5 inputs: sample size, overall win percentage, and mean stress score
# in wins vs losses.
total_games = len(df)
overall_win_rate = (df['is_win'].sum() / total_games) * 100
mean_stress_in_wins = df.loc[df['is_win'] == 1, 'stress_score'].mean()
mean_stress_in_losses = df.loc[df['is_win'] == 0, 'stress_score'].mean()

print(insights_template.format(
    home_win_rate,
    away_win_rate,
    home_win_rate - away_win_rate,
    flight_win_rate,
    flight_stress,
    ground_win_rate,
    ground_stress,
    total_games,
    overall_win_rate,
    mean_stress_in_wins,
    mean_stress_in_losses,
))