In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
import requests
from datetime import datetime, timedelta
import warnings

warnings.filterwarnings('ignore')

print("‚úì All libraries imported successfully!")

‚úì All libraries imported successfully!


In [8]:
# Load the fishing data CSV
catch_df = pd.read_csv('district_data.csv')

# Convert date column
catch_df['date'] = pd.to_datetime(catch_df['run_date'], format='%m-%d-%Y')
catch_df = catch_df.drop('run_date', axis=1)
catch_df = catch_df.sort_values('date').reset_index(drop=True)

print(f"‚úì Loaded {len(catch_df)} records")
print(f"Districts: {sorted(catch_df['district_id'].unique())}")
print(f"Date range: {catch_df['date'].min().date()} to {catch_df['date'].max().date()}")
print(f"\nFirst few rows:")
print(catch_df.head())

‚úì Loaded 499 records
Districts: ['egegik', 'naknek', 'nushagak', 'togiak', 'ugashik']
Date range: 2023-06-20 to 2025-07-24

First few rows:
   id district_id   district_name  catch_daily  catch_cumulative  \
0  40     ugashik         Ugashik        14124             17926   
1  41      egegik          Egegik            0            107696   
2  42      naknek  Naknek-Kvichak         6822              8390   
3  43    nushagak        Nushagak         4723             11080   
4  44      togiak          Togiak          479               767   

   escapement_daily  escapement_cumulative  in_river_estimate  total_run  \
0                 0                      0                  0      17926   
1             13644                  15066                  0     122762   
2                 0                      0                  0       8390   
3              2548                  28264                  0      39344   
4                 0                      0                  0        

In [20]:
# Bristol Bay Districts with geographic info
DISTRICTS = {
    'naknek': {
        'name': 'Naknek-Kvichak',
        'lat': 58.7,
        'lon': -157.1755,
        'color': '#3b82f6',
        'orientation': 'Southeast-facing',
    },
    'egegik': {
        'name': 'Egegik',
        'lat': 58.222,
        'lon': -157.525,
        'color': '#8b5cf6',
        'orientation': 'East-facing',
    },
    'ugashik': {
        'name': 'Ugashik',
        'lat': 57.6,
        'lon': -157.75,
        'color': '#ec4899',
        'orientation': 'Northwest-facing',
    },
    'nushagak': {
        'name': 'Nushagak',
        'lat': 58.72,
        'lon': -158.54,
        'color': '#10b981',
        'orientation': 'Southeast-facing',
    },
    'togiak': {
        'name': 'Togiak',
        'lat': 58.83,
        'lon': -160.45,
        'color': '#f59e0b',
        'orientation': 'South-facing',
    }
}

# Define date ranges for analysis
date_ranges = [
    (datetime(2023, 6, 20), datetime(2023, 7, 20)),
    (datetime(2024, 6, 17), datetime(2024, 7, 25)),
    (datetime(2025, 6, 16), datetime(2025, 7, 24))
]

print("‚úì Districts configured")
print("‚úì Date ranges set up")

‚úì Districts configured
‚úì Date ranges set up


In [10]:
# Filter catch data to the date ranges we're interested in
catch_df['year'] = catch_df['date'].dt.year

catch_analysis = catch_df[
    ((catch_df['date'] >= date_ranges[0][0]) & (catch_df['date'] <= date_ranges[0][1])) |
    ((catch_df['date'] >= date_ranges[1][0]) & (catch_df['date'] <= date_ranges[1][1])) |
    ((catch_df['date'] >= date_ranges[2][0]) & (catch_df['date'] <= date_ranges[2][1]))
].copy()

print(f"‚úì Filtered to {len(catch_analysis)} fishing records in analysis periods")

# Summary by district
print(f"\nüìä Catch data summary by district:")
summary = catch_analysis.groupby('district_name').agg({
    'catch_daily': ['sum', 'mean', 'max', 'count'],
}).round(0)
summary.columns = ['Total', 'Average', 'Max', 'Days']
print(summary)

‚úì Filtered to 499 fishing records in analysis periods

üìä Catch data summary by district:
                   Total   Average      Max  Days
district_name                                    
Egegik          22912132  222448.0  1252005   103
Naknek-Kvichak  32088734  314595.0  1584868   102
Nushagak        39216958  417202.0  1539437    94
Togiak            878789    9060.0    29885    97
Ugashik         10805320  104906.0   321914   103


In [12]:
# Get daily totals and identify peak days
daily_totals = catch_analysis.groupby('date')['catch_daily'].sum().reset_index()
daily_totals = daily_totals.sort_values('catch_daily', ascending=False)

# Top 20% are peak days
peak_threshold = daily_totals['catch_daily'].quantile(0.80)
peak_days_list = daily_totals[daily_totals['catch_daily'] >= peak_threshold]

print(f"üéØ PEAK FISHING DAYS (Top 20%)")
print(f"Peak threshold: {peak_threshold:,.0f} fish/day\n")
print(f"Dates to find weather for:")
for idx, row in peak_days_list.iterrows():
    print(f"  {row['date'].date()}: {row['catch_daily']:>10,.0f} fish")

üéØ PEAK FISHING DAYS (Top 20%)
Peak threshold: 1,866,746 fish/day

Dates to find weather for:
  2023-07-14:  2,988,534 fish
  2023-07-03:  2,914,788 fish
  2025-07-08:  2,637,858 fish
  2024-07-04:  2,619,044 fish
  2025-07-05:  2,544,572 fish
  2025-07-07:  2,525,368 fish
  2023-07-13:  2,514,287 fish
  2023-07-01:  2,462,159 fish
  2025-06-29:  2,427,292 fish
  2024-07-05:  2,426,542 fish
  2024-07-10:  2,410,514 fish
  2025-06-30:  2,371,603 fish
  2023-07-05:  2,294,002 fish
  2023-07-02:  2,271,516 fish
  2025-07-01:  2,205,673 fish
  2025-07-04:  2,082,873 fish
  2023-07-15:  2,081,163 fish
  2025-07-02:  2,057,300 fish
  2025-07-09:  2,017,753 fish
  2024-07-06:  1,935,420 fish
  2024-07-12:  1,897,423 fish


In [15]:
exec(open('weatherForPeakDays.py').read())


2023-07-14:
  Avg Temp: 59.6¬∞F
  Avg Wind Speed: 5.5 mph
  Avg Wind Direction: 122.4¬∞ (n=21 readings)

2023-07-03:
  Avg Temp: 50.0¬∞F
  Avg Wind Speed: 11.3 mph
  Avg Wind Direction: 225.0¬∞ (n=21 readings)

2025-07-08:
  Avg Temp: 58.9¬∞F
  Avg Wind Speed: 5.6 mph
  Avg Wind Direction: 109.8¬∞ (n=24 readings)

2024-07-04:
  Avg Temp: 52.8¬∞F
  Avg Wind Speed: 7.1 mph
  Avg Wind Direction: 68.8¬∞ (n=15 readings)

2025-07-05:
  Avg Temp: 51.3¬∞F
  Avg Wind Speed: 17.1 mph
  Avg Wind Direction: 90.0¬∞ (n=22 readings)

2025-07-07:
  Avg Temp: 53.0¬∞F
  Avg Wind Speed: 4.8 mph
  Avg Wind Direction: 85.1¬∞ (n=19 readings)

2023-07-13:
  Avg Temp: 59.2¬∞F
  Avg Wind Speed: 5.8 mph
  Avg Wind Direction: 90.5¬∞ (n=20 readings)

2023-07-01:
  Avg Temp: 50.3¬∞F
  Avg Wind Speed: 11.4 mph
  Avg Wind Direction: 94.1¬∞ (n=23 readings)

2025-06-29:
  Avg Temp: 52.4¬∞F
  Avg Wind Speed: 5.7 mph
  Avg Wind Direction: 85.5¬∞ (n=18 readings)

2024-07-05:
  Avg Temp: 58.2¬∞F
  Avg Wind Speed: 6.1 mph

In [16]:
# Load the weather CSV we just created
weather_df = pd.read_csv('bristol_bay_weather.csv')
weather_df['date'] = pd.to_datetime(weather_df['date'])

print(f"‚úì Loaded {len(weather_df)} weather records")
print(f"Date range: {weather_df['date'].min().date()} to {weather_df['date'].max().date()}")
print(f"\nWeather data:")
print(weather_df)

‚úì Loaded 21 weather records
Date range: 2023-07-01 to 2025-07-09

Weather data:
         date  wind_speed_mph  wind_direction_deg  temperature_f
0  2023-07-14             5.5               122.4           59.6
1  2023-07-03            11.3               225.0           50.0
2  2025-07-08             5.6               109.8           58.9
3  2024-07-04             7.1                68.8           52.8
4  2025-07-05            17.1                90.0           51.3
5  2025-07-07             4.8                85.1           53.0
6  2023-07-13             5.8                90.5           59.2
7  2023-07-01            11.4                94.1           50.3
8  2025-06-29             5.7                85.5           52.4
9  2024-07-05             6.1                86.1           58.2
10 2024-07-10             8.8                91.9           50.8
11 2025-06-30             7.2                92.7           56.5
12 2023-07-05            14.1               206.6           49.6
13 2023-

In [17]:
# Merge fishing and weather data
print("[MERGING DATA]")

# Get daily totals for all fishing data
daily_total = catch_analysis.groupby('date').agg({
    'catch_daily': 'sum',
}).reset_index()

# Merge with weather
merged = daily_total.merge(weather_df, on='date', how='inner')

print(f"‚úì Merged {len(merged)} days with both catch and weather data\n")

# ============================================================================
# CORRELATION ANALYSIS
# ============================================================================
print("="*80)
print("CORRELATION ANALYSIS - PEAK FISHING DAYS")
print("="*80)

# Wind Speed vs Catch
print("\n[WIND SPEED vs CATCH]")
corr_wind, p_wind = pearsonr(merged['wind_speed_mph'], merged['catch_daily'])
print(f"Pearson Correlation: {corr_wind:.4f}")
print(f"P-value: {p_wind:.6f}")

if p_wind < 0.05:
    if abs(corr_wind) > 0.5:
        print("‚úì STRONG & SIGNIFICANT correlation!")
    elif abs(corr_wind) > 0.3:
        print("‚úì MODERATE & SIGNIFICANT correlation")
    else:
        print("~ Weak but SIGNIFICANT correlation")
else:
    print("~ Not statistically significant (p ‚â• 0.05)")

# Temperature vs Catch
print("\n[TEMPERATURE vs CATCH]")
corr_temp, p_temp = pearsonr(merged['temperature_f'], merged['catch_daily'])
print(f"Pearson Correlation: {corr_temp:.4f}")
print(f"P-value: {p_temp:.6f}")

if p_temp < 0.05:
    if abs(corr_temp) > 0.3:
        print("‚úì SIGNIFICANT correlation")
    else:
        print("~ Weak but SIGNIFICANT correlation")
else:
    print("~ Not statistically significant")

# Wind Direction (circular mean comparison)
print("\n[WIND DIRECTION ANALYSIS]")
print(f"Average wind direction on peak days: {merged['wind_direction_deg'].mean():.1f}¬∞")
print(f"Wind direction range: {merged['wind_direction_deg'].min():.0f}¬∞ to {merged['wind_direction_deg'].max():.0f}¬∞")

# Summary statistics
print("\n" + "="*80)
print("SUMMARY STATISTICS FOR PEAK FISHING DAYS")
print("="*80)
print(f"\nCatch statistics:")
print(f"  Average: {merged['catch_daily'].mean():,.0f} fish/day")
print(f"  Min: {merged['catch_daily'].min():,.0f}")
print(f"  Max: {merged['catch_daily'].max():,.0f}")

print(f"\nWind speed on these peak days:")
print(f"  Average: {merged['wind_speed_mph'].mean():.1f} mph")
print(f"  Min: {merged['wind_speed_mph'].min():.1f} mph")
print(f"  Max: {merged['wind_speed_mph'].max():.1f} mph")

print(f"\nTemperature on these peak days:")
print(f"  Average: {merged['temperature_f'].mean():.1f}¬∞F")
print(f"  Min: {merged['temperature_f'].min():.1f}¬∞F")
print(f"  Max: {merged['temperature_f'].max():.1f}¬∞F")

print("\n" + "="*80)

[MERGING DATA]
‚úì Merged 21 days with both catch and weather data

CORRELATION ANALYSIS - PEAK FISHING DAYS

[WIND SPEED vs CATCH]
Pearson Correlation: -0.3552
P-value: 0.114085
~ Not statistically significant (p ‚â• 0.05)

[TEMPERATURE vs CATCH]
Pearson Correlation: 0.2637
P-value: 0.248043
~ Not statistically significant

[WIND DIRECTION ANALYSIS]
Average wind direction on peak days: 111.3¬∞
Wind direction range: 69¬∞ to 225¬∞

SUMMARY STATISTICS FOR PEAK FISHING DAYS

Catch statistics:
  Average: 2,365,985 fish/day
  Min: 1,897,423
  Max: 2,988,534

Wind speed on these peak days:
  Average: 9.8 mph
  Min: 4.8 mph
  Max: 18.2 mph

Temperature on these peak days:
  Average: 53.5¬∞F
  Min: 49.6¬∞F
  Max: 59.6¬∞F



In [21]:
# Break down by district
print("="*80)
print("CORRELATION ANALYSIS BY DISTRICT")
print("="*80)

# Get daily catch by district
daily_by_district = catch_analysis.groupby(['date', 'district_id', 'district_name']).agg({
    'catch_daily': 'sum',
}).reset_index()

# Merge each district with weather
district_results = []

for district_id, info in DISTRICTS.items():
    district_name = info['name']
    print(f"\n{'='*80}")
    print(f"üìç {district_name.upper()} - {info['orientation']}")
    print(f"{'='*80}")
    
    # Get this district's data
    district_data = daily_by_district[daily_by_district['district_id'] == district_id]
    
    # Merge with weather
    district_weather = district_data.merge(weather_df, on='date', how='inner')
    
    if len(district_weather) < 3:
        print(f"Not enough data points ({len(district_weather)})")
        continue
    
    print(f"Data points: {len(district_weather)}")
    print(f"Avg daily catch: {district_weather['catch_daily'].mean():,.0f} fish")
    
    # Wind speed correlation
    try:
        corr_ws, p_ws = pearsonr(district_weather['wind_speed_mph'], district_weather['catch_daily'])
        print(f"\nWind Speed Correlation:")
        print(f"  r = {corr_ws:.4f}, p = {p_ws:.4f}", end="")
        if p_ws < 0.05:
            print(" ‚úì SIGNIFICANT")
        else:
            print()
        
        # Temperature correlation
        corr_t, p_t = pearsonr(district_weather['temperature_f'], district_weather['catch_daily'])
        print(f"Temperature Correlation:")
        print(f"  r = {corr_t:.4f}, p = {p_t:.4f}", end="")
        if p_t < 0.05:
            print(" ‚úì SIGNIFICANT")
        else:
            print()
        
        # Wind direction analysis
        avg_wind_dir = district_weather['wind_direction_deg'].mean()
        print(f"\nAverage Wind Direction: {avg_wind_dir:.1f}¬∞")
        print(f"District Orientation: {info['orientation']}")
        
        district_results.append({
            'district': district_name,
            'orientation': info['orientation'],
            'n': len(district_weather),
            'avg_catch': district_weather['catch_daily'].mean(),
            'wind_speed_corr': corr_ws,
            'wind_speed_p': p_ws,
            'temp_corr': corr_t,
            'temp_p': p_t,
            'avg_wind_dir': avg_wind_dir,
        })
        
    except Exception as e:
        print(f"Error: {e}")

# Create summary table
print("\n" + "="*80)
print("SUMMARY TABLE")
print("="*80)

summary_df = pd.DataFrame(district_results)
print(summary_df.to_string(index=False))

# Interpretation
print("\n" + "="*80)
print("KEY FINDINGS BY DISTRICT")
print("="*80)

for _, row in summary_df.iterrows():
    print(f"\n{row['district']} ({row['orientation']}):")
    print(f"  Wind Speed: r={row['wind_speed_corr']:.3f} (p={row['wind_speed_p']:.3f})", end="")
    if row['wind_speed_p'] < 0.05:
        print(" ‚úì")
    else:
        print()
    print(f"  Avg Wind Dir: {row['avg_wind_dir']:.0f}¬∞")

CORRELATION ANALYSIS BY DISTRICT

üìç NAKNEK-KVICHAK - Southeast-facing
Data points: 21
Avg daily catch: 823,372 fish

Wind Speed Correlation:
  r = -0.2560, p = 0.2627
Temperature Correlation:
  r = 0.2838, p = 0.2125

Average Wind Direction: 111.3¬∞
District Orientation: Southeast-facing

üìç EGEGIK - East-facing
Data points: 21
Avg daily catch: 481,880 fish

Wind Speed Correlation:
  r = 0.0259, p = 0.9113
Temperature Correlation:
  r = -0.0599, p = 0.7964

Average Wind Direction: 111.3¬∞
District Orientation: East-facing

üìç UGASHIK - Northwest-facing
Data points: 21
Avg daily catch: 187,457 fish

Wind Speed Correlation:
  r = -0.2477, p = 0.2790
Temperature Correlation:
  r = 0.4352, p = 0.0487 ‚úì SIGNIFICANT

Average Wind Direction: 111.3¬∞
District Orientation: Northwest-facing

üìç NUSHAGAK - Southeast-facing
Data points: 21
Avg daily catch: 863,204 fish

Wind Speed Correlation:
  r = 0.0217, p = 0.9255
Temperature Correlation:
  r = -0.1808, p = 0.4328

Average Wind Dire