In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# Load data into dataframe
file_path = Path('../Resources/refactored_data/combineddata2.csv')
df = pd.read_csv(file_path)
df

Unnamed: 0,countid,yearvalue,monthvalue,weeknumber,locationname,daterecorded,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount,chinookcount,sockeyecount,cohocount,shadcount
0,19900101,1990,1,1,Bonneville,1/1/1990,49.0,35.0,0.89,,,,,,
1,19900102,1990,1,1,Bonneville,1/2/1990,45.0,36.0,0.85,,,,,,
2,19900103,1990,1,1,Bonneville,1/3/1990,46.0,39.0,0.02,,,,,,
3,19900104,1990,1,1,Bonneville,1/4/1990,49.0,42.0,0.04,,,,,,
4,19900105,1990,1,1,Bonneville,1/5/1990,53.0,45.0,0.47,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11683,20211227,2021,12,53,Bonneville,12/27/2021,37.0,26.0,0.18,,19.0,,,,
11684,20211228,2021,12,53,Bonneville,12/28/2021,30.0,25.0,0.12,,15.0,1.0,,,
11685,20211229,2021,12,53,Bonneville,12/29/2021,32.0,25.0,0.10,,,,,,
11686,20211230,2021,12,53,Bonneville,12/30/2021,38.0,24.0,0.05,,,,,,


In [3]:
df.dtypes

countid                int64
yearvalue              int64
monthvalue             int64
weeknumber             int64
locationname          object
daterecorded          object
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
stlheadcount         float64
chinookcount         float64
sockeyecount         float64
cohocount            float64
shadcount            float64
dtype: object

In [4]:
import datetime as dt
df.daterecorded = pd.to_datetime(df.daterecorded)
df['day'] = df['daterecorded'].dt.dayofyear
df

Unnamed: 0,countid,yearvalue,monthvalue,weeknumber,locationname,daterecorded,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount,chinookcount,sockeyecount,cohocount,shadcount,day
0,19900101,1990,1,1,Bonneville,1990-01-01,49.0,35.0,0.89,,,,,,,1
1,19900102,1990,1,1,Bonneville,1990-01-02,45.0,36.0,0.85,,,,,,,2
2,19900103,1990,1,1,Bonneville,1990-01-03,46.0,39.0,0.02,,,,,,,3
3,19900104,1990,1,1,Bonneville,1990-01-04,49.0,42.0,0.04,,,,,,,4
4,19900105,1990,1,1,Bonneville,1990-01-05,53.0,45.0,0.47,,,,,,,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11683,20211227,2021,12,53,Bonneville,2021-12-27,37.0,26.0,0.18,,19.0,,,,,361
11684,20211228,2021,12,53,Bonneville,2021-12-28,30.0,25.0,0.12,,15.0,1.0,,,,362
11685,20211229,2021,12,53,Bonneville,2021-12-29,32.0,25.0,0.10,,,,,,,363
11686,20211230,2021,12,53,Bonneville,2021-12-30,38.0,24.0,0.05,,,,,,,364


# Steelhead Count Daily Groupby

In [5]:
# Pull desired steelhead columns
df_stlhead = df[['day','maxtempf','mintempf','precipitationinch','watertempf','stlheadcount']]
df_stlhead

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
0,1,49.0,35.0,0.89,,
1,2,45.0,36.0,0.85,,
2,3,46.0,39.0,0.02,,
3,4,49.0,42.0,0.04,,
4,5,53.0,45.0,0.47,,
...,...,...,...,...,...,...
11683,361,37.0,26.0,0.18,,19.0
11684,362,30.0,25.0,0.12,,15.0
11685,363,32.0,25.0,0.10,,
11686,364,38.0,24.0,0.05,,


In [6]:
# Drop NaN values
df_stlhead = df_stlhead.dropna(axis=0)
df_stlhead

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
73,74,53.0,38.0,0.12,44.06,27.0
74,75,69.0,38.0,0.00,44.96,44.0
75,76,61.0,44.0,0.00,44.06,36.0
76,77,59.0,44.0,0.02,44.96,60.0
77,78,67.0,44.0,0.00,46.04,46.0
...,...,...,...,...,...,...
11583,261,76.0,52.0,0.93,66.56,590.0
11584,262,60.0,51.0,1.50,66.56,702.0
11585,263,64.0,50.0,0.58,66.74,698.0
11586,264,75.0,50.0,0.00,66.38,1004.0


In [7]:
# Group by average count per week
df_stlhead_day = df_stlhead.groupby(['day']).mean().reset_index()
df_stlhead_day.head(3)

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
0,1,41.071429,30.0,0.238571,40.897143,13.0
1,2,41.214286,32.0,0.440714,40.935714,15.5
2,3,41.583333,32.75,0.428333,40.655,17.333333


In [8]:
df_stlhead_day = df_stlhead_day.astype({'stlheadcount':'int64'})
df_stlhead_day.dtypes

day                    int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
stlheadcount           int64
dtype: object

In [9]:
df_stlhead_day

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
0,1,41.071429,30.000000,0.238571,40.897143,13
1,2,41.214286,32.000000,0.440714,40.935714,15
2,3,41.583333,32.750000,0.428333,40.655000,17
3,4,39.928571,32.357143,0.320714,40.562857,14
4,5,41.583333,31.833333,0.666667,40.670000,12
...,...,...,...,...,...,...
361,362,43.400000,35.500000,0.903000,41.594000,21
362,363,46.750000,36.500000,0.695000,41.427500,21
363,364,42.500000,32.900000,0.548000,41.540000,18
364,365,40.111111,32.333333,0.481111,41.120000,14


In [10]:
df_stlhead_day.to_csv('../Resources/refactored_data/daily_fish_groups/df_stlhead_day.csv', index=False)

# Sockeye Count Weekly Groupby

In [11]:
# Pull desired steelhead columns
df_sockeye = df[['day','maxtempf','mintempf','precipitationinch','watertempf','sockeyecount']]
df_sockeye

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
0,1,49.0,35.0,0.89,,
1,2,45.0,36.0,0.85,,
2,3,46.0,39.0,0.02,,
3,4,49.0,42.0,0.04,,
4,5,53.0,45.0,0.47,,
...,...,...,...,...,...,...
11683,361,37.0,26.0,0.18,,
11684,362,30.0,25.0,0.12,,
11685,363,32.0,25.0,0.10,,
11686,364,38.0,24.0,0.05,,


In [12]:
# Drop NaN values
df_sockeye = df_sockeye.dropna(axis=0)
df_sockeye

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
152,153,65.0,51.0,0.03,59.00,1.0
155,156,64.0,49.0,0.07,59.00,3.0
158,159,69.0,52.0,0.06,59.00,1.0
159,160,64.0,53.0,0.39,59.00,3.0
161,162,59.0,48.0,0.68,59.00,5.0
...,...,...,...,...,...,...
11575,253,83.0,56.0,0.00,69.08,1.0
11576,254,65.0,56.0,0.00,68.54,3.0
11582,260,73.0,43.0,0.00,66.56,1.0
11583,261,76.0,52.0,0.93,66.56,1.0


In [15]:
# Group by average count per week
df_sockeye_day = df_sockeye.groupby(['day']).mean().reset_index()
df_sockeye_day

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
0,11,59.0,45.0,0.650,39.02,1.0
1,17,50.0,36.0,0.000,41.00,1.0
2,34,56.0,41.0,0.420,39.92,1.0
3,97,64.0,44.0,0.010,46.04,1.0
4,107,66.0,46.0,0.000,50.00,1.0
...,...,...,...,...,...,...
144,266,64.0,50.0,0.345,66.92,1.0
145,273,67.0,54.0,0.200,66.92,-1.0
146,274,66.0,52.0,2.010,64.22,1.0
147,284,62.0,49.0,0.450,64.94,1.0


In [24]:
df_sockeye_day = df_sockeye_day.astype({'sockeyecount':'int64'})
df_sockeye_day.dtypes

day                    int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
sockeyecount           int64
dtype: object

In [17]:
df_sockeye_day.head()

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
152,153,65.0,51.0,0.03,59.0,1
155,156,64.0,49.0,0.07,59.0,3
158,159,69.0,52.0,0.06,59.0,1
159,160,64.0,53.0,0.39,59.0,3
161,162,59.0,48.0,0.68,59.0,5


In [18]:
df_sockeye_day.to_csv('../Resources/refactored_data/daily_fish_groups/df_sockeye_day.csv', index=False)

# Coho Count Weekly Groupby

In [20]:
# Pull desired steelhead columns
df_coho = df[['day','maxtempf','mintempf','precipitationinch','watertempf','cohocount']]
df_coho

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,cohocount
0,1,49.0,35.0,0.89,,
1,2,45.0,36.0,0.85,,
2,3,46.0,39.0,0.02,,
3,4,49.0,42.0,0.04,,
4,5,53.0,45.0,0.47,,
...,...,...,...,...,...,...
11683,361,37.0,26.0,0.18,,
11684,362,30.0,25.0,0.12,,
11685,363,32.0,25.0,0.10,,
11686,364,38.0,24.0,0.05,,


In [21]:
# Drop NaN values
df_coho = df_coho.dropna(axis=0)
df_coho

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,cohocount
196,196,86.0,60.0,0.00,66.92,5.0
213,213,75.0,61.0,0.00,71.96,2.0
214,214,80.0,56.0,0.00,71.06,2.0
220,220,88.0,61.0,0.00,73.04,1.0
235,235,71.0,54.0,0.00,71.06,2.0
...,...,...,...,...,...,...
11583,261,76.0,52.0,0.93,66.56,2734.0
11584,262,60.0,51.0,1.50,66.56,3735.0
11585,263,64.0,50.0,0.58,66.74,4102.0
11586,264,75.0,50.0,0.00,66.38,3124.0


In [22]:
# Group by average count per week
df_coho_day = df_coho.groupby(['day']).mean().reset_index()
df_coho_day

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,cohocount
0,1,42.500000,33.000000,0.115000,39.470,-1.000000
1,2,46.000000,35.000000,0.000000,39.920,5.000000
2,3,46.500000,38.000000,0.215000,39.920,2.000000
3,4,44.750000,32.250000,0.420000,40.910,-1.500000
4,5,41.500000,33.000000,1.060000,40.460,-3.500000
...,...,...,...,...,...,...
196,361,41.750000,34.000000,0.100000,40.955,0.250000
197,362,46.666667,37.666667,1.060000,41.660,0.333333
198,363,41.666667,32.666667,0.206667,41.300,-0.333333
199,364,44.500000,36.000000,1.410000,40.550,0.000000


In [23]:
df_coho_day = df_coho_day.astype({'cohocount':'int64'})
df_coho_day.dtypes

day                    int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
cohocount              int64
dtype: object

In [25]:
df_coho_day

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,cohocount
0,1,42.500000,33.000000,0.115000,39.470,-1
1,2,46.000000,35.000000,0.000000,39.920,5
2,3,46.500000,38.000000,0.215000,39.920,2
3,4,44.750000,32.250000,0.420000,40.910,-1
4,5,41.500000,33.000000,1.060000,40.460,-3
...,...,...,...,...,...,...
196,361,41.750000,34.000000,0.100000,40.955,0
197,362,46.666667,37.666667,1.060000,41.660,0
198,363,41.666667,32.666667,0.206667,41.300,0
199,364,44.500000,36.000000,1.410000,40.550,0


In [27]:
df_coho_day.to_csv('../Resources/refactored_data/daily_fish_groups/df_coho_day.csv', index=False)

# Chinook Count Weekly Groupby

In [28]:
# Pull desired steelhead columns
df_chinook = df[['day','maxtempf','mintempf','precipitationinch','watertempf','chinookcount']]
df_chinook

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
0,1,49.0,35.0,0.89,,
1,2,45.0,36.0,0.85,,
2,3,46.0,39.0,0.02,,
3,4,49.0,42.0,0.04,,
4,5,53.0,45.0,0.47,,
...,...,...,...,...,...,...
11683,361,37.0,26.0,0.18,,
11684,362,30.0,25.0,0.12,,1.0
11685,363,32.0,25.0,0.10,,
11686,364,38.0,24.0,0.05,,


In [29]:
# Drop NaN values
df_chinook = df_chinook.dropna(axis=0)
df_chinook

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
73,74,53.0,38.0,0.12,44.06,2.0
74,75,69.0,38.0,0.00,44.96,1.0
75,76,61.0,44.0,0.00,44.06,2.0
76,77,59.0,44.0,0.02,44.96,1.0
78,79,63.0,47.0,0.14,46.04,2.0
...,...,...,...,...,...,...
11583,261,76.0,52.0,0.93,66.56,4379.0
11584,262,60.0,51.0,1.50,66.56,5383.0
11585,263,64.0,50.0,0.58,66.74,4409.0
11586,264,75.0,50.0,0.00,66.38,3238.0


In [30]:
# Group by average count per week
df_chinook_day = df_chinook.groupby(['day']).mean().reset_index()
df_chinook_day.head(3)

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
0,1,51.0,34.0,0.0,39.92,1.0
1,3,42.0,38.0,0.01,39.92,2.0
2,5,43.0,28.0,0.02,41.0,1.0


In [31]:
df_chinook_day = df_chinook_day.astype({'chinookcount':'int64'})
df_chinook_day.dtypes

day                    int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
chinookcount           int64
dtype: object

In [32]:
df_chinook_day.head()

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
0,1,51.0,34.0,0.0,39.92,1
1,3,42.0,38.0,0.01,39.92,2
2,5,43.0,28.0,0.02,41.0,1
3,8,37.0,34.0,0.0,39.92,1
4,11,59.0,45.0,0.65,39.02,1


In [34]:
df_chinook_day.to_csv('../Resources/refactored_data/daily_fish_groups/df_chinook_day.csv', index=False)

# Shad Count Weekly Groupby

In [35]:
# Pull desired steelhead columns
df_shad = df[['day','maxtempf','mintempf','precipitationinch','watertempf','shadcount']]
df_shad

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,shadcount
0,1,49.0,35.0,0.89,,
1,2,45.0,36.0,0.85,,
2,3,46.0,39.0,0.02,,
3,4,49.0,42.0,0.04,,
4,5,53.0,45.0,0.47,,
...,...,...,...,...,...,...
11683,361,37.0,26.0,0.18,,
11684,362,30.0,25.0,0.12,,
11685,363,32.0,25.0,0.10,,
11686,364,38.0,24.0,0.05,,


In [36]:
# Drop NaN values
df_shad = df_shad.dropna(axis=0)
df_shad

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,shadcount
113,114,55.0,41.0,0.27,53.96,2.0
117,118,51.0,41.0,1.68,53.96,1.0
119,120,61.0,41.0,0.08,53.06,1.0
121,122,70.0,48.0,0.05,53.96,24.0
122,123,66.0,51.0,0.00,53.96,9.0
...,...,...,...,...,...,...
11561,239,75.0,60.0,0.00,69.80,8.0
11562,240,71.0,51.0,0.00,69.62,57.0
11563,241,83.0,51.0,0.00,69.62,20.0
11564,242,85.0,53.0,0.00,69.26,61.0


In [38]:
# Group by average count per week
df_shad_day = df_shad.groupby(['day']).mean().reset_index()
df_shad_day

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,shadcount
0,78,51.000000,38.000000,0.100000,39.920000,1.000000
1,79,56.000000,38.000000,0.620000,39.920000,2.000000
2,80,57.000000,38.000000,0.000000,41.000000,8.000000
3,105,62.000000,48.000000,0.170000,51.980000,1.000000
4,106,69.000000,50.000000,0.000000,51.980000,1.000000
...,...,...,...,...,...,...
135,240,81.689655,57.103448,0.008966,70.066897,38.172414
136,241,80.133333,56.766667,0.041333,70.130000,27.966667
137,242,77.037037,56.629630,0.094074,69.960000,27.518519
138,243,77.965517,55.724138,0.034138,69.930345,23.586207


In [39]:
df_shad_day = df_shad_day.astype({'shadcount':'int64'})
df_shad_day.dtypes

day                    int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
shadcount              int64
dtype: object

In [41]:
df_shad_day

Unnamed: 0,day,maxtempf,mintempf,precipitationinch,watertempf,shadcount
0,78,51.000000,38.000000,0.100000,39.920000,1
1,79,56.000000,38.000000,0.620000,39.920000,2
2,80,57.000000,38.000000,0.000000,41.000000,8
3,105,62.000000,48.000000,0.170000,51.980000,1
4,106,69.000000,50.000000,0.000000,51.980000,1
...,...,...,...,...,...,...
135,240,81.689655,57.103448,0.008966,70.066897,38
136,241,80.133333,56.766667,0.041333,70.130000,27
137,242,77.037037,56.629630,0.094074,69.960000,27
138,243,77.965517,55.724138,0.034138,69.930345,23


In [42]:
df_shad_day.to_csv('../Resources/refactored_data/daily_fish_groups/df_shad_day.csv', index=False)