In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# Load the combined weather / fish-count CSV into a dataframe.
# The path is relative, so it is resolved against the current working directory.
file_path = Path('../Resources/refactored_data/combineddata2.csv')
df = pd.read_csv(file_path)
# Preview the first rows to check the columns that were read
df.head()

Unnamed: 0,countid,yearvalue,monthvalue,weeknumber,locationname,daterecorded,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount,chinookcount,sockeyecount,cohocount,shadcount
0,19900101,1990,1,1,Bonneville,1/1/1990,49.0,35.0,0.89,,,,,,
1,19900102,1990,1,1,Bonneville,1/2/1990,45.0,36.0,0.85,,,,,,
2,19900103,1990,1,1,Bonneville,1/3/1990,46.0,39.0,0.02,,,,,,
3,19900104,1990,1,1,Bonneville,1/4/1990,49.0,42.0,0.04,,,,,,
4,19900105,1990,1,1,Bonneville,1/5/1990,53.0,45.0,0.47,,,,,,


# Steelhead Count Weekly Groupby

In [3]:
# Keep only the week number, the weather / water readings and the steelhead count
stlhead_cols = ['weeknumber','maxtempf','mintempf','precipitationinch','watertempf','stlheadcount']
df_stlhead = df[stlhead_cols]
df_stlhead.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
0,1,49.0,35.0,0.89,,
1,1,45.0,36.0,0.85,,
2,1,46.0,39.0,0.02,,


In [4]:
# Discard every row that has a missing value in any of the selected columns
df_stlhead = df_stlhead.dropna(axis='index', how='any')
df_stlhead.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
73,11,53.0,38.0,0.12,44.06,27.0
74,11,69.0,38.0,0.0,44.96,44.0
75,11,61.0,44.0,0.0,44.06,36.0


In [5]:
# Mean of every column per week number. The year is not part of the grouping
# key, so rows that share a week number are pooled together.
stlhead_by_week = df_stlhead.groupby('weeknumber')
df_stlhead_week = stlhead_by_week.mean().reset_index()
df_stlhead_week.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
0,1,41.833333,32.185185,0.366481,40.86,17.185185
1,2,42.043956,32.934066,0.410659,40.365055,17.395604
2,3,41.988764,33.550562,0.320674,39.600449,16.258427


In [6]:
# Cast the daily steelhead count to a 64-bit integer and show the resulting dtypes.
# NOTE(review): df_stlhead_week is built from df_stlhead in an earlier cell, so this
# cast does not feed into the weekly averages - confirm whether it is still needed.
stlhead_dtypes = {'stlheadcount':'int64'}
df_stlhead = df_stlhead.astype(stlhead_dtypes)
df_stlhead.dtypes

weeknumber             int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
stlheadcount           int64
dtype: object

In [7]:
# Preview the first five rows of the weekly steelhead averages
df_stlhead_week.head(n=5)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,stlheadcount
0,1,41.833333,32.185185,0.366481,40.86,17.185185
1,2,42.043956,32.934066,0.410659,40.365055,17.395604
2,3,41.988764,33.550562,0.320674,39.600449,16.258427
3,4,44.470588,35.082353,0.316706,38.816706,14.552941
4,5,47.13253,37.060241,0.461205,39.098072,16.879518


In [32]:
# Export the weekly steelhead averages without the dataframe index.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; this keeps the cell runnable on a fresh checkout.
stlhead_week_csv = Path('../Resources/refactored_data/weekly_fish_groups/df_stlhead_week.csv')
stlhead_week_csv.parent.mkdir(parents=True, exist_ok=True)
df_stlhead_week.to_csv(stlhead_week_csv, index=False)

# Sockeye Count Weekly Groupby

In [8]:
# Keep only the week number, the weather / water readings and the sockeye count
sockeye_cols = ['weeknumber','maxtempf','mintempf','precipitationinch','watertempf','sockeyecount']
df_sockeye = df[sockeye_cols]
df_sockeye.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
0,1,49.0,35.0,0.89,,
1,1,45.0,36.0,0.85,,
2,1,46.0,39.0,0.02,,


In [9]:
# Discard every row that has a missing value in any of the selected columns
df_sockeye = df_sockeye.dropna(axis='index', how='any')
df_sockeye.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
152,22,65.0,51.0,0.03,59.0,1.0
155,23,64.0,49.0,0.07,59.0,3.0
158,23,69.0,52.0,0.06,59.0,1.0


In [10]:
# Mean of every column per week number. The year is not part of the grouping
# key, so rows that share a week number are pooled together.
sockeye_by_week = df_sockeye.groupby('weeknumber')
df_sockeye_week = sockeye_by_week.mean().reset_index()
df_sockeye_week.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
0,2,59.0,45.0,0.65,39.02,1.0
1,3,50.0,36.0,0.0,41.0,1.0
2,6,56.0,41.0,0.42,39.92,1.0


In [13]:
# Cast the daily sockeye count to a 64-bit integer and show the resulting dtypes.
# NOTE(review): df_sockeye_week is built from df_sockeye in an earlier cell, so this
# cast does not feed into the weekly averages - confirm whether it is still needed.
sockeye_dtypes = {'sockeyecount':'int64'}
df_sockeye = df_sockeye.astype(sockeye_dtypes)
df_sockeye.dtypes

weeknumber             int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
sockeyecount           int64
dtype: object

In [14]:
# Preview the first five rows of the weekly sockeye averages
df_sockeye_week.head(n=5)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,sockeyecount
0,2,59.0,45.0,0.65,39.02,1.0
1,3,50.0,36.0,0.0,41.0,1.0
2,6,56.0,41.0,0.42,39.92,1.0
3,15,64.0,44.0,0.01,46.04,1.0
4,16,66.0,46.0,0.0,50.0,1.0


In [33]:
# Export the weekly sockeye averages without the dataframe index.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; this keeps the cell runnable on a fresh checkout.
sockeye_week_csv = Path('../Resources/refactored_data/weekly_fish_groups/df_sockeye_week.csv')
sockeye_week_csv.parent.mkdir(parents=True, exist_ok=True)
df_sockeye_week.to_csv(sockeye_week_csv, index=False)

# Coho Count Weekly Groupby

In [15]:
# Keep only the week number, the weather / water readings and the coho count
coho_cols = ['weeknumber','maxtempf','mintempf','precipitationinch','watertempf','cohocount']
df_coho = df[coho_cols]
df_coho.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,cohocount
0,1,49.0,35.0,0.89,,
1,1,45.0,36.0,0.85,,
2,1,46.0,39.0,0.02,,


In [16]:
# Discard every row that has a missing value in any of the selected columns
df_coho = df_coho.dropna(axis='index', how='any')
df_coho.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,cohocount
196,29,86.0,60.0,0.0,66.92,5.0
213,31,75.0,61.0,0.0,71.96,2.0
214,31,80.0,56.0,0.0,71.06,2.0


In [17]:
# Mean of every column per week number. The year is not part of the grouping
# key, so rows that share a week number are pooled together.
# NOTE(review): the saved output shows negative weekly means for cohocount, so the
# input holds negative counts - confirm these are intended before averaging.
coho_by_week = df_coho.groupby('weeknumber')
df_coho_week = coho_by_week.mean().reset_index()
df_coho_week.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,cohocount
0,1,46.333333,34.833333,0.268333,40.46,0.0
1,2,40.692308,32.538462,0.416923,40.072308,-0.615385
2,3,41.0,33.555556,0.248889,39.94,-0.333333


In [19]:
# Cast the daily coho count to a 64-bit integer and show the resulting dtypes.
# NOTE(review): df_coho_week is built from df_coho in an earlier cell, so this
# cast does not feed into the weekly averages - confirm whether it is still needed.
coho_dtypes = {'cohocount':'int64'}
df_coho = df_coho.astype(coho_dtypes)
df_coho.dtypes

weeknumber             int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
cohocount              int64
dtype: object

In [34]:
# Preview the first five rows of the weekly coho averages
df_coho_week.head(n=5)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,cohocount
0,1,46.333333,34.833333,0.268333,40.46,0.0
1,2,40.692308,32.538462,0.416923,40.072308,-0.615385
2,3,41.0,33.555556,0.248889,39.94,-0.333333
3,4,39.5,32.0,0.28,39.02,0.0
4,5,45.25,37.75,0.08,41.0,-1.0


In [35]:
# Export the weekly coho averages without the dataframe index.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; this keeps the cell runnable on a fresh checkout.
coho_week_csv = Path('../Resources/refactored_data/weekly_fish_groups/df_coho_week.csv')
coho_week_csv.parent.mkdir(parents=True, exist_ok=True)
df_coho_week.to_csv(coho_week_csv, index=False)

# Chinook Count Weekly Groupby

In [20]:
# Keep only the week number, the weather / water readings and the chinook count
chinook_cols = ['weeknumber','maxtempf','mintempf','precipitationinch','watertempf','chinookcount']
df_chinook = df[chinook_cols]
df_chinook.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
0,1,49.0,35.0,0.89,,
1,1,45.0,36.0,0.85,,
2,1,46.0,39.0,0.02,,


In [21]:
# Discard every row that has a missing value in any of the selected columns
df_chinook = df_chinook.dropna(axis='index', how='any')
df_chinook.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
73,11,53.0,38.0,0.12,44.06,2.0
74,11,69.0,38.0,0.0,44.96,1.0
75,11,61.0,44.0,0.0,44.06,2.0


In [22]:
# Mean of every column per week number. The year is not part of the grouping
# key, so rows that share a week number are pooled together.
# NOTE(review): the saved output shows a negative weekly mean for chinookcount, so the
# input holds negative counts - confirm these are intended before averaging.
chinook_by_week = df_chinook.groupby('weeknumber')
df_chinook_week = chinook_by_week.mean().reset_index()
df_chinook_week.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
0,1,51.0,34.0,0.0,39.92,1.0
1,2,45.25,36.25,0.17,39.965,1.25
2,3,46.333333,38.833333,0.206667,40.73,-0.166667


In [24]:
# Cast the daily chinook count to a 64-bit integer and show the resulting dtypes.
# NOTE(review): df_chinook_week is built from df_chinook in an earlier cell, so this
# cast does not feed into the weekly averages - confirm whether it is still needed.
chinook_dtypes = {'chinookcount':'int64'}
df_chinook = df_chinook.astype(chinook_dtypes)
df_chinook.dtypes

weeknumber             int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
chinookcount           int64
dtype: object

In [25]:
# Preview the first five rows of the weekly chinook averages
df_chinook_week.head(n=5)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,chinookcount
0,1,51.0,34.0,0.0,39.92,1.0
1,2,45.25,36.25,0.17,39.965,1.25
2,3,46.333333,38.833333,0.206667,40.73,-0.166667
3,4,47.666667,35.5,0.551667,41.0,0.666667
4,5,54.833333,40.666667,1.015,41.42,1.0


In [36]:
# Export the weekly chinook averages without the dataframe index.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; this keeps the cell runnable on a fresh checkout.
chinook_week_csv = Path('../Resources/refactored_data/weekly_fish_groups/df_chinook_week.csv')
chinook_week_csv.parent.mkdir(parents=True, exist_ok=True)
df_chinook_week.to_csv(chinook_week_csv, index=False)

# Shad Count Weekly Groupby

In [26]:
# Keep only the week number, the weather / water readings and the shad count
shad_cols = ['weeknumber','maxtempf','mintempf','precipitationinch','watertempf','shadcount']
df_shad = df[shad_cols]
df_shad.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,shadcount
0,1,49.0,35.0,0.89,,
1,1,45.0,36.0,0.85,,
2,1,46.0,39.0,0.02,,


In [28]:
# Discard every row that has a missing value in any of the selected columns
df_shad = df_shad.dropna(axis='index', how='any')
df_shad.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,shadcount
113,17,55.0,41.0,0.27,53.96,2.0
117,17,51.0,41.0,1.68,53.96,1.0
119,18,61.0,41.0,0.08,53.06,1.0


In [29]:
# Mean of every column per week number. The year is not part of the grouping
# key, so rows that share a week number are pooled together.
shad_by_week = df_shad.groupby('weeknumber')
df_shad_week = shad_by_week.mean().reset_index()
df_shad_week.head(3)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,shadcount
0,12,53.5,38.0,0.36,39.92,1.5
1,13,57.0,38.0,0.0,41.0,8.0
2,16,64.5,49.0,0.2625,51.98,2.25


In [30]:
# Cast the daily shad count to a 64-bit integer and show the resulting dtypes.
# NOTE(review): df_shad_week is built from df_shad in an earlier cell, so this
# cast does not feed into the weekly averages - confirm whether it is still needed.
shad_dtypes = {'shadcount':'int64'}
df_shad = df_shad.astype(shad_dtypes)
df_shad.dtypes

weeknumber             int64
maxtempf             float64
mintempf             float64
precipitationinch    float64
watertempf           float64
shadcount              int64
dtype: object

In [31]:
# Preview the first five rows of the weekly shad averages
df_shad_week.head(n=5)

Unnamed: 0,weeknumber,maxtempf,mintempf,precipitationinch,watertempf,shadcount
0,12,53.5,38.0,0.36,39.92,1.5
1,13,57.0,38.0,0.0,41.0,8.0
2,16,64.5,49.0,0.2625,51.98,2.25
3,17,61.333333,42.0,0.235833,51.92,3.166667
4,18,67.5,46.017857,0.185714,53.545357,18.107143


In [37]:
# Export the weekly shad averages without the dataframe index.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; this keeps the cell runnable on a fresh checkout.
shad_week_csv = Path('../Resources/refactored_data/weekly_fish_groups/df_shad_week.csv')
shad_week_csv.parent.mkdir(parents=True, exist_ok=True)
df_shad_week.to_csv(shad_week_csv, index=False)

# Weather Outlook Group by Day of the Year

In [None]:
# Load the daily weather table into its own dataframe.
# NOTE(review): this rebinds `file_path`, which the earlier load cell pointed at
# combineddata2.csv; re-running that cell's read_csv line alone afterwards would
# read this file instead.
file_path = Path('../Resources/weather/dailyWeatherTable.csv')
df_weather_outlook = pd.read_csv(file_path)
# Preview the first rows to check the columns that were read
df_weather_outlook.head()