In [1]:
# Import dependencies
import numpy as np
import pandas as pd

In [2]:
# Read in csv file
salmon_df = pd.read_csv('Data/salmon_data.csv')
salmon_df.head()

Unnamed: 0.1,Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Start Year,End Year,Effective Catch,Fracwild
0,616,1975,Chinook Salmon (Snake River Fall-run ESU) - Sn...,1000.0,1975.0,2008.0,1851.0,1.0
1,617,1976,Chinook Salmon (Snake River Fall-run ESU) - Sn...,470.0,1975.0,2008.0,617.0,1.0
2,618,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1975.0,2008.0,1097.0,1.0
3,619,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1975.0,2008.0,1011.0,1.0
4,620,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1975.0,2008.0,514.0,1.0


In [3]:
# Drop unnecessary columns
salmon_df.drop(columns = [
    'Unnamed: 0',
    'Start Year',
    'End Year',
    'Effective Catch'],
    inplace=True)

salmon_df.head()

Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild
0,1975,Chinook Salmon (Snake River Fall-run ESU) - Sn...,1000.0,1.0
1,1976,Chinook Salmon (Snake River Fall-run ESU) - Sn...,470.0,1.0
2,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1.0
3,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.0
4,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.0


In [5]:
# Drop duplicate rows
salmon_df = salmon_df.drop_duplicates(subset=['Brood Year', 'Nwr Population Name'])
salmon_df

Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild
0,1975,Chinook Salmon (Snake River Fall-run ESU) - Sn...,1000.0,1.00
1,1976,Chinook Salmon (Snake River Fall-run ESU) - Sn...,470.0,1.00
2,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1.00
3,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.00
4,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.00
...,...,...,...,...
15144,2006,Chinook Salmon (Lower Columbia River ESU) - Wh...,796.0,-99.00
15145,2007,Chinook Salmon (Lower Columbia River ESU) - Wh...,1014.0,-99.00
15146,2008,Chinook Salmon (Lower Columbia River ESU) - Wh...,775.0,-99.00
15147,2009,Chinook Salmon (Lower Columbia River ESU) - Wh...,1342.0,-99.00


In [7]:
# Keep only rows that at >= 0 in Number of Spawners
salmon_df_filtered = salmon_df[salmon_df['Number Of Spawners'] >= 0]
salmon_df['Number Of Spawners'].replace('', np.nan, inplace=True)

# Keep only rows that at >= 0 in Frac Wild
salmon_df_filtered = salmon_df_filtered[salmon_df_filtered['Fracwild'] >= 0]
salmon_df_filtered['Fracwild'].replace('', np.nan, inplace=True)

salmon_df_filtered.dropna()

# Reset Index
salmon_df_filtered.reset_index(drop=True, inplace=True)
salmon_df_filtered

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild
0,1975,Chinook Salmon (Snake River Fall-run ESU) - Sn...,1000.0,1.00
1,1976,Chinook Salmon (Snake River Fall-run ESU) - Sn...,470.0,1.00
2,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1.00
3,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.00
4,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.00
...,...,...,...,...
6881,2002,Chinook Salmon (Lower Columbia River ESU) - Wh...,1859.0,0.38
6882,2003,Chinook Salmon (Lower Columbia River ESU) - Wh...,11898.0,0.19
6883,2004,Chinook Salmon (Lower Columbia River ESU) - Wh...,8850.0,0.19
6884,2005,Chinook Salmon (Lower Columbia River ESU) - Wh...,1504.0,0.18


In [8]:
# Creating Number of Wild Spawners column
salmon_df_filtered["Wild_Spawners"] = salmon_df_filtered["Number Of Spawners"]*salmon_df_filtered["Fracwild"]
# salmon_df_filtered["Wild_Spawners"] = salmon_df_filtered["Number Of Spawners"].multiple(salmon_df_filtered["Fracwild"], axis="index")
salmon_df_filtered.head()

Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild,Wild_Spawners
0,1975,Chinook Salmon (Snake River Fall-run ESU) - Sn...,1000.0,1.0,1000.0
1,1976,Chinook Salmon (Snake River Fall-run ESU) - Sn...,470.0,1.0,470.0
2,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1.0,600.0
3,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.0,640.0
4,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.0,500.0


In [9]:
# Create a data frame with the last row removed
prev_year = salmon_df_filtered.drop(salmon_df_filtered.index[6885])

# Creating arrays to hold data from the previous years independent variables
prev_wild_spawners = prev_year['Wild_Spawners']
prev_wild_spawners

0       1000.00
1        470.00
2        600.00
3        640.00
4        500.00
         ...   
6880     704.48
6881     706.42
6882    2260.62
6883    1681.50
6884     270.72
Name: Wild_Spawners, Length: 6885, dtype: float64

In [11]:
# Create a data frame with the last two rows removed
two_yr_prior = salmon_df_filtered.drop(salmon_df_filtered.index[[6885, 6884]])

# Creating arrays to hold data for independent variables from two years prior
two_yr_prior_wild_spawners = two_yr_prior['Wild_Spawners']
two_yr_prior_wild_spawners

0       1000.00
1        470.00
2        600.00
3        640.00
4        500.00
         ...   
6879      85.17
6880     704.48
6881     706.42
6882    2260.62
6883    1681.50
Name: Wild_Spawners, Length: 6884, dtype: float64

In [13]:
# Create a data frame with the last three rows removed
three_yr_prior = salmon_df_filtered.drop(salmon_df_filtered.index[[6885, 6884, 6883]])

# Creating arrays to hold data for independent variables from two years prior
three_yr_prior_wild_spawners = three_yr_prior['Wild_Spawners']
three_yr_prior_wild_spawners

0       1000.00
1        470.00
2        600.00
3        640.00
4        500.00
         ...   
6878     264.66
6879      85.17
6880     704.48
6881     706.42
6882    2260.62
Name: Wild_Spawners, Length: 6883, dtype: float64

In [14]:
# Adding a column to the dataframe that holds Wild Spawner from previous year
# Delete first row
salmon_df_filtered = salmon_df_filtered.drop(salmon_df_filtered.index[0])
salmon_df_filtered.reset_index(drop=True, inplace=True)

# Adding previous year array as column
salmon_df_filtered['Wild_Spawners_Prev_Yr'] = prev_wild_spawners.tolist()
salmon_df_filtered.head()

Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild,Wild_Spawners,Wild_Spawners_Prev_Yr
0,1976,Chinook Salmon (Snake River Fall-run ESU) - Sn...,470.0,1.0,470.0,1000.0
1,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1.0,600.0,470.0
2,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.0,640.0,600.0
3,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.0,500.0,640.0
4,1980,Chinook Salmon (Snake River Fall-run ESU) - Sn...,450.0,1.0,450.0,500.0


In [15]:
# Adding a column to the dataframe that holds Wild Spawner from two years prior
# Delete first row
salmon_df_filtered = salmon_df_filtered.drop(salmon_df_filtered.index[0])
salmon_df_filtered.reset_index(drop=True, inplace=True)

# Adding two years prior array as column
salmon_df_filtered['Wild_Spawners_Two_Yrs_Prior'] = two_yr_prior_wild_spawners.tolist()
salmon_df_filtered.head()

Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild,Wild_Spawners,Wild_Spawners_Prev_Yr,Wild_Spawners_Two_Yrs_Prior
0,1977,Chinook Salmon (Snake River Fall-run ESU) - Sn...,600.0,1.0,600.0,470.0,1000.0
1,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.0,640.0,600.0,470.0
2,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.0,500.0,640.0,600.0
3,1980,Chinook Salmon (Snake River Fall-run ESU) - Sn...,450.0,1.0,450.0,500.0,640.0
4,1981,Chinook Salmon (Snake River Fall-run ESU) - Sn...,340.0,1.0,340.0,450.0,500.0


In [16]:
# Adding a column to the dataframe that holds Wild Spawner from three years prior
# Delete first row
salmon_df_filtered = salmon_df_filtered.drop(salmon_df_filtered.index[0])
salmon_df_filtered.reset_index(drop=True, inplace=True)

# Adding two years prior array as column
salmon_df_filtered['Wild_Spawners_Three_Yrs_Prior'] = three_yr_prior_wild_spawners.tolist()
salmon_df_filtered.head()

Unnamed: 0,Brood Year,Nwr Population Name,Number Of Spawners,Fracwild,Wild_Spawners,Wild_Spawners_Prev_Yr,Wild_Spawners_Two_Yrs_Prior,Wild_Spawners_Three_Yrs_Prior
0,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.0,640.0,600.0,470.0,1000.0
1,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.0,500.0,640.0,600.0,470.0
2,1980,Chinook Salmon (Snake River Fall-run ESU) - Sn...,450.0,1.0,450.0,500.0,640.0,600.0
3,1981,Chinook Salmon (Snake River Fall-run ESU) - Sn...,340.0,1.0,340.0,450.0,500.0,640.0
4,1982,Chinook Salmon (Snake River Fall-run ESU) - Sn...,720.0,1.0,720.0,340.0,450.0,500.0


In [17]:
# Renaming columns
salmon_df_filtered.rename({
    'Brood Year': 'Brood_Year',
    'Nwr Population Name': 'Nwr_Population_Name',
    'Number Of Spawners': 'Number_Of_Spawners',
    'Effective Catch': 'Effective_Catch'},
    inplace=True,
    axis='columns'
)
salmon_df_filtered

Unnamed: 0,Brood_Year,Nwr_Population_Name,Number_Of_Spawners,Fracwild,Wild_Spawners,Wild_Spawners_Prev_Yr,Wild_Spawners_Two_Yrs_Prior,Wild_Spawners_Three_Yrs_Prior
0,1978,Chinook Salmon (Snake River Fall-run ESU) - Sn...,640.0,1.00,640.00,600.00,470.00,1000.00
1,1979,Chinook Salmon (Snake River Fall-run ESU) - Sn...,500.0,1.00,500.00,640.00,600.00,470.00
2,1980,Chinook Salmon (Snake River Fall-run ESU) - Sn...,450.0,1.00,450.00,500.00,640.00,600.00
3,1981,Chinook Salmon (Snake River Fall-run ESU) - Sn...,340.0,1.00,340.00,450.00,500.00,640.00
4,1982,Chinook Salmon (Snake River Fall-run ESU) - Sn...,720.0,1.00,720.00,340.00,450.00,500.00
...,...,...,...,...,...,...,...,...
6878,2002,Chinook Salmon (Lower Columbia River ESU) - Wh...,1859.0,0.38,706.42,704.48,85.17,264.66
6879,2003,Chinook Salmon (Lower Columbia River ESU) - Wh...,11898.0,0.19,2260.62,706.42,704.48,85.17
6880,2004,Chinook Salmon (Lower Columbia River ESU) - Wh...,8850.0,0.19,1681.50,2260.62,706.42,704.48
6881,2005,Chinook Salmon (Lower Columbia River ESU) - Wh...,1504.0,0.18,270.72,1681.50,2260.62,706.42


In [18]:
# Create Pre-Procesing CSV
salmon_df_filtered.to_csv('Data/salmon_preprocessed_ws.csv',index=False)