## This Notebook generates a CSV for Table 1a
The CSV has the structure of Table 1a, except for the Total row at the bottom. The presentation and the Total row as in the paper was done in another tool.

In [28]:
from pathlib import Path
import pandas as pd

In [29]:
# define data location constants
DATA_DIR = Path('..') / 'source_data'
WILD_FISH_DATA = DATA_DIR / 'all_wild_fish_lice.csv'
WILD_EVENT_DATA = DATA_DIR / 'all_wild_sample_events.csv'

OUTPUT_DIR = Path('..') / 'output' / 'Table_1a'

In [30]:
# import the data
events_df = pd.read_csv(WILD_EVENT_DATA)
events_df['event_id'] = events_df['event_id'].astype(str)
fish_df = pd.read_csv(WILD_FISH_DATA)
fish_df['event_id'] = fish_df['event_id'].astype(str)


  fish_df = pd.read_csv(WILD_FISH_DATA)


In [31]:

# make a year column
events_df['sampledate'] = pd.to_datetime(events_df.sampledate, errors='raise', utc=True)
events_df['year'] = events_df['sampledate'].dt.year

## Calculate the "All Events" column

In [32]:
# group by year
events_year_group = events_df.groupby(['year'])

# get the number of sample events each year
num_events_df = events_year_group['event_id'].count().reset_index()
num_events_df.rename(columns={'event_id': 'All Events'}, inplace=True)
num_events_df.head()

Unnamed: 0,year,All Events
0,2001,16
1,2002,31
2,2003,1807
3,2004,1109
4,2005,969


## Calculate the "Events with Fish" column

In [33]:
# join events and fish data, merge 'right' to only include events that have fish
wild_with_fish_df = pd.merge(events_df, fish_df, how='right', on=['event_id'])

In [34]:
# get a row for each year-event_id combination
wild_with_fish_year_group = wild_with_fish_df.groupby(['year', 'event_id'])
num_events_with_fish_id_df = wild_with_fish_year_group['fish_id'].count().reset_index()
num_events_with_fish_id_df.head()

Unnamed: 0,year,event_id,fish_id
0,2001.0,scfs_1.0,5
1,2003.0,1,8
2,2003.0,10,16
3,2003.0,10011,36
4,2003.0,10013,41


In [35]:
# group by year and count the unique event_id for each year
num_events_with_fish_group = num_events_with_fish_id_df.groupby('year')
num_events_with_fish_df = num_events_with_fish_group['event_id'].count().reset_index()
num_events_with_fish_df.rename(columns={'event_id': 'Events with fish'}, inplace=True)
num_events_with_fish_df.head()

Unnamed: 0,year,Events with fish
0,2001.0,1
1,2003.0,1237
2,2004.0,757
3,2005.0,555
4,2006.0,671


## Calculate numbers of fish caught each year

In [36]:
# merge events and fish with all the entries
all_events_fish_df = pd.merge(events_df, fish_df, how='left', on='event_id')
all_events_fish_df.head()

Unnamed: 0,event_id,sampledate,region,dfozone,sample_site,latitude,longitude,source,year,fish_id,...,lep_unknown,cal_cop,cal_chal,cal_motile,cal_unknown,unknown_cop,unknown_chal,unknown_motile,unknown_unknown,lice_protocol
0,1,2003-05-13 00:00:00+00:00,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada,2003,1715,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,1,2003-05-13 00:00:00+00:00,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada,2003,1716,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,1,2003-05-13 00:00:00+00:00,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada,2003,1717,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,1,2003-05-13 00:00:00+00:00,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada,2003,1718,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,1,2003-05-13 00:00:00+00:00,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada,2003,1719,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [37]:
# pivot and count to get counts of each species of fish each year
year_species_df = pd.pivot_table(all_events_fish_df, values='region', index='year', columns=['fish_species'],
                                    aggfunc='count', fill_value=0).reset_index()

year_species_df

fish_species,year,Chinook Salmon,Chum Salmon,Coho Salmon,Other Species,Pacific Herring,Pink Salmon,Sockeye Salmon,Three-Spined Stickleback
0,2001,0,0,0,0,0,5,0,0
1,2003,1011,15175,2909,1614,828,9853,64,2886
2,2004,754,19157,104,1,93,2707,2,1479
3,2005,405,4888,128,1,0,4074,271,45
4,2006,214,13402,170,695,0,10512,9,10
5,2007,345,18421,165,1,0,12606,50,1350
6,2008,521,15515,190,0,0,18845,10,1601
7,2009,126,14766,136,65,6,15064,2,2567
8,2010,137,5210,226,0,0,5946,24,26
9,2011,181,7006,320,18,60,5773,1,55


## Calculate the total numbers of fish each year

In [38]:
# calculate total number of fish each year
total_fish_df = all_events_fish_df.groupby('year')['fish_id'].count().reset_index()
total_fish_df.head(10)

Unnamed: 0,year,fish_id
0,2001,5
1,2002,0
2,2003,34340
3,2004,24297
4,2005,9812
5,2006,25012
6,2007,32938
7,2008,36682
8,2009,32732
9,2010,11569


## Merge all the results into the final table

In [39]:
# counts of all the events and events with fish each year
table_1a_df = pd.merge(num_events_df, num_events_with_fish_df, on='year', how='left')
# add in the counts of species each year
table_1a_df = pd.merge(table_1a_df, year_species_df, on='year', how='left')
# add the total number of fish
table_1a_df = pd.merge(table_1a_df, total_fish_df, on='year', how='left')
table_1a_df.head(10)

Unnamed: 0,year,All Events,Events with fish,Chinook Salmon,Chum Salmon,Coho Salmon,Other Species,Pacific Herring,Pink Salmon,Sockeye Salmon,Three-Spined Stickleback,fish_id
0,2001,16,1.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,5
1,2002,31,,,,,,,,,,0
2,2003,1807,1237.0,1011.0,15175.0,2909.0,1614.0,828.0,9853.0,64.0,2886.0,34340
3,2004,1109,757.0,754.0,19157.0,104.0,1.0,93.0,2707.0,2.0,1479.0,24297
4,2005,969,555.0,405.0,4888.0,128.0,1.0,0.0,4074.0,271.0,45.0,9812
5,2006,1098,671.0,214.0,13402.0,170.0,695.0,0.0,10512.0,9.0,10.0,25012
6,2007,1127,926.0,345.0,18421.0,165.0,1.0,0.0,12606.0,50.0,1350.0,32938
7,2008,1214,923.0,521.0,15515.0,190.0,0.0,0.0,18845.0,10.0,1601.0,36682
8,2009,1168,931.0,126.0,14766.0,136.0,65.0,6.0,15064.0,2.0,2567.0,32732
9,2010,732,567.0,137.0,5210.0,226.0,0.0,0.0,5946.0,24.0,26.0,11569


In [40]:
# rename columns nicely for publication
table_1a_df.rename(
    columns={'year': 'Year', 'All Events': 'All events',
             'Chum Salmon': 'Chum', 'Pink Salmon': 'Pink', 'Coho Salmon': 'Coho', 'Chinook Salmon': 'Chinook',
             'Sockeye Salmon': 'Sockeye', 'Three-Spined Stickleback': 'Stickleback', 'Other Species': 'Other species',
             'fish_id': 'All fish'},
    inplace=True
)

# order the columns
col_order = ['Year', 'All events', 'Events with fish','Chum','Pink', 'Coho', 'Chinook', 'Sockeye',
             'Stickleback', 'Pacific Herring', 'Other species', 'All fish']
table_1a_df = table_1a_df.reindex(columns=col_order)
table_1a_df

Unnamed: 0,Year,All events,Events with fish,Chum,Pink,Coho,Chinook,Sockeye,Stickleback,Pacific Herring,Other species,All fish
0,2001,16,1.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,2002,31,,,,,,,,,,0
2,2003,1807,1237.0,15175.0,9853.0,2909.0,1011.0,64.0,2886.0,828.0,1614.0,34340
3,2004,1109,757.0,19157.0,2707.0,104.0,754.0,2.0,1479.0,93.0,1.0,24297
4,2005,969,555.0,4888.0,4074.0,128.0,405.0,271.0,45.0,0.0,1.0,9812
5,2006,1098,671.0,13402.0,10512.0,170.0,214.0,9.0,10.0,0.0,695.0,25012
6,2007,1127,926.0,18421.0,12606.0,165.0,345.0,50.0,1350.0,0.0,1.0,32938
7,2008,1214,923.0,15515.0,18845.0,190.0,521.0,10.0,1601.0,0.0,0.0,36682
8,2009,1168,931.0,14766.0,15064.0,136.0,126.0,2.0,2567.0,6.0,65.0,32732
9,2010,732,567.0,5210.0,5946.0,226.0,137.0,24.0,26.0,0.0,0.0,11569


In [41]:
# export the table to a CSV for final formatting
table_1a_df.to_csv(OUTPUT_DIR / "Table_1a.csv", index=False)