## This notebook creates CSVs with summaries of Chum and Pink examined in each zone
Each table shows, for chum or pink salmon, either the number of fish examined, their mean length, or their mean weight, broken down by zone and by out-migration month (March - June).

This notebook produces six CSVs: (chum, pink) × (number of fish, mean length, mean weight)

In [22]:
import pandas as pd
from pathlib import Path

In [23]:
# define data location constants (all relative to the notebook's working directory)
DATA_DIR = Path('..') / 'source_data'
WILD_FISH_DATA = DATA_DIR / 'all_wild_fish_lice.csv'
WILD_EVENT_DATA = DATA_DIR / 'all_wild_sample_events.csv'

OUTPUT_DIR = Path('..') / 'output' / 'Table_6'
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output folder exists before any of the CSV-writing cells run
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [24]:
# load the data
# event_id is the join key between the two files, so it is read as text
# straight from the CSV.  Letting pandas infer a numeric type first and casting
# with astype(str) afterwards can spell the same id as '123' in one file and
# '123.0' in the other (a single blank makes the whole column float), and
# those rows would then silently fail to match in the merge below.
wild_events_df = pd.read_csv(WILD_EVENT_DATA, dtype={'event_id': str})
wild_fish_df = pd.read_csv(WILD_FISH_DATA, dtype={'event_id': str})

# left join: every sample event is kept; an event with no matching fish rows
# gets NaN in the columns that come from the fish file
wild_df = pd.merge(wild_events_df, wild_fish_df, on='event_id', how='left')

  wild_fish_df = pd.read_csv(WILD_FISH_DATA)


In [25]:
# parse the sample dates as UTC timestamps; values that cannot be parsed become NaT
wild_df['sampledate'] = pd.to_datetime(wild_df['sampledate'], errors='coerce', utc=True)
# calendar month (1-12) of each sample
wild_df['month'] = wild_df['sampledate'].dt.month

# keep only the out-migration months, March through June
in_migration_period = wild_df['month'].isin([3, 4, 5, 6])
wild_df = wild_df[in_migration_period]

# show which months are left after filtering
wild_df['month'].unique()

array([5, 6, 3, 4])

In [26]:
def count_fish(species_name):
    """
    Gets the counts of wild fish sampled in each zone.
    First column is totals per zone, then a column for each month during the out-migration period (March-June).
    :param species_name: Name of the fish species to count.
    :type species_name: str
    :return: Dataframe with one row per DFO zone; columns DFO zone, total count, one per month
    :rtype: pandas.Dataframe object
    """
    # get the part of the wild that is just this species
    species_df = wild_df[wild_df.fish_species == species_name]

    # get the number of fish per zone, all year
    species_all_num_fish_df = pd.pivot_table(species_df, values='fish_id', index='dfozone',
                                          columns=['fish_species'], aggfunc='count').reset_index()

    # get the number of fish per zone, each month
    species_month_num_fish_df = pd.pivot_table(species_df, values='fish_id', index='dfozone',
                                               columns=['month'], aggfunc='count').reset_index()

    # combine all year with months
    species_num_df = pd.merge(species_all_num_fish_df, species_month_num_fish_df, on='dfozone', how='left')

    # give the columns friendlier names
    species_num_df.rename(columns={'dfozone': 'DFO Zone', species_name: 'All year', 3: 'March', 4: 'April', 5: 'May', 6:'June'},
                          inplace=True)

    return species_num_df

In [27]:
def mean_fish(species_name, field_name, source_df=None):
    """
    Gets the mean of a property of wild fish of one species in each zone, for each month present in the data.

    A zone/month combination with no values for the field is left empty (NaN).

    :param species_name: Name of the fish species.
    :type species_name: str
    :param field_name: Name of the dataframe field to take the mean from
    :type field_name: str
    :param source_df: Fish records to summarise, with the columns fish_species, dfozone, month and the requested field.
                      When omitted, the notebook-level wild_df is used.
    :type source_df: pandas.DataFrame or None
    :return: Dataframe with one row per DFO zone; columns DFO zone, one column per month with the mean of the field for that month
    :rtype: pandas.DataFrame
    """
    # fall back to the notebook-wide frame so existing two-argument calls keep working
    if source_df is None:
        source_df = wild_df

    # get the part of the data that is just this species
    species_df = source_df[source_df.fish_species == species_name]

    # mean of the requested field per zone, one column per month
    zone_month_mean_df = pd.pivot_table(species_df, values=field_name, index='dfozone',
                                        columns=['month'], aggfunc='mean').reset_index()

    # give the columns friendlier names (labels that are not present are simply ignored)
    friendly_names = {'dfozone': 'DFO Zone', 3: 'March', 4: 'April', 5: 'May', 6: 'June'}
    return zone_month_mean_df.rename(columns=friendly_names)

## Chum

In [28]:
# count the chum sampled in each zone (zone total plus one column per month)
chum_num_df = count_fish(species_name="Chum Salmon")

# save the table as a CSV, leaving out the dataframe index
chum_num_df.to_csv(OUTPUT_DIR.joinpath('chum_number.csv'), index=False)

In [29]:
# mean chum length per zone and month
chum_length_df = mean_fish("Chum Salmon", field_name='length')

# save the table as a CSV, leaving out the dataframe index
chum_length_df.to_csv(OUTPUT_DIR.joinpath('chum_length.csv'), index=False)

In [30]:
# mean chum weight per zone and month
chum_weight_df = mean_fish("Chum Salmon", field_name='weight')

# save the table as a CSV, leaving out the dataframe index
chum_weight_df.to_csv(OUTPUT_DIR.joinpath('chum_weight.csv'), index=False)

## Pink

In [31]:
# count the pink sampled in each zone (zone total plus one column per month)
pink_num_df = count_fish(species_name="Pink Salmon")

# save the table as a CSV, leaving out the dataframe index
pink_num_df.to_csv(OUTPUT_DIR.joinpath('pink_number.csv'), index=False)

In [32]:
# mean pink length per zone and month
pink_length_df = mean_fish("Pink Salmon", field_name='length')

# save the table as a CSV, leaving out the dataframe index
pink_length_df.to_csv(OUTPUT_DIR.joinpath('pink_length.csv'), index=False)

In [33]:
# calculate the mean of the weights
pink_weight_df = mean_fish("Pink Salmon", 'weight')
# preview the first rows of the table before it is written out
pink_weight_df.head()

month,DFO Zone,March,April,May,June
0,2_3,,0.3,,
1,2_4,,0.403571,0.575,
2,3_1,0.354861,0.620376,0.91875,
3,3_2,,0.373223,2.089765,7.378668
4,3_3,0.260417,0.460349,1.873368,4.822033


In [34]:

# save the pink weight table as a CSV, leaving out the dataframe index
pink_weight_df.to_csv(OUTPUT_DIR.joinpath('pink_weight.csv'), index=False)