## This Notebook formats the Salmon Coast Research Station wild sampling data to fit the format of our wild sampling data.
### To integrate it with other wild data, run 'Integrate_Wild_Data.ipynb' after generating the formatted data.

The data was downloaded on 13 March 2025 from [GitHub](https://github.com/CedarCoastFieldStation/Sea-lice-database). The [Cedar Coast site](https://cedarcoastsociety.org)

In [352]:
import pandas as pd
from pathlib import Path

### Configuration variables for this notebook
Adjust for your local setup

In [353]:
# paths to the source Cedar Creek data files
ccfs_wild_data_dir = Path('CedarCoast') / 'Data'
ccfs_events_filepath = ccfs_wild_data_dir / 'ClayoquotSeaLice_Site_Data.csv'
ccfs_fish_lice_filepath = ccfs_wild_data_dir / 'ClayoquotSeaLice_fishData.csv'

# output paths for writing the formatted Cedar Creek Field Station data
wild_data_dir = Path('.')
ccfs_formatted_events_filepath = wild_data_dir / 'cedar_coast_wild_sample_events.csv'
ccfs_formatted_lice_filepath = wild_data_dir / 'cedar_coast_wild_fish_lice.csv'

# Format the event file

We think that the "Site ID" is intended to be the same as the "event_id" field that we use - a unique label for a location and date.
However, there are some duplicate "Site ID" in the Site_Data file, and many fish in fishData that don't correspond to "Site ID" in Site_Data.

Our solution is to construct an events file from the information given, with the following properties:
- event_id is "ccfs_" + location + date
- there will be one lat/long for each location string
- lat/long is the mean of all the lat/long for that location in Site_Data
- the list of unique event_id is the union of unique event_id from Site_Data and fishData

In [354]:
# read in the event file and parse the year, month, day columns into a date
ccfs_events_df = pd.read_csv(ccfs_events_filepath, parse_dates={'sampledate': ['year', 'month', 'day']})
ccfs_events_df.head()

Unnamed: 0,sampledate,Site ID,time,location,salt_surf,salt_1m,temp_surf,temp_1m,salmon_captured,salmon_examined,...,chum_examined,sockeye_examined,morts_recovery,morts_other,P_ratio,C_ratio,latitude,longitude,bycatch,notes
0,2018-04-26,1,,Buckle Bay,,,,,47.0,47.0,...,47.0,,,,,,,,,
1,2018-05-16,2,,Buckle Bay,28.04,28.11,11.88,11.72,35.0,16.0,...,16.0,,,,,,49°10.921,--125°57.883,1 oolichan,
2,2018-05-16,3,,Elbow Bank,26.73,26.65,12.12,12.14,53.0,53.0,...,48.0,,,,,,49°12.290,--125°57.134,"tube snout, 4 lingcod, 4 flatfish, 2 sculpin, ...",
3,2018-05-16,4,,Cypre River,25.1,25.18,12.28,12.12,8.0,8.0,...,,,,,,,49°16.192,--125°54.358,tube snout,
4,2018-05-10,5,,Elbow Bank,24.8,24.81,11.56,11.47,26.0,26.0,...,26.0,,,,,,49°11.798,-125°56.885,"1 sculpin, 25-30 flatfish, juvenile cod",


In [355]:
# there are a couple of inconsistencies in the location names
# define a function as we'll need to do this to the fish_lice dataframe as well
def fix_locations(dataframe):
    """
    Renames 'location' as 'sample_site' and makes some manual fixes so the strings are consistent.
    Modifies in place.

    :param dataframe: The dataframe to make a sample_site column
    :type dataframe: pandas.DataFrame object
    """
    # remove any surrounding spaces
    dataframe['sample_site'] = dataframe.location.str.strip()

    # replace some names for consistency
    dataframe['sample_site'].replace('Bedwell estuary', 'Bedwell Estuary', inplace=True)
    dataframe['sample_site'].replace('Bedwell estuary 2', 'Bedwell Estuary 2', inplace=True)
    dataframe['sample_site'].replace('Bedwell estuary 3', 'Bedwell Estuary 3', inplace=True)
    dataframe['sample_site'].replace('Bedwell estuary 4', 'Bedwell Estuary 4', inplace=True)
    dataframe['sample_site'].replace('Cypre', 'Cypre River', inplace=True)
    dataframe['sample_site'].replace('Meares North', 'North Meares', inplace=True)
    dataframe['sample_site'].replace('Cancer', 'Cancer (Herbert)', inplace=True)
    dataframe['sample_site'].replace('Tranquil estuary', 'Tranquil Estuary', inplace=True)

In [356]:
fix_locations(ccfs_events_df)

In [357]:
# define a function as we'll need to do this to the fish_lice dataframe as well
def make_event_id(dataframe):
    """
    Make a new column in the dataframe "event_id" that is "ccfs_" + sample_site + sampledate
    Assumes the dataframe has columns 'sample_site' and 'sampledate'
    Modifies in place.

    :param dataframe: A dataframe to add event_id column to
    :type dataframe: pandas.DataFrame object
    """
    dataframe['event_id'] = 'ccfs_' + dataframe['sample_site'] + '_' + dataframe.sampledate.dt.strftime('%Y-%m-%d')

    # replace spaces with '_' to avoid multiple words
    dataframe['event_id'] = dataframe['event_id'].str.replace(' ', '_')

In [358]:
make_event_id(ccfs_events_df)
ccfs_events_df.head()

Unnamed: 0,sampledate,Site ID,time,location,salt_surf,salt_1m,temp_surf,temp_1m,salmon_captured,salmon_examined,...,morts_recovery,morts_other,P_ratio,C_ratio,latitude,longitude,bycatch,notes,sample_site,event_id
0,2018-04-26,1,,Buckle Bay,,,,,47.0,47.0,...,,,,,,,,,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
1,2018-05-16,2,,Buckle Bay,28.04,28.11,11.88,11.72,35.0,16.0,...,,,,,49°10.921,--125°57.883,1 oolichan,,Buckle Bay,ccfs_Buckle_Bay_2018-05-16
2,2018-05-16,3,,Elbow Bank,26.73,26.65,12.12,12.14,53.0,53.0,...,,,,,49°12.290,--125°57.134,"tube snout, 4 lingcod, 4 flatfish, 2 sculpin, ...",,Elbow Bank,ccfs_Elbow_Bank_2018-05-16
3,2018-05-16,4,,Cypre River,25.1,25.18,12.28,12.12,8.0,8.0,...,,,,,49°16.192,--125°54.358,tube snout,,Cypre River,ccfs_Cypre_River_2018-05-16
4,2018-05-10,5,,Elbow Bank,24.8,24.81,11.56,11.47,26.0,26.0,...,,,,,49°11.798,-125°56.885,"1 sculpin, 25-30 flatfish, juvenile cod",,Elbow Bank,ccfs_Elbow_Bank_2018-05-10


In [359]:
def convert_deg_min_to_dec_deg(deg_min):
    """
    Converts a lat or long expressed as a string with degrees and decimal minutes, to a float of decimal degrees.
    :param deg_min: Degree and decimal minutes, e.g. "--125°57.883"
    :type deg_min: str
    :return: Decimal degree equivalent
    :rtype: float
    """
    # sometimes it's null - just return it
    if pd.isnull(deg_min):
        return deg_min

    # split the string at the degree symbol. strip to remove any extra spaces
    try:
        degree_str, minute_str = [x.strip() for x in deg_min.split('°')]
    except AttributeError as e:
        print("{} ({})".format(deg_min, type(deg_min)))
        raise e

    # sometimes the degree has '--' at the front
    if degree_str.startswith('--'):
        degree_str = degree_str[1:]

    degree = float(degree_str)
    minute = float(minute_str) if degree > 0 else -float(minute_str)
    dec_degree = degree + minute / 60

    return dec_degree

In [360]:
# convert latitude and longitude to decimal degrees
ccfs_events_df.latitude = ccfs_events_df.latitude.apply(convert_deg_min_to_dec_deg)
ccfs_events_df.longitude = ccfs_events_df.longitude.apply(convert_deg_min_to_dec_deg)

ccfs_events_df.head()

Unnamed: 0,sampledate,Site ID,time,location,salt_surf,salt_1m,temp_surf,temp_1m,salmon_captured,salmon_examined,...,morts_recovery,morts_other,P_ratio,C_ratio,latitude,longitude,bycatch,notes,sample_site,event_id
0,2018-04-26,1,,Buckle Bay,,,,,47.0,47.0,...,,,,,,,,,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
1,2018-05-16,2,,Buckle Bay,28.04,28.11,11.88,11.72,35.0,16.0,...,,,,,49.182017,-125.964717,1 oolichan,,Buckle Bay,ccfs_Buckle_Bay_2018-05-16
2,2018-05-16,3,,Elbow Bank,26.73,26.65,12.12,12.14,53.0,53.0,...,,,,,49.204833,-125.952233,"tube snout, 4 lingcod, 4 flatfish, 2 sculpin, ...",,Elbow Bank,ccfs_Elbow_Bank_2018-05-16
3,2018-05-16,4,,Cypre River,25.1,25.18,12.28,12.12,8.0,8.0,...,,,,,49.269867,-125.905967,tube snout,,Cypre River,ccfs_Cypre_River_2018-05-16
4,2018-05-10,5,,Elbow Bank,24.8,24.81,11.56,11.47,26.0,26.0,...,,,,,49.196633,-125.948083,"1 sculpin, 25-30 flatfish, juvenile cod",,Elbow Bank,ccfs_Elbow_Bank_2018-05-10


In [361]:
len(ccfs_events_df)

184

In [362]:
# make a lat/long lookup dataframe with mean lat/long
site_group = ccfs_events_df.groupby('sample_site')
position_lookup = site_group[['latitude', 'longitude']].mean()
position_lookup.head()

Unnamed: 0_level_0,latitude,longitude
sample_site,Unnamed: 1_level_1,Unnamed: 2_level_1
Bare Bluff,49.331303,-125.798773
Barkley Sound,49.024417,-125.2888
Bedwell 2,49.321752,-125.816217
Bedwell 3,49.26799,-125.837204
Bedwell Estuary,49.3124,-125.814778


### !! Pause processing events until getting the fish events !!

# Format the fish and lice data file

In [363]:
# load the fish and lice data files
ccfs_fish_lice_df = pd.read_csv(ccfs_fish_lice_filepath, parse_dates={'sampledate': ['year', 'month', 'day']})
ccfs_fish_lice_df.head()

Unnamed: 0,sampledate,fish_id,site_id,location,fish_num,species,length,height,Lep_cope,chalA,...,white_eye,Grazed_gill_plate,Lironca,comments,sum_all_lice,Prevalence,Motile Lep,Motile Caligus,chal,attached
0,2018-04-26,1,1,Buckle Bay,1,chum,44.0,5.0,1.0,2.0,...,,,,,3.0,1.0,0.0,0.0,2.0,3.0
1,2018-04-26,2,1,Buckle Bay,2,chum,46.0,7.0,,1.0,...,,,,,2.0,1.0,0.0,0.0,2.0,2.0
2,2018-04-26,3,1,Buckle Bay,3,chum,49.0,6.0,,2.0,...,,,,,4.0,1.0,0.0,0.0,4.0,4.0
3,2018-04-26,4,1,Buckle Bay,4,chum,48.0,9.0,,3.0,...,,,,,3.0,1.0,0.0,0.0,3.0,3.0
4,2018-04-26,5,1,Buckle Bay,5,chum,50.0,10.0,,1.0,...,,,,,2.0,1.0,0.0,0.0,2.0,2.0


In [364]:
# make a sample_site column and event_id column
fix_locations(ccfs_fish_lice_df)
make_event_id(ccfs_fish_lice_df)
ccfs_fish_lice_df.head()

Unnamed: 0,sampledate,fish_id,site_id,location,fish_num,species,length,height,Lep_cope,chalA,...,Lironca,comments,sum_all_lice,Prevalence,Motile Lep,Motile Caligus,chal,attached,sample_site,event_id
0,2018-04-26,1,1,Buckle Bay,1,chum,44.0,5.0,1.0,2.0,...,,,3.0,1.0,0.0,0.0,2.0,3.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
1,2018-04-26,2,1,Buckle Bay,2,chum,46.0,7.0,,1.0,...,,,2.0,1.0,0.0,0.0,2.0,2.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
2,2018-04-26,3,1,Buckle Bay,3,chum,49.0,6.0,,2.0,...,,,4.0,1.0,0.0,0.0,4.0,4.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
3,2018-04-26,4,1,Buckle Bay,4,chum,48.0,9.0,,3.0,...,,,3.0,1.0,0.0,0.0,3.0,3.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
4,2018-04-26,5,1,Buckle Bay,5,chum,50.0,10.0,,1.0,...,,,2.0,1.0,0.0,0.0,2.0,2.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26


In [365]:
# now that we have event_id for fish, merge into the events file
ccfs_events_df = pd.merge(ccfs_events_df, ccfs_fish_lice_df, on=['event_id', 'sample_site', 'sampledate'], how='outer')

# only want one of each event_id
ccfs_events_df.drop_duplicates(subset=['event_id'], ignore_index=True, inplace=True)

len(ccfs_events_df)

154

In [366]:
# rename Cedar Coast columns that have equivalent data
ccfs_fish_lice_df.rename(columns={'species': 'fish_species',
                                  'Lep_cope': 'lep_cop',
                                  'Caligus_cope': 'cal_cop',
                                  'unid_cope': 'unknown_cop'},
                         inplace=True)

In [367]:
# make sure Cedar Coast fish get globally unique IDs - use the same prefix method as event IDs
ccfs_fish_lice_df['fish_id'] = 'ccfs_' + ccfs_fish_lice_df.fish_id.astype(str).str.strip()
ccfs_fish_lice_df.head()

Unnamed: 0,sampledate,fish_id,site_id,location,fish_num,fish_species,length,height,lep_cop,chalA,...,Lironca,comments,sum_all_lice,Prevalence,Motile Lep,Motile Caligus,chal,attached,sample_site,event_id
0,2018-04-26,ccfs_1,1,Buckle Bay,1,chum,44.0,5.0,1.0,2.0,...,,,3.0,1.0,0.0,0.0,2.0,3.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
1,2018-04-26,ccfs_2,1,Buckle Bay,2,chum,46.0,7.0,,1.0,...,,,2.0,1.0,0.0,0.0,2.0,2.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
2,2018-04-26,ccfs_3,1,Buckle Bay,3,chum,49.0,6.0,,2.0,...,,,4.0,1.0,0.0,0.0,4.0,4.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
3,2018-04-26,ccfs_4,1,Buckle Bay,4,chum,48.0,9.0,,3.0,...,,,3.0,1.0,0.0,0.0,3.0,3.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26
4,2018-04-26,ccfs_5,1,Buckle Bay,5,chum,50.0,10.0,,1.0,...,,,2.0,1.0,0.0,0.0,2.0,2.0,Buckle Bay,ccfs_Buckle_Bay_2018-04-26


## Convert Cedar Coast fish species to match the names we use
Our names: ['Chum Salmon', 'Pink Salmon', 'Coho Salmon',
       'Three-Spined Stickleback', 'Chinook Salmon', 'Other Species',
       'Pacific Herring', 'Sockeye Salmon']

In [368]:
# check the fish species in the SRS dataset
list(ccfs_fish_lice_df.fish_species.unique())

['chum',
 'coho',
 'pink',
 'chinook',
 'chum ',
 'coho ',
 'sockeye',
 'herring',
 'chinook ']

In [369]:
# remove extra surrounding spaces
ccfs_fish_lice_df.fish_species.str.strip()
# replace salmon species names as the string literals differ
ccfs_fish_lice_df.replace({'fish_species': {'chum': 'Chum Salmon',
                                          'chum ': 'Chum Salmon',
                                          'coho': 'Coho Salmon',
                                          'coho ': 'Coho Salmon',
                                          'pink': 'Pink Salmon',
                                          'chinook': 'Chinook Salmon',
                                          'chinook ': 'Chinook Salmon',
                                          'sockeye': 'Sockeye Salmon',
                                          'herring': 'Pacific Herring',
                                          }},
                     inplace=True)
list(ccfs_fish_lice_df.fish_species.unique())

['Chum Salmon',
 'Coho Salmon',
 'Pink Salmon',
 'Chinook Salmon',
 'Sockeye Salmon',
 'Pacific Herring']

## Convert Cedar Coast lice counts to have the same groupings that we use

In [370]:
# convert all the NaN in lice counts to 0
# first, make sure our expression checking for NaN works
assert ccfs_fish_lice_df.loc[:, 'lep_cop':'unid_adult'].isnull().values.any()

In [371]:
# convert all the NaN lice counts to 0
col_names = ccfs_fish_lice_df.loc[:, 'lep_cop':'unid_adult'].columns
fill_dict = {col: 0 for col in col_names}
ccfs_fish_lice_df.fillna(value=fill_dict, inplace=True)

# confirm that they are all converted
assert not ccfs_fish_lice_df.loc[:, 'lep_cop':'unid_adult'].isnull().values.any()

In [372]:
# generate lice count fields to match our data from the SRS lice counts
ccfs_fish_lice_df['unknown_chal'] = ccfs_fish_lice_df['chalA'] + \
                               ccfs_fish_lice_df['chalB'] + \
                               ccfs_fish_lice_df['chal_unid']

ccfs_fish_lice_df['lep_motile'] = ccfs_fish_lice_df['Lep_PAmale'] + \
                            ccfs_fish_lice_df['Lep_PAfemale'] + \
                            ccfs_fish_lice_df['Lep_male'] + \
                            ccfs_fish_lice_df['Lep_nongravid'] + \
                            ccfs_fish_lice_df['Lep_gravid']

ccfs_fish_lice_df['cal_motile'] = ccfs_fish_lice_df['Caligus_mot'] + ccfs_fish_lice_df['Caligus_gravid']

ccfs_fish_lice_df['unknown_motile'] = ccfs_fish_lice_df['unid_PA'] + ccfs_fish_lice_df['unid_adult']

ccfs_fish_lice_df['unknown_unknown'] = 0

In [373]:
# remove all the redundant working columns
# easiest (not most efficient) way is to load our data file and use that list of columns
db_fish_lice_data_df = pd.read_csv(wild_data_dir / "wild_fish_lice.csv")
ccfs_fish_lice_df = ccfs_fish_lice_df.reindex(columns=db_fish_lice_data_df.columns)
ccfs_fish_lice_df.head()

Unnamed: 0,event_id,fish_id,length,weight,height,fish_species,lep_cop,lep_chal,lep_motile,lep_unknown,cal_cop,cal_chal,cal_motile,cal_unknown,unknown_cop,unknown_chal,unknown_motile,unknown_unknown
0,ccfs_Buckle_Bay_2018-04-26,ccfs_1,44.0,,5.0,Chum Salmon,1,,0.0,,0.0,,0.0,,0.0,2.0,0.0,0
1,ccfs_Buckle_Bay_2018-04-26,ccfs_2,46.0,,7.0,Chum Salmon,0,,0.0,,0.0,,0.0,,0.0,2.0,0.0,0
2,ccfs_Buckle_Bay_2018-04-26,ccfs_3,49.0,,6.0,Chum Salmon,0,,0.0,,0.0,,0.0,,0.0,4.0,0.0,0
3,ccfs_Buckle_Bay_2018-04-26,ccfs_4,48.0,,9.0,Chum Salmon,0,,0.0,,0.0,,0.0,,0.0,3.0,0.0,0
4,ccfs_Buckle_Bay_2018-04-26,ccfs_5,50.0,,10.0,Chum Salmon,0,,0.0,,0.0,,0.0,,0.0,2.0,0.0,0


In [374]:
# write out to CSV file
ccfs_fish_lice_df.to_csv(ccfs_formatted_lice_filepath, index=False)

# Finish off the events file processing

In [375]:
ccfs_events_df.head()

Unnamed: 0,sampledate,Site ID,time,location_x,salt_surf,salt_1m,temp_surf,temp_1m,salmon_captured,salmon_examined,...,white_eye,Grazed_gill_plate,Lironca,comments,sum_all_lice,Prevalence,Motile Lep,Motile Caligus,chal,attached
0,2018-04-26,1.0,,Buckle Bay,,,,,47.0,47.0,...,,,,,3.0,1.0,0.0,0.0,2.0,3.0
1,2018-05-16,2.0,,Buckle Bay,28.04,28.11,11.88,11.72,35.0,16.0,...,,,,CS,4.0,1.0,1.0,0.0,3.0,3.0
2,2018-05-16,3.0,,Elbow Bank,26.73,26.65,12.12,12.14,53.0,53.0,...,,,,,14.0,1.0,4.0,0.0,9.0,10.0
3,2018-05-16,4.0,,Cypre River,25.1,25.18,12.28,12.12,8.0,8.0,...,,,,CS,4.0,1.0,0.0,2.0,1.0,2.0
4,2018-05-10,5.0,,Elbow Bank,24.8,24.81,11.56,11.47,26.0,26.0,...,,,,,7.0,1.0,0.0,0.0,4.0,7.0


In [376]:
# add in columns needed for future processing
ccfs_events_df['region'] = 'Clayoquot Sound'
ccfs_events_df['dfozone'] = '2_3'
ccfs_events_df['source'] = 'Cedar Coast Field Station'

In [377]:
# use the lookup table to fill in the lat/long
ccfs_events_df['latitude'] = ccfs_events_df.sample_site.apply(lambda site: position_lookup.loc[site, 'latitude'])
ccfs_events_df['longitude'] = ccfs_events_df.sample_site.apply(lambda site: position_lookup.loc[site, 'longitude'])
ccfs_events_df.head()

Unnamed: 0,sampledate,Site ID,time,location_x,salt_surf,salt_1m,temp_surf,temp_1m,salmon_captured,salmon_examined,...,comments,sum_all_lice,Prevalence,Motile Lep,Motile Caligus,chal,attached,region,dfozone,source
0,2018-04-26,1.0,,Buckle Bay,,,,,47.0,47.0,...,,3.0,1.0,0.0,0.0,2.0,3.0,Clayoquot Sound,2_3,Cedar Coast Field Station
1,2018-05-16,2.0,,Buckle Bay,28.04,28.11,11.88,11.72,35.0,16.0,...,CS,4.0,1.0,1.0,0.0,3.0,3.0,Clayoquot Sound,2_3,Cedar Coast Field Station
2,2018-05-16,3.0,,Elbow Bank,26.73,26.65,12.12,12.14,53.0,53.0,...,,14.0,1.0,4.0,0.0,9.0,10.0,Clayoquot Sound,2_3,Cedar Coast Field Station
3,2018-05-16,4.0,,Cypre River,25.1,25.18,12.28,12.12,8.0,8.0,...,CS,4.0,1.0,0.0,2.0,1.0,2.0,Clayoquot Sound,2_3,Cedar Coast Field Station
4,2018-05-10,5.0,,Elbow Bank,24.8,24.81,11.56,11.47,26.0,26.0,...,,7.0,1.0,0.0,0.0,4.0,7.0,Clayoquot Sound,2_3,Cedar Coast Field Station


In [378]:
# limit to the common columns with the main events file
ccfs_events_df = ccfs_events_df.reindex(columns=['event_id', 'sampledate', 'region',
                                                 'dfozone', 'sample_site', 'latitude',
                                                 'longitude', 'source'])
ccfs_events_df.head()

Unnamed: 0,event_id,sampledate,region,dfozone,sample_site,latitude,longitude,source
0,ccfs_Buckle_Bay_2018-04-26,2018-04-26,Clayoquot Sound,2_3,Buckle Bay,49.172229,-125.964604,Cedar Coast Field Station
1,ccfs_Buckle_Bay_2018-05-16,2018-05-16,Clayoquot Sound,2_3,Buckle Bay,49.172229,-125.964604,Cedar Coast Field Station
2,ccfs_Elbow_Bank_2018-05-16,2018-05-16,Clayoquot Sound,2_3,Elbow Bank,49.200812,-125.950004,Cedar Coast Field Station
3,ccfs_Cypre_River_2018-05-16,2018-05-16,Clayoquot Sound,2_3,Cypre River,49.277706,-125.900085,Cedar Coast Field Station
4,ccfs_Elbow_Bank_2018-05-10,2018-05-10,Clayoquot Sound,2_3,Elbow Bank,49.200812,-125.950004,Cedar Coast Field Station


In [379]:
# make sure the event_ids in events_df are all unique
assert len(ccfs_events_df.event_id.unique()) == len(ccfs_events_df)

In [380]:
# make sure all event_ids in fish_lice are in events_df
assert len(set(ccfs_fish_lice_df.event_id.unique()) - set(ccfs_events_df.event_id.unique())) == 0

In [381]:
# write out to a CSV
ccfs_events_df.to_csv(ccfs_formatted_events_filepath, index=False)