## This Notebook integrates all the wild data sources.
### This includes our data from a variety of sources, as well as data from Salmon Coast Field Station, Cedar Coast Field Station, and the Hakai Institute

In [22]:
import pandas as pd
from pathlib import Path

## Configuration variables for this notebook

In [23]:

# Directory that holds every wild-data CSV this notebook reads or writes.
wild_data_dir = Path('.')

# Inputs: our own sample-event and fish/lice files.
events_filepath = wild_data_dir.joinpath('wild_sample_events.csv')
fish_lice_filepath = wild_data_dir.joinpath('wild_fish_lice.csv')

# Inputs: Salmon Coast Field Station (scfs) files.
scfs_events_filepath = wild_data_dir.joinpath('salmon_coast_wild_sample_events.csv')
scfs_fish_lice_filepath = wild_data_dir.joinpath('salmon_coast_wild_fish_lice.csv')

# Inputs: Cedar Coast Field Station (ccfs) files.
ccfs_events_filepath = wild_data_dir.joinpath('cedar_coast_wild_sample_events.csv')
ccfs_fish_lice_filepath = wild_data_dir.joinpath('cedar_coast_wild_fish_lice.csv')

# Inputs: Hakai Institute files.
hakai_events_filepath = wild_data_dir.joinpath('hakai_wild_sample_events.csv')
hakai_fish_lice_filepath = wild_data_dir.joinpath('hakai_wild_fish_lice.csv')

# Outputs: where the combined event and fish/lice tables are written.
all_events_filepath = wild_data_dir.joinpath('all_wild_sample_events.csv')
all_fish_lice_filepath = wild_data_dir.joinpath('all_wild_fish_lice.csv')

## Join all the event data

In [24]:
# Read the sample-event CSV of each source, in the same order as the
# target names on the left-hand side.
events_df, scfs_events_df, ccfs_events_df, hakai_events_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        events_filepath,
        scfs_events_filepath,
        ccfs_events_filepath,
        hakai_events_filepath,
    )
]

In [25]:
# Stack the four event tables row-wise into a single frame.
# ignore_index=True renumbers the rows of the result; sort=False leaves the
# columns unsorted when the inputs' columns do not line up.
all_events_df = pd.concat(
    objs=[events_df, scfs_events_df, ccfs_events_df, hakai_events_df],
    sort=False,
    ignore_index=True,
)
# Preview the first rows of the combined table.
all_events_df.head()

Unnamed: 0,event_id,sampledate,region,dfozone,sample_site,latitude,longitude,source
0,1,2003-05-13,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada
1,2,2003-05-21,Broughton Archipelago,3_3,Adeane Point,50.71978,-125.6795,Fisheries and Oceans Canada
2,3,2003-05-26,Broughton Archipelago,3_3,Adeane Point,50.7197,-125.6795,Fisheries and Oceans Canada
3,4,2003-06-02,Broughton Archipelago,3_3,Adeane Point,50.7197,-125.6795,Fisheries and Oceans Canada
4,5,2003-05-13,Broughton Archipelago,3_3,Adeane Point,50.7384,-125.67985,Fisheries and Oceans Canada


In [26]:
# Save the combined events table as CSV, leaving the row index out of the file.
all_events_df.to_csv(path_or_buf=all_events_filepath, index=False)

## Join all the fish/lice data

In [27]:
# Load the fish/lice table of each source.
# Each frame is read from that source's fish/lice CSV. The Cedar Coast frame
# must use ccfs_fish_lice_filepath, not ccfs_events_filepath; reading the
# events file here would mix event rows and event-only columns into the
# combined fish/lice table.
fish_lice_df = pd.read_csv(fish_lice_filepath)
scfs_fish_lice_df = pd.read_csv(scfs_fish_lice_filepath)
ccfs_fish_lice_df = pd.read_csv(ccfs_fish_lice_filepath)
hakai_fish_lice_df = pd.read_csv(hakai_fish_lice_filepath)

In [28]:
# Stack the four fish/lice tables row-wise into a single frame.
# ignore_index=True renumbers the rows of the result; sort=False leaves the
# columns unsorted when the inputs' columns do not line up.
all_fish_lice_df = pd.concat(
    objs=[fish_lice_df, scfs_fish_lice_df, ccfs_fish_lice_df, hakai_fish_lice_df],
    sort=False,
    ignore_index=True,
)
# Preview the first rows of the combined table.
all_fish_lice_df.head()

Unnamed: 0,event_id,fish_id,length,weight,height,fish_species,lep_cop,lep_chal,lep_motile,lep_unknown,...,unknown_motile,unknown_unknown,sampledate,region,dfozone,sample_site,latitude,longitude,source,lice_protocol
0,5666,1,44.0,0.96,,Chum Salmon,1.0,0.0,0.0,0.0,...,0.0,0.0,,,,,,,,
1,5666,2,39.0,0.61,,Pink Salmon,0.0,1.0,0.0,0.0,...,0.0,0.0,,,,,,,,
2,5666,3,45.0,0.94,,Pink Salmon,0.0,0.0,0.0,0.0,...,0.0,0.0,,,,,,,,
3,5666,4,43.0,0.84,,Chum Salmon,0.0,0.0,0.0,0.0,...,0.0,0.0,,,,,,,,
4,5666,5,38.0,0.53,,Pink Salmon,0.0,0.0,0.0,0.0,...,0.0,0.0,,,,,,,,


In [29]:
# Save the combined fish/lice table as CSV, leaving the row index out of the file.
all_fish_lice_df.to_csv(path_or_buf=all_fish_lice_filepath, index=False)