# Title Test

In [1]:
import plotly.express as px
import pandas as pd
import numpy as np
import scipy
from scipy.stats import pearsonr
pd.options.mode.chained_assignment = None

# Maps each species' common name to its scientific-name identifier.
# The sardine entry matches the (commonName, scientificName) pair used in the
# analysis below; presumably the other entries follow the same convention
# (fishery 'Species Name' value -> larval-data column name) -- verify against
# the CSV headers before relying on them.
COMMON_TO_SCIENTIFIC = {
    "Anchovy, northern": "Engraulis.mordax",
    "Mackerel, jack": "Trachurus.symmetricus",
    "Mackerel, Pacific": "Scomber.japonicus",
    "Opah": "Lampridiformes1",
    "Sardine, Pacific": "Sardinops.sagax",
    # "Yellowtail": "Seriola.lalandi" is left out on purpose:
    # not enough datapoints for yellowtail.
}



def group_by_year(scientificName, commonName, treat_NaN_as_zeros = False):
    """
        Returns a DataFrame with columns [Year, Fishery, Larva], one row per year that has fishery catches for the species.

        Fishery is the sum of the 'Pounds' column of the module-level cleaned_fishery table for that year.
        Larva is the sum of the scientificName column of the module-level larva_orig table for that year
        (only larval rows with a non-zero value and a year after 1980 are counted).

        scientificName: Scientific name of the target fish (column name in the larval data)
        commonName: Common name of the target fish ('Species Name' value in the fishery data)
        treat_NaN_as_zeros: setting it to True will treat a missing larval catch for a certain year as 0. Setting it to False will ignore the whole year (False default)

        All three columns are float. If no year qualifies, an empty DataFrame with the same three columns is returned.
    """
    columns = ["Year", "Fishery", "Larva"]

    # Fishery side: total pounds per year for this species. Only the two columns
    # that are needed are converted and summed, so unrelated (possibly
    # non-numeric) columns never take part in the aggregation and the filtered
    # slice is never written to.
    species_rows = cleaned_fishery[cleaned_fishery['Species Name'] == commonName]
    pounds_by_year = (
        species_rows['Pounds'].astype(float)
        .groupby(species_rows['Year'].astype(float))
        .sum()
    )

    # Larval side: total per year, restricted to non-zero records after 1980.
    larva_rows = larva_orig[larva_orig[scientificName] != 0]
    larva_rows = larva_rows[larva_rows['year'] > 1980]
    larva_by_year = larva_rows.groupby('year')[scientificName].sum()

    rows = []
    for year, caught_pounds in pounds_by_year.items():
        try:
            larva_total = larva_by_year.loc[int(year)]
        except KeyError:
            # No larval catches were recorded for this species in this year.
            if not treat_NaN_as_zeros:
                continue
            larva_total = 0
        rows.append([year, caught_pounds, larva_total])

    # Passing the column list and dtype explicitly keeps the result well-formed
    # (three float columns) even when no year qualified.
    return pd.DataFrame(rows, columns=columns, dtype=float)

def local_correlation(df):
    """
        Computes the Pearson correlation of the 'Fishery' column against the 'Larva' column.

        df: DataFrame that has 'Fishery' and 'Larva' columns
        Returns the result of scipy.stats.pearsonr unchanged; index 0 is the correlation coefficient.
    """
    fishery_values = df['Fishery']
    larva_values = df['Larva']
    return scipy.stats.pearsonr(fishery_values, larva_values)



def offset_larva_catch(scientificName, commonName, offset, treat_NaN_as_zeros = False):
    """
        Returns the yearly dataset with each year's fishery catch paired with the larval catch from `offset` years earlier.
        For example, with offset 2 the row for 2010 holds the 2010 fishery catch and the 2008 larval catch.
        This is useful in calculating correlation with offset year.

        The pairing is done by calendar year, not by row position, so years missing from the
        yearly dataset cannot shift later rows out of alignment. A year is dropped from the
        result when the yearly dataset has no row for (year - offset).

        scientificName: Scientific name of the target fish (for larval data)
        commonName: Common name of the target fish (for fishery data)
        offset: number of years between the fishery catch and the larval catch it is paired with
                (fishery year - larval year). If negative, the larval catch comes from a later year.
        treat_NaN_as_zeros: setting it to True will treat a missing larval catch for a certain year as 0. Setting it to False will ignore the whole year (False default)

        Returns a DataFrame with columns [Year, Fishery, Larva]; it is empty when no year can be paired.
    """
    columns = ["Year", "Fishery", "Larva"]
    yearly = group_by_year(scientificName, commonName, treat_NaN_as_zeros)

    # Lookup table from year to that year's larval catch.
    larva_by_year = dict(zip(yearly["Year"], yearly["Larva"]))

    rows = [
        [year, fishery_pounds, larva_by_year[year - offset]]
        for year, fishery_pounds in zip(yearly["Year"], yearly["Fishery"])
        if (year - offset) in larva_by_year
    ]
    if not rows:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(rows, columns=columns, dtype=float)

    


# Load the raw larval-survey table and the fishery landings table.
larva_orig = pd.read_csv(
    'data/Fishlarvaldata_Capstone_2021_FromAndrewThompson_updated 1804 1904 1507 1607 1601 1704 1604 1501 1407 1311 ichthyoplankton by line and station.csv'
)
fishery_updated = pd.read_csv(
    'data/2232021_SummaryByQuarter_blockgrouping_87-20_210223_Redacted.csv'
)

# Clean the fishery data: discard rows with any missing value, then discard
# rows whose 'Total Price' is a single blank (presumably redacted entries --
# confirm against the data provider's notes).
cleaned_fishery = (
    fishery_updated
    .dropna(how='any')
    .loc[lambda frame: frame['Total Price'] != ' ']
)

# Pearson correlation between Pacific sardine fishery catch and larval catch
# for each year offset from -3 to 7.
curr_list = []  # one [offset, correlation] pair per usable offset
scientific = 'Sardinops.sagax'
common = 'Sardine, Pacific'
for offset_years in range(-3, 8):
    offset_df = offset_larva_catch(scientific, common, offset_years)  # dataset paired at this offset
    if len(offset_df) < 2:
        continue  # pearsonr needs at least two data points
    corr = local_correlation(offset_df)  # corr[0] is the correlation coefficient
    curr_list.append([offset_years, corr[0]])

# reshape(-1, 2) keeps the two-column shape even when every offset was skipped,
# so the column slices below cannot raise IndexError on an empty result.
curr_list = np.array(curr_list, dtype=float).reshape(-1, 2)
fig = px.line(x=curr_list[:, 0], y=curr_list[:, 1])
fig.update_layout(
    title='Correlation vs Years Offset (Sardines)',
    xaxis_title="Years Offset (Fishery Catch year - Larval Catch Year)",
    yaxis_title="Pearson Correlation",
)

fig.show()

NameError: name 'plt' is not defined