In [2]:
import pandas as pd

file_dir = '/Users/markrademaker/Projects/Spatio_temporal_clustering/Data/'

## Separation in Juvenile, Small Adult and Large Adult Biomass

In [27]:
#species included
species_list = ["Limanda_limanda","Merlangius_merlangus","Clupea_harengus","Gadus_morhua","Sprattus_sprattus",
                "Pleuronectes_platessa","Eutrigla_gurnardus","Amblyraja_radiata","Callionymus_lyra"]

#life stages to separate
life_stage=["JUV","SA","LA"]

# list of tuples containing juvenile, small adult and large adult size (based on FishBase)
length_class_list = [(11,21.4),  #Limanda limanda
                     (20.2,27.8),#Merlangius merlangus
                     (12.5,16.7),#etc.
                     (31,68.3),
                     (8,10.1),
                     (24,30.8),
                     (14.5,18.8),
                     (40,46),
                     (17.4)]


length_to_weight = [(0.0068,3.14), #parameter a,b  a*length^b
                   (0.0063,3.06),
                   (0.0060,3.08),
                   (0.0071,3.08),
                   (0.0056,3.09),
                   (0.0089,3.04),
                   (0.0079,3.02),
                   (0.0105,2.94),
                   (0.0214,2.59)]

#North Sea coordinates:
min_lat= 48.00
max_lat= 62.00
min_lon= -5.00
max_lon= 13.01

#process per species
for i in range(len(species_list)):   
    species = species_list[i]
    print(species)
    if species != "Callionymus_lyra":
        
        df1=pd.read_csv(file_dir+species_list[i]+'.csv')
        indexNames = df1[ df1['LngtClass'] == 0 ].index
        df1.drop(indexNames,inplace=True)
        print(length_class_list[i][0],length_class_list[i][1])

        #drop observations outside range
        df1=df1.drop(df1[(df1["ShootLong"] < min_lon) & (df1["ShootLong"] > max_lon)].index)
        df1=df1.drop(df1[(df1["ShootLat"] < min_lat) & (df1["ShootLat"] > max_lat)].index)

        #set length class to cm and calculate weight
        df1["LngtClass_cm"]=df1["LngtClass"]/10
        print(length_to_weight[i][0],length_to_weight[i][1])
        df1["Weight"]=length_to_weight[i][0]*(df1["LngtClass_cm"]**length_to_weight[i][1])
        df1["Total_wght"]=df1["CPUE_number_per_hour"]*df1["Weight"]
        df1["Biomass_kg"]=df1["Total_wght"]/1000
        
        #separate per length class
        df1_juv=df1[df1['LngtClass_cm']< length_class_list[i][0]]
        df1_juv[species+"_"+life_stage[0]]=df1_juv['Total_wght']
        df1_juv=df1_juv.iloc[:,[0,1,2,3,4,5,6,7,8,9,-1]]

        df1_sa= df1[df1['LngtClass_cm'].between(length_class_list[i][0],length_class_list[i][1], inclusive=False)]
        df1_sa[species+"_"+life_stage[1]]=df1_sa["Biomass_kg"]
        df1_sa=df1_sa.iloc[:,[0,1,2,3,4,5,6,7,8,9,-1]]

        df1_la= df1[df1['LngtClass_cm']>length_class_list[i][1]]
        df1_la[species+"_"+life_stage[2]]=df1_la["Biomass_kg"]
        df1_la=df1_la.iloc[:,[0,1,2,3,4,5,6,7,8,9,-1]]
        
        #Merge into single dataframe and save
        frames=[df1_juv,df1_sa,df1_la]
        occ_df = pd.concat(frames)
        occ_df2=occ_df.groupby(['Survey','Year','ShootLat','ShootLong','DateTime'])[species+"_"+life_stage[0],
                                                                            species+"_"+life_stage[1],
                                                                            species+"_"+life_stage[2]].sum().reset_index()
        occ_df2['DateTime'] = pd.to_datetime(occ_df2["DateTime"])
        print(len(occ_df2))
        occ_df2.to_csv(file_dir+'%s_attributes.csv'%species)
        
    else:
        df1=pd.read_csv(file_dir+species_list[i]+'.csv')
        indexNames = df1[ df1['LngtClass'] == 0 ].index
        df1.drop(indexNames,inplace=True)

        #drop observations outside range
        df1=df1.drop(df1[(df1["ShootLong"] < min_lon) & (df1["ShootLong"] > max_lon)].index)
        df1=df1.drop(df1[(df1["ShootLat"] < min_lat) & (df1["ShootLat"] > max_lat)].index)

        #set length class to cm and calculate weight
        df1["LngtClass_cm"]=df1["LngtClass"]/10
        print(length_to_weight[i][0],length_to_weight[i][1])
        df1["Weight"]=length_to_weight[i][0]*(df1["LngtClass_cm"]**length_to_weight[i][1])
        df1["Total_wght"]=df1["CPUE_number_per_hour"]*df1["Weight"]
        df1["Biomass_kg"]=df1["Total_wght"]/1000
        
        #separate per length class
        df1_juv=df1[df1['LngtClass_cm']< length_class_list[i]]
        df1_juv[species+"_"+life_stage[0]]=df1_juv["Biomass_kg"]
        df1_juv=df1_juv.iloc[:,[0,1,2,3,4,5,6,7,8,9,-1]]
        
        df1_ad= df1[df1['LngtClass_cm']>length_class_list[i]]
        df1_ad[species+"_"+"Adult"]=df1_ad["Biomass_kg"]
        df1_ad=df1_ad.iloc[:,[0,1,2,3,4,5,6,7,8,9,-1]]
        
        frames=[df1_juv,df1_ad,]
        occ_df = pd.concat(frames)
        occ_df2=occ_df.groupby(['Survey','Year','ShootLat','ShootLong','DateTime'])[species+"_"+life_stage[0],
                                                                            species+"_"+"Adult"].sum().reset_index()
        occ_df2['DateTime'] = pd.to_datetime(occ_df2["DateTime"])
        print(len(occ_df2))
        occ_df2.to_csv(file_dir+'%s_attributes.csv'%species)

Limanda_limanda
11 21.4
0.0068 3.14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


22188
Merlangius_merlangus
20.2 27.8
0.0063 3.06
25305
Clupea_harengus
12.5 16.7
0.006 3.08
22186
Gadus_morhua
31 68.3
0.0071 3.08
20540
Sprattus_sprattus
8 10.1
0.0056 3.09
13572
Pleuronectes_platessa
24 30.8
0.0089 3.04
18460
Eutrigla_gurnardus
14.5 18.8
0.0079 3.02
19666
Amblyraja_radiata
40 46
0.0105 2.94
8717
Callionymus_lyra
0.0214 2.59


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


8265


## Separation in Juvenile and Adult Biomass

In [26]:
import numpy as np

species_list = ["Limanda_limanda","Merlangius_merlangus","Clupea_harengus","Gadus_morhua","Sprattus_sprattus",
                "Pleuronectes_platessa","Eutrigla_gurnardus","Amblyraja_radiata","Callionymus_lyra"]


#life stages to separate
life_stage=["JUVENILE","ADULT"]

# list of tuples containing size in cm at maturation to separate juveniles from adults (based on FishBase)
length_class_list = [(11),  # Limanda limanda
                     (20.2),# Merlangius merlangus
                     (12.5),# Clupea harengus
                     (31),  # Gadus morhua
                     (8),   # Sprattus sprattus
                     (24),  # Pleuronectes platessa
                     (14.5),# Eutrigla gurnardus
                     (40),  # Amblyraja radiata
                     (17.4)]# Callionymus lyra


length_to_weight = [(0.0068,3.14), #parameter a,b  a*length^b for Limanda limanda
                   (0.0063,3.06),  # ... Merlangius merlangus
                   (0.0060,3.08),  # ... Clupea harengus
                   (0.0071,3.08),  # ... Gadus morhua
                   (0.0056,3.09),  # ... Sprattus sprattus
                   (0.0089,3.04),  # ... Pleuronectes platessa
                   (0.0079,3.02),  # ... Eutrigla gurnardus
                   (0.0105,2.94),  # ... Amblyraja radiata
                   (0.0214,2.59)]  # ... Callionymus lyra

#North Sea coordinate extent:
min_lat= 48.00
max_lat= 62.00
min_lon= -5.00
max_lon= 13.01

#process per species
for i in range(len(species_list)):   
    species = species_list[i]
    print(species)
        
    df1=pd.read_csv(file_dir+species_list[i]+'.csv')

    #drop observations outside geographic range
    df1=df1.drop(df1[(df1["ShootLong"] < min_lon) & (df1["ShootLong"] > max_lon)].index)
    df1=df1.drop(df1[(df1["ShootLat"] < min_lat) & (df1["ShootLat"] > max_lat)].index)
    
    #Keep observations of quarter 1 (consistent 1977-2020) and quarter 3 (consistent 1991-2019)
    df1 = df1.loc[(df1['Quarter'] == 1) | (df1['Quarter'] == 3)]
   
    #set length class to cm and calculate weight
    df1["LngtClass_cm"]=df1["LngtClass"]/10
    
    #length to weight conversion
    df1["Weight"]=length_to_weight[i][0]*(df1["LngtClass_cm"]**length_to_weight[i][1])
    
    #Total weight in catch by multiplying by number caught per hour
    df1["Total_wght"]=df1["CPUE_number_per_hour"]*df1["Weight"]
    
    #Convert from grams to kilograms
    df1["Biomass_kg"]=df1["Total_wght"]/1000

    #Separate dataframe per length threshold between juvenile and adult class
        #for juvenile
    df1_juv=df1[df1['LngtClass_cm']< length_class_list[i]]
    df1_juv[species+"_"+life_stage[0]]=df1_juv["Biomass_kg"]
    df1_juv=df1_juv.iloc[:,[0,1,2,3,4,5,6,7,8,9,10,-1]]
        #for adult
    df1_ad= df1[df1['LngtClass_cm']>length_class_list[i]]
    df1_ad[species+"_"+life_stage[1]]=df1_ad["Biomass_kg"]
    df1_ad=df1_ad.iloc[:,[0,1,2,3,4,5,6,7,8,9,10,-1]]

    #concatenate back together with total juvenile and adult biomass caught per as separate column in same dataframe
    frames=[df1_juv,df1_ad,]
    occ_df = pd.concat(frames)
    occ_df2=occ_df.groupby(['Survey','Year','Quarter','DateTime','ShootLat','ShootLong','Depth'])[species+"_"+life_stage[0],
                                                                        species+"_"+life_stage[1]].sum().reset_index()
    
    #set datetime column values to datetime object
    occ_df2['DateTime'] = pd.to_datetime(occ_df2["DateTime"])
    
    #ensure depth column values always expressed in absolute value
    occ_df2['Depth'] = occ_df2['Depth'].abs()
    
    #Export consistent measured data 1977-2020 of Quarter 1
    occ_df3 = occ_df2.loc[(occ_df2['Quarter']==1)]
    occ_df3 = occ_df3.to_csv(file_dir+'{}_1977_2019_juv_adult_biomass.csv'.format(species))
    #print(len(occ_df2))
    
    #Export consistent measured data 1991-2019 of Quarter 1 and 3
    occ_df3 = occ_df2.loc[(occ_df2['Year'] >= 1991) & (occ_df2['Year'] < 2020)]
    occ_df3.to_csv(file_dir+'{}_1991_2019_juv_adult_biomass.csv'.format(species))
    #print(len(occ_df3))

Clupea_harengus
0.0068 3.14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


25015
19624
