In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
fish = pd.read_csv('../data/fish_clean.csv')

In [3]:
fish.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,nh_jan,nh_feb,nh_mar,nh_apr,nh_may,...,sh_aug,sh_sep,sh_oct,sh_nov,sh_dec,time,min_spawn,max_spawn,where,where_sub
0,anchovy,200,Small,2–5,0,4 AM – 9 PM,4 AM – 9 PM,4 AM – 9 PM,4 AM – 9 PM,4 AM – 9 PM,...,4 AM – 9 PM,4 AM – 9 PM,4 AM – 9 PM,4 AM – 9 PM,4 AM – 9 PM,day,2,5,sea,sea
1,angelfish,3000,Small,2–5,0,,,,,4 PM – 9 AM,...,,,,4 PM – 9 AM,4 PM – 9 AM,night,2,5,river,river
2,arapaima,10000,XX-Large,1,1,,,,,,...,,,,,4 PM – 9 AM,night,1,1,river,river
3,arowana,10000,Large,1–2,0,,,,,,...,,,,,4 PM – 9 AM,night,1,2,river,river
4,barred knifejaw,5000,Medium,3–5,0,,,All day,All day,All day,...,,All day,All day,All day,All day,all,3,5,sea,sea


In [4]:
# Maps a location label to a two-element list.
# Later cells iterate over the values and compare element 0 with the `where`
# column and element 1 with the `where_sub` column of the fish dataframe.
# NOTE(review): the keys look like the raw location labels from the source
# data — confirm; only the values are used in this notebook.
where_how_map = {
    "Sea": ["sea", "sea"],
    "River": ["river", "river"],
    "Pond": ["pond", "pond"],
    "River (clifftop)": ["river", "clifftop"],
    "Pier": ["sea", "pier"],
    "River (mouth)": ["river", "mouth"],
    "Sea (rainy days)": ["sea", "precipitation"]
}

In [5]:
# Shadow-size labels, in the order later cells walk through them when
# grouping fish by the `shadow` column for display.
# NOTE(review): presumably smallest to largest, with the 'Long' and 'Fin'
# shapes slotted in between — confirm against the source data.
shadow_order = ['X-Small',
                'Small',
                'Medium',
                'Long',
                'Large',
                'Fin',
                'X-Large',
                'XX-Large']

## Subset Fish

Takes a cleaned version of the fish dataframe (with the columns `where`, `where_sub`, `min_spawn` and `max_spawn`) plus a hemisphere and a month, and returns a dataframe containing only the fish that appear in that month in that hemisphere.

##### V 0.1

A.k.a. the very old version, kept commented out for reference.

In [None]:
# def find_best_fish(data, hemisphere, month):
#     '''
#     Inputs:
#     data: pandas dataframe
#         needs to have where/where_sub and min/max spawn
#     hemisphere: str
#         either "sh" or "nh"
#     month: str
#         3-letter abbreviation for month (ie "jun")
#     '''
#     cols = ['name', 'sell', 'shadow', 'spawn_rates',
#             'rain_snow_catch_up', 'where', 'where_sub',
#             'min_spawn', 'max_spawn']
#     month_col = hemisphere + "_" + month
#     cols.append(month_col)

#     data = data[cols]
    
#     data = data[~data[month_col].isna()]
    
#     # shadow_sizes = list(data['shadow'].unique())
#     loc_types = list(data['where'].unique())
    
#     data['min_spawn_perc'] = data['min_spawn'].astype(float)
#     data['max_spawn_perc'] = data['max_spawn'].astype(float)
    
#     for loc in loc_types:
#         temp = data.loc[data['where'] == loc]
#         tot_min_spawn = temp['min_spawn'].sum()
#         tot_max_spawn = temp['max_spawn'].sum()
            
#         for i in temp.index:
#             data.at[i, 'min_spawn_perc'] = round((data['min_spawn'][i]/tot_min_spawn) * 100, 2)
#             data.at[i, 'max_spawn_perc'] = round((data['max_spawn'][i]/tot_max_spawn) * 100, 2)
            
#     data['min_sell'] = (data['min_spawn_perc'] * data['sell']) / 100
#     data['max_sell'] = (data['max_spawn_perc'] * data['sell']) / 100
    
#     return data

In [8]:
def subset_fish(data, hemisphere, month):
    '''
    Select the fish that appear in a given hemisphere and month.

    Inputs:
    data: pandas dataframe
        needs to have where/where_sub and min/max spawn, plus the other
        descriptive columns listed in `cols` below and one availability
        column per hemisphere/month named "<hemisphere>_<month>"
    hemisphere: str
        either "sh" or "nh"
    month: str
        3-letter abbreviation for month (ie "jun")

    Output:
    monthdf: pandas dataframe
        version of data with only fish that appear in that month
        for that hemisphere (rows whose month column is not missing),
        restricted to the descriptive columns plus that month column.
        The original index labels are kept. The result is an independent
        copy, so adding or editing columns on it never touches `data`.

    Raises:
    KeyError
        if the month column or any descriptive column is missing
    '''
    month_col = f"{hemisphere}_{month}"
    cols = ['name', 'sell', 'shadow', 'spawn_rates',
            'rain_snow_catch_up', 'where', 'where_sub',
            'min_spawn', 'max_spawn', month_col]

    # A missing value in the month column means the fish is unavailable
    appears_this_month = data[month_col].notna()

    # Single .loc selection + explicit copy: downstream cells assign new
    # columns to the result, which must not be an ambiguous slice of `data`
    monthdf = data.loc[appears_this_month, cols].copy()

    return monthdf

In [79]:
nh_jul = subset_fish(fish, 'nh', 'jul')

In [80]:
nh_jul.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jul
0,anchovy,200,Small,2–5,0,sea,sea,2,5,4 AM – 9 PM
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day


In [81]:
nh_jul.shape

(58, 10)

## Parse Times

In [64]:
nh_jul['nh_jul'].isna().sum()

0

In [13]:
# Maps each availability string used in the month columns to the list of
# hour columns (0-23) that should be flagged for that fish.
# `range` is half-open, so the closing hour itself is not included
# (e.g. '4 AM – 9 PM' covers hours 4 through 20).
# NOTE: several keys contain a non-breaking space (\xa0) after the dash while
# others use an ordinary space; lookups are exact, so each key has to match
# the raw string character for character.
avail_times = {
    # 4am - 9pm, aka 4:00 - 21:00
    '4 AM – 9 PM': list(range(4, 21)),

    # 9am - 4pm, aka 9:00 - 16:00
    '9 AM –\xa04 PM': list(range(9, 16)),

    # 4pm - 9am, aka 16:00 - 9:00 (wraps past midnight)
    '4 PM –\xa09 AM': list(range(16, 24)) + list(range(0, 9)),

    # 9pm - 4am, aka 21:00 - 4:00 (wraps past midnight)
    '9 PM –\xa04 AM': list(range(21, 24)) + list(range(0, 4)),

    # piranha: 9am - 4pm and 9pm - 4am
    '9 AM –\xa04 PM; 9 PM – 4 AM': list(range(9, 16)) + list(range(21, 24)) + list(range(0, 4)),

    # all day, functionally 0:00 - 24:00
    'All day': list(range(0, 24)),
}

In [65]:
nh_jul.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jul
0,anchovy,200,Small,2–5,0,sea,sea,2,5,4 AM – 9 PM
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day


In [66]:
len(nh_jul)

58

In [67]:
blank_df = pd.DataFrame(np.zeros(shape=(len(nh_jul), 24), dtype='int'))

In [68]:
len(blank_df)

58

In [69]:
blank_df = blank_df.set_index(nh_jul.index)

In [70]:
blank_df.index == nh_jul.index

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [71]:
nh_jul = pd.concat([nh_jul, blank_df], axis=1)

In [72]:
# For every fish, look up which hours its availability string covers and set
# the matching hour columns (integer column names 0-23) to 1.
# NOTE: avail_times[...] is an exact dictionary lookup, so a time string that
# is not one of its keys raises KeyError.
for i, row in nh_jul.iterrows():
    time_value = row['nh_jul']
    cols_to_fill = avail_times[time_value]
    for col in cols_to_fill:
        # .at writes a single cell, addressed by row label and column label
        nh_jul.at[i, col] = 1

In [73]:
hour_cols = list(range(0,24))
view_cols = ['name', 'nh_jul'] + hour_cols

In [75]:
nh_jul[view_cols].head()

Unnamed: 0,name,nh_jul,0,1,2,3,4,5,6,7,...,14,15,16,17,18,19,20,21,22,23
0,anchovy,4 AM – 9 PM,0,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,0,0,0
1,angelfish,4 PM – 9 AM,1,1,1,1,1,1,1,1,...,0,0,1,1,1,1,1,1,1,1
2,arapaima,4 PM – 9 AM,1,1,1,1,1,1,1,1,...,0,0,1,1,1,1,1,1,1,1
3,arowana,4 PM – 9 AM,1,1,1,1,1,1,1,1,...,0,0,1,1,1,1,1,1,1,1
4,barred knifejaw,All day,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [9]:
def parse_times(monthdf, name_col):
    """
    Expand a fish's availability string into 24 hourly indicator columns.

    Inputs:
    monthdf: pandas dataframe
        subset of fish data from subset_fish, based on hem and month

    name_col: string
        combo of hem and month, to match "hh_mmm" pattern
        example: "nh_jul"

    Output:
    concatdf: pandas dataframe
        monthdf plus 24 additional integer-named columns (0-23); a cell is
        1 when the fish is available during that hour and 0 otherwise.
        monthdf itself is not modified and its index is preserved.

    Raises:
    KeyError
        if a value in name_col is not a recognised availability string
    """

    # Defining availability times based on original values.
    # range() is half-open, so the closing hour itself is excluded.
    avail_times = {
        # 4am - 9pm, aka 4:00 - 21:00
        '4 AM – 9 PM': list(range(4, 21)),

        # 9am - 4pm, aka 9:00 - 16:00
        '9 AM –\xa04 PM': list(range(9, 16)),

        # 4pm - 9am, aka 16:00 - 9:00
        '4 PM –\xa09 AM': list(range(16, 24)) + list(range(0, 9)),

        # 9pm - 4am, aka 21:00 - 4:00
        '9 PM –\xa04 AM': list(range(21, 24)) + list(range(0, 4)),

        # piranha: 9am - 4pm and 9pm - 4am
        '9 AM –\xa04 PM; 9 PM – 4 AM': list(range(9, 16)) + list(range(21, 24)) + list(range(0, 4)),

        # all day, functionally 0:00 - 24:00
        'All day': list(range(0, 24)),
    }

    def _normalise(label):
        # The raw strings mix ordinary and non-breaking spaces; str.split()
        # treats both as whitespace, so this collapses them to single spaces
        return ' '.join(str(label).split())

    # Normalise the keys as well, so lookups no longer depend on which kind
    # of space a particular entry happens to contain
    hours_by_label = {_normalise(key): hours
                      for key, hours in avail_times.items()}

    # One row per fish, one column per hour 0-23, filled by row position
    hour_flags = np.zeros(shape=(len(monthdf), 24), dtype='int')
    for position, raw_value in enumerate(monthdf[name_col]):
        label = _normalise(raw_value)
        if label not in hours_by_label:
            raise KeyError(
                f"Unrecognised availability string {raw_value!r} "
                f"in column {name_col!r}")
        hour_flags[position, hours_by_label[label]] = 1

    # Reuse monthdf's index so the hour columns line up row for row
    hours_df = pd.DataFrame(hour_flags, index=monthdf.index)

    # Adding the hour columns to the original monthdf
    concatdf = pd.concat([monthdf, hours_df], axis=1)

    return concatdf

In [99]:
nh_jun = subset_fish(fish, 'nh', 'jun')

In [100]:
nh_jun.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun
0,anchovy,200,Small,2–5,0,sea,sea,2,5,4 AM – 9 PM
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day


In [111]:
nh_jun_hour = parse_times(nh_jun, 'nh_jun')

In [112]:
nh_jun_hour.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun,...,14,15,16,17,18,19,20,21,22,23
0,anchovy,200,Small,2–5,0,sea,sea,2,5,4 AM – 9 PM,...,1,1,1,1,1,1,1,0,0,0
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day,...,1,1,1,1,1,1,1,1,1,1


## Find Time

Subsets the fish to those that are available at the current hour.

In [114]:
nh_jun_hour.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun,...,14,15,16,17,18,19,20,21,22,23
0,anchovy,200,Small,2–5,0,sea,sea,2,5,4 AM – 9 PM,...,1,1,1,1,1,1,1,0,0,0
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day,...,1,1,1,1,1,1,1,1,1,1


In [115]:
current_hour = 18

In [118]:
current = nh_jun_hour.loc[nh_jun_hour[current_hour] == 1]

In [119]:
len(current) / len(nh_jun_hour)

0.7454545454545455

In [121]:
current.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun,...,14,15,16,17,18,19,20,21,22,23
0,anchovy,200,Small,2–5,0,sea,sea,2,5,4 AM – 9 PM,...,1,1,1,1,1,1,1,0,0,0
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM,...,0,0,1,1,1,1,1,1,1,1
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day,...,1,1,1,1,1,1,1,1,1,1


In [186]:
for col in current.columns.to_list():
    print(isinstance(col, str))

True
True
True
True
True
True
True
True
True
True
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [10]:
def subset_by_hour(concatdf, current_hour):
    """
    Keep only the fish that can be caught during one particular hour.

    Inputs:
    concatdf: pandas dataframe
        output of parse_times: fish available for the hem/month, with one
        integer-named indicator column per hour
    current_hour: int
        current hour in military time, between 0 and 23

    Output:
    current: pandas dataframe
        rows whose indicator for current_hour equals 1, restricted to the
        columns that have string names (the hour columns are dropped)
    """
    # Rows flagged as available in the requested hour
    available_now = concatdf[current_hour] == 1

    # The hour columns are named with ints and everything else with strings,
    # so the type of the column name tells the two groups apart
    descriptive_cols = []
    for label in concatdf.columns.to_list():
        if isinstance(label, str):
            descriptive_cols.append(label)

    return concatdf.loc[available_now][descriptive_cols]

In [189]:
jun_3am_test = subset_by_hour(nh_jun_hour, 3)

In [190]:
jun_3am_test.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day
5,barreleye,15000,Small,1,0,sea,sea,1,1,9 PM – 4 AM


## Find Min/Max Percentages

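Using insight from "bees", the creator of [newhorizonshq](https://newhorizonshq.com/): a fish's minimum share is its minimum spawn weight divided by that weight plus the maximum spawn weights of every other fish in the group, and its maximum share is the reverse (its maximum weight against everyone else's minimum).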

In [160]:
jun_3am_test.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day
5,barreleye,15000,Small,1,0,sea,sea,1,1,9 PM – 4 AM


In [158]:
sub_test = jun_3am_test.loc[(jun_3am_test['where'] == "pond")]

In [159]:
sub_test

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun
13,carp,300,Large,3–9,0,pond,pond,3,9,All day
14,catfish,800,Large,6–8,0,pond,pond,6,8,4 PM – 9 AM
19,crawfish,200,Small,4–12,0,pond,pond,4,12,All day
26,frog,120,Small,7–9,0,pond,pond,7,9,All day
27,gar,6000,X-Large,1–2,0,pond,pond,1,2,4 PM – 9 AM
31,goldfish,1300,X-Small,1–4,0,pond,pond,1,4,All day
36,killifish,300,X-Small,3–4,0,pond,pond,3,4,All day
38,koi,4000,Large,1–4,0,pond,pond,1,4,4 PM – 9 AM
74,tadpole,100,X-Small,5–6,0,pond,pond,5,6,All day


In [161]:
sub_test.loc[:,'min_else'] = 0

In [162]:
sub_test.loc[:,'max_else'] = 0

In [163]:
sub_test.loc[:,'min_perc'] = 0.0

In [164]:
sub_test.loc[:,'max_perc'] = 0.0

In [165]:
sub_test.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun,min_else,max_else,min_perc,max_perc
13,carp,300,Large,3–9,0,pond,pond,3,9,All day,0,0,0.0,0.0
14,catfish,800,Large,6–8,0,pond,pond,6,8,4 PM – 9 AM,0,0,0.0,0.0
19,crawfish,200,Small,4–12,0,pond,pond,4,12,All day,0,0,0.0,0.0
26,frog,120,Small,7–9,0,pond,pond,7,9,All day,0,0,0.0,0.0
27,gar,6000,X-Large,1–2,0,pond,pond,1,2,4 PM – 9 AM,0,0,0.0,0.0


In [166]:
# NOTE(review): these two lists are never appended to or read in any cell
# visible in this notebook.
min_perc_list = []
max_perc_list = []

# For each pond fish: compare its own spawn weight with the combined weights
# of all the other fish in sub_test.
for i in sub_test.index:
    # Group totals; these do not change between iterations
    min_tot = sub_test['min_spawn'].sum()
    max_tot = sub_test['max_spawn'].sum()
    
    # This row's own min/max spawn weight (looked up by index label)
    i_min = sub_test['min_spawn'][i]
    i_max = sub_test['max_spawn'][i]
    
    # Combined weight of every other row
    min_else = min_tot - i_min
    max_else = max_tot - i_max
    
    sub_test.at[i, 'min_else'] = min_else
    sub_test.at[i, 'max_else'] = max_else

    # Lowest share: own minimum against everyone else's maximum.
    # Highest share: own maximum against everyone else's minimum.
    # Both are stored as fractions rounded to 3 decimals.
    sub_test.at[i, 'min_perc'] = round((i_min / (max_else + i_min)), 3)
    sub_test.at[i, 'max_perc'] = round((i_max / (min_else + i_max)), 3)

In [11]:
def find_minmax_perc(sub_group):
    """
    Compute each fish's lowest and highest possible share within a group.

    Shoutout to bees, creator of newhorizonshq.com , for the insight

    Input:
    sub_group: pandas dataframe
        slice of fish df, needs min_spawn and max_spawn cols

    Output:
    sub_group_with_perc: pandas dataframe
        copy of the input plus two more columns, min_perc and max_perc.
        Despite the names these are fractions, not values scaled to 100:
          min_perc = own min_spawn / (own min_spawn + others' max_spawn)
          max_perc = own max_spawn / (own max_spawn + others' min_spawn)
        The input dataframe is left untouched.
    """
    # Work on a copy: the argument is normally a slice of a larger frame,
    # and writing new columns into it would alter the caller's data
    sub_group_with_perc = sub_group.copy()

    own_min = sub_group_with_perc['min_spawn']
    own_max = sub_group_with_perc['max_spawn']

    # Min/max of all other rows: group total minus the row's own value.
    # The totals are computed once rather than once per row.
    min_else = own_min.sum() - own_min
    max_else = own_max.sum() - own_max

    # Finding min and max percentages for every row at once
    sub_group_with_perc['min_perc'] = own_min / (max_else + own_min)
    sub_group_with_perc['max_perc'] = own_max / (min_else + own_max)

    return sub_group_with_perc

In [168]:
sub_test2 = jun_3am_test.loc[(jun_3am_test['where'] == 'pond') &
                             (jun_3am_test['where_sub'] == 'pond')]

In [169]:
sub_test2_plus = find_minmax_perc(sub_test2)

In [171]:
sub_test2_plus

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jun,min_perc,max_perc
13,carp,300,Large,3–9,0,pond,pond,3,9,All day,0.057692,0.243243
14,catfish,800,Large,6–8,0,pond,pond,6,8,4 PM – 9 AM,0.107143,0.242424
19,crawfish,200,Small,4–12,0,pond,pond,4,12,All day,0.08,0.307692
26,frog,120,Small,7–9,0,pond,pond,7,9,All day,0.125,0.272727
27,gar,6000,X-Large,1–2,0,pond,pond,1,2,4 PM – 9 AM,0.017544,0.0625
31,goldfish,1300,X-Small,1–4,0,pond,pond,1,4,All day,0.018182,0.117647
36,killifish,300,X-Small,3–4,0,pond,pond,3,4,All day,0.052632,0.125
38,koi,4000,Large,1–4,0,pond,pond,1,4,4 PM – 9 AM,0.018182,0.117647
74,tadpole,100,X-Small,5–6,0,pond,pond,5,6,All day,0.087719,0.1875


## Finding/Printing Subgroup Dataframes

In [89]:
where_how_map.values()

dict_values([['sea', 'sea'], ['river', 'river'], ['pond', 'pond'], ['river', 'clifftop'], ['sea', 'pier'], ['river', 'mouth'], ['sea', 'precipitation']])

Very old version of this code, kept commented out for reference:

In [90]:
# for combo in where_how_map.values():

#     sub_group = nh_jul.loc[(nh_jul['where'] == combo[0]) &
#                            (nh_jul['where_sub'] == combo[1])]

#     # For basic sea, basic river, or pond
#     if combo[0] == combo[1]:
#         print(combo[0].title())

#     else:
#         print(f"{combo[0].title()}: {combo[1].title()}")

#     for size in shadow_order:
#         # Sub sub looks at location by hem/month and then into shadow size
#         sub_sub = sub_group.loc[sub_group['shadow'] == size][[
#             'name', 'sell', 'min_spawn', 'max_spawn',
#             'nh_jul', 'rain_snow_catch_up']]
#         # Finding the length of the subset
#         num_sub = len(sub_sub)

#         # Ignoring sections where sub is nonexistent
#         if num_sub == 0:
#             continue

#         # Finding min and max numbers per sub sub
#         sub_sub_min = sub_sub['min_spawn'].sum()
#         sub_sub_max = sub_sub['max_spawn'].sum()

#         sub_sub['min_spawn_perc'] = sub_sub['min_spawn'] / sub_sub_min
#         sub_sub['max_spawn_perc'] = sub_sub['max_spawn'] / sub_sub_max

#         sub_sub['avg_spawn_perc'] = (
#             sub_sub['min_spawn_perc'] + sub_sub['max_spawn_perc']) / 2

#         print(
#             f"{size}: {sub_sub_min} - {sub_sub_max} (number of fish: {num_sub})")
#         print(sub_sub[['name', 'sell', 'min_spawn', 'max_spawn',
#                        'min_spawn_perc', 'max_spawn_perc', 'avg_spawn_perc',
#                        'nh_jul', 'rain_snow_catch_up']].T)
#         print("-----")

In [93]:
# Print, for every location and every shadow size found there, the fish in
# that group together with their min/max share.
# NOTE(review): `test` is not defined in any cell visible in this notebook;
# it presumably holds a subset_by_hour() result for 'nh_jul' — define it
# explicitly above so this cell survives Restart & Run All.
for combo in where_how_map.values():

    sub_group = test.loc[(test['where'] == combo[0]) &
                         (test['where_sub'] == combo[1])]

    # Nothing available at this location, so print no header for it
    if len(sub_group) == 0:
        continue

    # For basic sea, basic river, or pond both labels are identical, so
    # the name is shown once; otherwise show "Where: Where_Sub"
    if combo[0] == combo[1]:
        heading = combo[0].title()
    else:
        heading = f"{combo[0].title()}: {combo[1].title()}"

    print("=" * 80)
    print(heading)
    print("=" * 80)

    for size in shadow_order:
        # Sub sub looks at location by hem/month and then into shadow size
        sub_sub = sub_group.loc[sub_group['shadow'] == size]

        # Ignoring shadow sizes that do not occur at this location
        if len(sub_sub) == 0:
            continue

        # Finding min/max percs based on sub shadow groups
        sub_sub_plus = find_minmax_perc(sub_sub)

        # Finding the length of the subset (same row count as sub_sub, so
        # no second emptiness check is needed)
        num_sub = len(sub_sub_plus)

        print(
            f"Number of {size} fish: {num_sub}")
        print(sub_sub_plus[['name', 'sell', 'min_perc', 'max_perc', 
                       'nh_jul', 'rain_snow_catch_up']].T)
        print("-----")

Sea
Number of X-Small fish: 2
                            17         65
name                clown fish  sea horse
sell                       650       1100
min_perc              0.416667   0.454545
max_perc              0.545455   0.583333
nh_jul                 All day    All day
rain_snow_catch_up           0          0
-----
Number of Small fish: 5
                             0            5               12              35  \
name                    anchovy    barreleye  butterfly fish  horse mackerel   
sell                        200        15000            1000             150   
min_perc              0.0645161    0.0294118        0.121212        0.518519   
max_perc               0.192308    0.0434783        0.208333             0.7   
nh_jul              4 AM – 9 PM  9 PM – 4 AM         All day         All day   
rain_snow_catch_up            0            0               0               0   

                             72  
name                surgeonfish  
sell             

In [191]:
where_how_map.values()

dict_values([['sea', 'sea'], ['river', 'river'], ['pond', 'pond'], ['river', 'clifftop'], ['sea', 'pier'], ['river', 'mouth'], ['sea', 'precipitation']])

## Finding Display Parameters

In [12]:
fish_hem_mon = subset_fish(fish, 'nh', 'jul')

In [13]:
fish_hem_mon_times = parse_times(fish_hem_mon, 'nh_jul')

In [14]:
current = subset_by_hour(fish_hem_mon_times, 22)

In [15]:
current.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jul
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day
5,barreleye,15000,Small,1,0,sea,sea,1,1,9 PM – 4 AM


In [16]:
current['where_sub'].unique()

array(['river', 'sea', 'pier', 'pond', 'precipitation'], dtype=object)

In [18]:
def find_current_locs(current_df):
    """
    List the distinct location pairs present in a fish dataframe.

    Input:
    current_df: pandas dataframe
        fish data with `where` and `where_sub` columns

    Output:
    current_locs: list of [where, where_sub] lists
        one entry per distinct pair, in order of first appearance;
        empty when current_df has no rows
    """
    current_locs = []
    # Walk the two columns by position rather than by index label, so the
    # result does not depend on the index labels being unique
    for where, where_sub in zip(current_df['where'], current_df['where_sub']):
        row_loc = [where, where_sub]
        if row_loc not in current_locs:
            current_locs.append(row_loc)
    return current_locs

In [19]:
current.index

Int64Index([ 1,  2,  3,  4,  5,  8, 10, 12, 13, 14, 17, 18, 19, 20, 22, 25, 26,
            27, 29, 31, 32, 34, 35, 36, 38, 40, 48, 51, 54, 58, 59, 60, 62, 63,
            65, 66, 68, 71, 72, 73, 74, 75, 77, 79],
           dtype='int64')

In [20]:
current_locs = find_current_locs(current)

In [21]:
current_locs

[['river', 'river'],
 ['sea', 'sea'],
 ['sea', 'pier'],
 ['pond', 'pond'],
 ['sea', 'precipitation']]

In [22]:
current_locs[0][0] != current_locs[0][1]

False

In [23]:
'Small' in list(current['shadow'])

True

In [24]:
shadow_order

['X-Small', 'Small', 'Medium', 'Long', 'Large', 'Fin', 'X-Large', 'XX-Large']

In [25]:
current.head()

Unnamed: 0,name,sell,shadow,spawn_rates,rain_snow_catch_up,where,where_sub,min_spawn,max_spawn,nh_jul
1,angelfish,3000,Small,2–5,0,river,river,2,5,4 PM – 9 AM
2,arapaima,10000,XX-Large,1,1,river,river,1,1,4 PM – 9 AM
3,arowana,10000,Large,1–2,0,river,river,1,2,4 PM – 9 AM
4,barred knifejaw,5000,Medium,3–5,0,sea,sea,3,5,All day
5,barreleye,15000,Small,1,0,sea,sea,1,1,9 PM – 4 AM


In [26]:
# Show which of the currently available fish belong to each location.
# Each `loc` is a [where, where_sub] pair.
for loc in current_locs:
    if loc[0] != loc[1]:
        # Sub-location (the two labels differ): list the fish specific to it
        # first, followed by the fish of the plain base location
        title = f"{loc[0].title()}: {loc[1].title()}"
        # NOTE(review): this filter matches on where_sub only, not on where;
        # that assumes sub-location names are unique across `where` values —
        # confirm
        spec_loc = current.loc[current['where_sub'] == loc[1]]
        rest_loc = current.loc[(current['where'] == loc[0]) & (
            current['where_sub'] == loc[0])]
        loc_current = pd.concat([spec_loc, rest_loc])
    else:
        # Plain location: where and where_sub carry the same label
        title = loc[0].title()
        loc_current = current.loc[(current['where'] == loc[0]) & (
            current['where_sub'] == loc[0])]
    print(title)
    print(loc_current[['name', 'where', 'where_sub']])

River
               name  where where_sub
1         angelfish  river     river
2          arapaima  river     river
3           arowana  river     river
8        black bass  river     river
20     crucian carp  river     river
22             dace  river     river
25  freshwater goby  river     river
51          piranha  river     river
60   saddled bichir  river     river
66  snapping turtle  river     river
73        sweetfish  river     river
75          tilapia  river     river
Sea
                 name where where_sub
4     barred knifejaw   sea       sea
5           barreleye   sea       sea
12     butterfly fish   sea       sea
17         clown fish   sea       sea
32  great white shark   sea       sea
34   hammerhead shark   sea       sea
35     horse mackerel   sea       sea
48     olive flounder   sea       sea
54        puffer fish   sea       sea
58        red snapper   sea       sea
59         ribbon eel   sea       sea
62          saw shark   sea       sea
63           se

In [256]:
def find_current_shadows(current_loc_df):
    """
    List the shadow sizes present in a dataframe, in display order.

    Input:
    current_loc_df: pandas dataframe
        fish data with a `shadow` column

    Output:
    current_shadows_ordered: list of str
        the entries of the fixed shadow ordering below that occur at least
        once in the `shadow` column, kept in that ordering
    """
    shadow_order = ['X-Small', 'Small', 'Medium',
                    'Long', 'Large', 'Fin',
                    'X-Large', 'XX-Large']

    # Materialise the column once instead of once per candidate size
    shadows_present = list(current_loc_df['shadow'])

    return [shadowsize for shadowsize in shadow_order
            if shadowsize in shadows_present]

In [257]:
# For every [where, where_sub] pair, list the shadow sizes found there.
# NOTE(review): `current_hem_mon` is not defined in any cell visible in this
# notebook; presumably it is a hemisphere/month subset of the fish data —
# confirm and define it explicitly so the cell runs on a fresh kernel.
for loc in current_locs:
    fish_at_loc = current_hem_mon.loc[(current_hem_mon['where'] == loc[0]) &
                                      (current_hem_mon['where_sub'] == loc[1])]
    loc_shadows = find_current_shadows(fish_at_loc)
    print(loc, loc_shadows)

['sea', 'sea'] ['X-Small', 'Small', 'Medium', 'Long', 'Large', 'Fin', 'X-Large', 'XX-Large']
['river', 'river'] ['Small', 'Medium', 'Large', 'X-Large', 'XX-Large']
['sea', 'pier'] ['X-Large', 'XX-Large']
['pond', 'pond'] ['X-Small', 'Small', 'Large', 'X-Large']
['sea', 'precipitation'] ['XX-Large']
