In [1]:
# test for fish
# Import the libraries
import pandas as pd # for dataframes
import matplotlib.pyplot as plt # for plots
import seaborn as sns # for nicer plots
import re # regular expressions

# Read the data
toolik_fish = pd.read_csv(
    # Path to the data
    "../data/raw_data/toolik_fish.csv",
    # not a number values are "#N/A"
    na_values="#N/A"
    )

In [2]:
# Inspect the data
toolik_fish.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6726 entries, 0 to 6725
Data columns (total 23 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Date                  6726 non-null   object
 1   Site                  6726 non-null   object
 2   Lake                  6726 non-null   object
 3   Fish I.D.             6726 non-null   object
 4   Species               6726 non-null   object
 5   Total length (mm)     6726 non-null   object
 6   FL (mm)               6726 non-null   object
 7   SL (mm)               6173 non-null   object
 8   Mass (g)              6726 non-null   object
 9   Sampling              6726 non-null   object
 10  Otos                  6726 non-null   object
 11  Stomach               6726 non-null   object
 12  Sex                   6726 non-null   object
 13  Mature                6726 non-null   object
 14  Gonads                6726 non-null   object
 15  P1                    6726 non-null   

In [3]:
# Inspect the data
print(toolik_fish.shape)
print(toolik_fish.columns)

(6726, 23)
Index(['Date', 'Site', 'Lake', 'Fish I.D.', 'Species', 'Total length (mm)',
       'FL (mm)', 'SL (mm)', 'Mass (g)', 'Sampling', 'Otos', 'Stomach', 'Sex',
       'Mature', 'Gonads', 'P1', 'Age', 'Total otolith radius', 'DO1..DO52',
       'Fin clip', 'Locality', 'Scales', 'Comments'],
      dtype='object')


In [4]:
def snakecase(s):
        """Convert CamelCase to snake_case and lowercase the string
        s: string
        """
        s = re.sub(
        # Find a lower case letter or number (group 1)
        # followed by an upper case letter (group 2):
        '([a-z0-9])([A-Z])',
        # Replace with - 
        # \1, the lower case letter, 
        # _, an underscore, and
        # \2, the upper case letter:
        r'\1_\2',
        # Perform the search and replace in 
        # the string s:
        s 
        )
        s = re.sub(
                ' ', # Find a space
                '_', # Replace with an underscore
                s    # In the string
                ).lower()  # Convert to lower case
        return s

In [6]:
toolik_fish.columns = [
    # Replace spaces with underscores and lowercase labels
    # for each col in toolik_weather.columns
    snakecase(col) for col in toolik_fish.columns
]

# Check the cleaned column names
toolik_fish.columns

Index(['date', 'site', 'lake', 'fish_i.d.', 'species', 'total_length_(mm)',
       'fl_(mm)', 'sl_(mm)', 'mass_(g)', 'sampling', 'otos', 'stomach', 'sex',
       'mature', 'gonads', 'p1', 'age', 'total_otolith_radius', 'do1..do52',
       'fin_clip', 'locality', 'scales', 'comments'],
      dtype='object')

In [12]:
fig = plt.figure(figsize=(10, 6)) # This command sets the size of the figure

sns.histplot( # This command creates the data visualization (a strip plot)
    # The data argument sets the data that will be used to create
    # the plot. In this case, the data is the toolik_weather data
    # frame:
    data=toolik_fish, 
    # The next two arguments set the x and y axes. In this case,
    # the x axis is set to the month column and the y axis is set
    # to the daily air temperature column:
    x='date', 
    y='total_length_(mm)', 
    #jitter=0.2, # This sets the jitter of the points on the x axis
    # The hue argument sets the color of the points. In this case, 
    # the color is set by the daily air temperature:
    hue='total_length_(mm)', 
    # The palette is the set of colors that
    # will be used to color the points. In this case, the palette
    # is set to the viridis palette, which is a colorblind-friendly
    # palette:
    palette='viridis' 
)

# The title command sets the title of the plot
plt.title("Toolik Station daily air temperature")
# The xlabel command sets the x axis label
plt.xlabel("Month")
# The ylabel command sets the y axis label
plt.ylabel("Daily mean air temperatures (Celsius)")

# Finally, the .show() function displays the plot
# in the notebook output.
plt.show()

KeyboardInterrupt: 