In [1]:
pip install pandas requests



In [2]:

pip install beautifulsoup4



In [3]:
import os
from urllib.parse import urljoin

import matplotlib.pyplot as plt
import pandas as pd
import requests

In [4]:
from bs4 import BeautifulSoup

def get_csv_urls(base_url, start_year=1995, end_year=2025,
                 suffix='water_temperature.csv', timeout=30):
    """
    Collect the URLs of the CSV files listed in each yearly folder under base_url.

    For every year in range(start_year, end_year) the folder "<base_url><year>/"
    is requested and its HTML is scanned for <a> elements whose href ends with
    `suffix`.

    Parameters:
        base_url: root URL that contains one sub-folder per year.
        start_year: first year to scan (inclusive).
        end_year: year at which scanning stops (exclusive, as in range()).
        suffix: file-name ending a link must have to be collected.
        timeout: seconds passed to requests.get so a stalled server cannot
            hang the notebook indefinitely.

    Returns:
        A list of absolute CSV URLs, in the order they were found.
    """
    csv_urls = []

    for year in range(start_year, end_year):
        year_url = urljoin(base_url, f"{year}/")
        response = requests.get(year_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to access folder for year {year}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        for link in soup.find_all('a'):
            href = link.get('href')
            # Anchors without an href attribute yield None; skip them instead
            # of crashing on None.endswith(...).
            if href and href.endswith(suffix):
                # hrefs may be relative, so resolve them against the folder URL.
                csv_urls.append(urljoin(year_url, href))

    return csv_urls


def download_and_combine_csvs(csv_urls, timeout=30):
    """
    Download CSVs from a list of URLs and combine them into a single DataFrame.

    Each URL is fetched once with requests; the response body is parsed
    directly, so the file is not downloaded a second time by pandas.

    Parameters:
        csv_urls: iterable of CSV URLs.
        timeout: seconds passed to requests.get for each download.

    Returns:
        A DataFrame with the rows of every successfully downloaded CSV,
        re-indexed from 0. An empty DataFrame is returned when nothing could
        be downloaded (pd.concat would raise on an empty list).
    """
    import io  # local import: only this function needs it

    df_list = []

    for url in csv_urls:
        print(f"Downloading: {url}")
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            # Parse the bytes already in memory; pandas decodes them exactly
            # as it would when reading from the URL itself.
            df_list.append(pd.read_csv(io.BytesIO(response.content)))
        else:
            print(f"Failed to download: {url}")

    if not df_list:
        return pd.DataFrame()

    return pd.concat(df_list, ignore_index=True)


# Root folder that get_csv_urls extends with one "<year>/" sub-folder per year.
base_url = "https://data.gcoos.org/data/waf/csv_by_observation/"


# Discover every matching CSV link under the yearly folders.
csv_urls = get_csv_urls(base_url)

# Download each CSV and stack them into one frame; later cells work on this name.
combined_watertemp = download_and_combine_csvs(csv_urls)

# Plain-text dump of the combined frame.
print(combined_watertemp)


Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_04_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_05_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_06_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_07_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_08_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_09_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_10_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_11_sea_water_temperature.csv
Downloading: https://data.gcoos.org/data/waf/csv_by_observation/1995/gcoos_1995_12_sea_water_temperature.csv
Downloading: https:

In [5]:
# List the column labels; the recorded output shows 'qcFlag ' and a second
# 'sea_water_temperature ' label that carry a trailing space.
print(combined_watertemp.columns)

Index(['network', 'platform', 'latitude', 'longitude', 'date',
       'vertical_datum', 'sea_water_temperature', 'qcFlag ',
       'sea_water_temperature '],
      dtype='object')


In [6]:

# Some source files label the temperature column with a trailing space, so the
# combined frame holds two temperature columns. Merge them into one: keep the
# value from 'sea_water_temperature' and fall back to the padded column where
# it is missing.
#
# The guard makes the cell safe to re-run: once the padded column has been
# merged away there is nothing left to do, instead of failing with a KeyError.
if 'sea_water_temperature ' in combined_watertemp.columns:
    combined_watertemp = (
        combined_watertemp
        .assign(
            sea_water_temperature_combined=lambda df: df['sea_water_temperature'].fillna(
                df['sea_water_temperature ']
            )
        )
        .drop(columns=['sea_water_temperature', 'sea_water_temperature '])
        # The merged column ends up last, under the unpadded name.
        .rename(columns={'sea_water_temperature_combined': 'sea_water_temperature'})
    )

print(combined_watertemp.head())


  network             platform  latitude  longitude                  date  \
0    TABS  ioos:station:TABS:C   28.8116   -94.7433  1995-04-02T00:00:00Z   
1    TABS  ioos:station:TABS:C   28.8116   -94.7433  1995-04-02T18:00:00Z   
2    TABS  ioos:station:TABS:C   28.8116   -94.7433  1995-04-02T18:30:00Z   
3    TABS  ioos:station:TABS:C   28.8116   -94.7433  1995-04-02T19:00:00Z   
4    TABS  ioos:station:TABS:C   28.8116   -94.7433  1995-04-02T19:30:00Z   

   vertical_datum     qcFlag   sea_water_temperature  
0            -2.0  11111222.0                  19.24  
1            -2.0  11111222.0                  19.24  
2            -2.0  11111222.0                  19.33  
3            -2.0  11111222.0                  19.37  
4            -2.0  11111222.0                  19.35  


In [7]:
# Count the rows whose sea_water_temperature value is missing.
count_nan = combined_watertemp['sea_water_temperature'].isna().sum()
print(count_nan)

323


In [8]:
# Discard rows without a temperature reading; the filtered frame replaces combined_watertemp.
combined_watertemp = combined_watertemp.dropna(subset=['sea_water_temperature'])

In [9]:
# Cast to str so the .str accessor below can be used on the column.
combined_watertemp['date'] = combined_watertemp['date'].astype(str)

# The first four characters are taken as the year; the sample rows printed
# earlier show timestamps such as 1995-04-02T00:00:00Z.
combined_watertemp['year'] = combined_watertemp['date'].str[:4].astype(int)


In [None]:
# Box plot of every temperature reading. `plt` comes from the notebook's top
# import cell so this cell also runs on a fresh kernel.
fig, ax = plt.subplots()
bp = ax.boxplot(combined_watertemp['sea_water_temperature'])
ax.set_ylabel('Temperature (Celsius)')
ax.set_title('Distribution of Sea Water Temperatures')

# Read the summary statistics back from the artists matplotlib drew.
median = bp['medians'][0].get_ydata()[0]
# The box outline's y-data is read at index 0 for the lower edge and index 2
# for the upper edge (assumes the default, non-patch box artist).
lower_quartile = bp['boxes'][0].get_ydata()[0]
upper_quartile = bp['boxes'][0].get_ydata()[2]
# Each whisker line is read at its second point, taken as the whisker end.
lower_whisker = bp['whiskers'][0].get_ydata()[1]
upper_whisker = bp['whiskers'][1].get_ydata()[1]
# Flier points are the readings plotted beyond the whiskers.
outliers = bp['fliers'][0].get_ydata()

print("Median:", median)
print("Lower Quartile:", lower_quartile)
print("Upper Quartile:", upper_quartile)
print("Lower Whisker:", lower_whisker)
print("Upper Whisker:", upper_whisker)
print("Outliers:", outliers)

In [None]:
# Keep only readings inside the closed interval [0.54, 45.5]; both bounds are inclusive.
combined_watertemp1 = combined_watertemp.loc[
    lambda frame: frame['sea_water_temperature'].between(0.54, 45.5)
]


In [1]:
import matplotlib.pyplot as plt
import numpy as np


# Scatter of every retained reading against the year it was observed in.
x = combined_watertemp1['year']
y = combined_watertemp1['sea_water_temperature']

plt.scatter(x, y)
# The x values are years, not full dates, so label the axis accordingly.
plt.xlabel("Year")
plt.ylabel("Water Temperature (Celsius)")
plt.title("Gulf of Mexico Water Temperatures")
plt.show()

NameError: name 'combined_watertemp1' is not defined

In [None]:
import seaborn as sns

# One box per year, showing the spread of the retained temperature readings.
plt.figure(figsize=(12, 6))
sns.boxplot(data=combined_watertemp1, y='sea_water_temperature', x='year')

# Title and axis labels are applied to the axes seaborn just drew on.
plt.gca().set(
    title='Sea Water Temperatures by Year',
    xlabel='Year',
    ylabel='Temperature (Celsius)',
)
# Tilt the year labels so they do not overlap.
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

SPECIES: Atlantic sharpnose shark, brown rock shrimp, Mueller's Sea Pansy, Paper Scallop, Emerald Parrotfish

quick visualization on depth, lat/long for each species

analysis: does a change in prey depth/lat/long have any correlation to changes in predator depth/lat/long? how long does it take to see a change, if any?

CLIMATE: temperature (and any other climate variables)

quick visualization on temperature trends over the past few decades

machine learning: predict future climate patterns

possibility 1: try multiple different methods to see which has the best accuracy

possibility 2: taking into account that temperatures have increased more in recent years than in the past (so past averages will drag down predicted future temperatures), try models using different amounts of data (50 years, 20 years, 10 years, 5 years, 3 years)

machine learning: combine species data and temperature data to predict future species data