### Buoy Data
* Connecting to buoy data via csv url
* Used pandas dataframe to stage data
* Buoy 45026
* Location: Stevensville, MI
* cooridinates: 41.982 N 86.619 W (41°58'55" N 86°37'10" W)

In [1]:
import pandas as pd
import numpy as np
import re #importing regex for string parsing

In [2]:
# Connection
import pandas as pd
import numpy as np
import re #importing regex for string parsing
pd.set_option('display.max_columns', None)

#reading website data as csv
buoy_data = pd.read_csv(
    'https://www.ndbc.noaa.gov/data/realtime2/45026.txt',
    sep = r'\s+',
    header = 0,
    index_col = None
)

#dataframe to drop first record since first record represents units of measurements for fields within dataset
df = buoy_data.iloc[1:]
df1 = pd.DataFrame(df)

#### Manipulation

In [5]:
# renaming fields
df1.rename(columns = {
    '#YY':'Year','MM':'Month','DD':'Day','hh':'Hour','mm':'Minute','WDIR':'Wind_Dir','WSPD':'Wind_Speed','GST':'Wind_Gust','WVHT':'Wave_Height',
    'DPD':'Dominant_Wave_Prd','APD':'Average_Wave_Prd','MWD':'Dominant_Wave_Dir','PRES':'Sea_Level_Pres','ATMP':'Air_Temperature',
    'WTMP':'Surface_Water_Temperature','DEWP':'Dew_Point_Temp','VIS':'Station_Visibility','PTDY':'Pressure_Tendency'
},inplace=True)

# Find and Replace all 'MM' values as null as this is likely a nullable value from source data - from observation
df1.replace(to_replace='MM',value = 0,inplace=True)

# appending datetime stamp field to existing df
df1.insert(19,'Timestamp',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']+ ' ' + df1['Hour'] + ':' + df1['Minute']).astype('str'),True)
df1.insert(20,'yyyy-mm-dd',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']).astype('str'))

#grouping and sorting dataframe by timestamp field| ensure most current record at top of file
df1.sort_values(by = 'Timestamp',ascending = False, inplace = True)

#############################################
#conversions
df1.insert(14,'Air_Temp_Deg_F',((df1['Air_Temperature'].astype('float')*(9/5))+32)) #converting air temp to fahrenheit
#df1.insert(15,'Surface_Water_Temp',((df1['WTMP'].astype('float')*(9/5))+32)) # converting surface water temp to fahrenheit
df1.insert(16,'Surface_Water_Temp_Deg_F',((df1['Surface_Water_Temperature'].astype('float')*(9/5))+32)) # converting surface water temp to fahrenheit
df1.insert(18,'DewPoint_Temp_Deg_F',((df1['Dew_Point_Temp'].astype('float')*(9/5))+32)) # converting dewpoint temp to fahrenheit
df1.insert(7,'Wind_Speed_MPH',(df1['Wind_Speed'].astype('float')*2.237)) # converting windspeed to MPH
df1.insert(9,'Wind_Gust_MPH',(df1['Wind_Gust'].astype('float')*2.237)) # converting wind gust to MPH
df1.insert(11,'Wave_Height_FT',(df1['Wave_Height'].astype('float')*3.281)) # converting wave height to feet


###########################################
# appending condition for Wind and Wave Direction values
# compass direction dictionary
a = {
    'min_val':[0,11.25,33.75,56.25,78.75,101.25,123.75,146.25,168.75,191.25,213.75,236.25,258.75,281.25,303.75,326.25,348.75],
    'max_val':[11.24,33.74,56.24,78.24,101.24,123.74,146.24,168.74,191.24,213.74,236.24,258.74,281.24,303.74,326.24,348.74,360.00],
    'direction':['N','NNE','NE','ENE','E','ESE','SE','SSE','S','SSW','SW','WSW','W','WNW','NW','NNW','N']
}
# changing to df
df_a = pd.DataFrame(a)

# function to apply compass direction to wave_direction data
def get_corresponding_value(i):
    for _, row in df_a.iterrows():
        if row['min_val'] <=i<=row['max_val']:
            return row['direction']
    return None

#appended values to df1 for wave direction
df1.insert(15,'Wave_Direction', df1['Dominant_Wave_Dir'].astype('float').apply(get_corresponding_value))
#appending values to df1 for wind direction
df1.insert(6,'Wind_Direction', df1['Wind_Dir'].astype('float').apply(get_corresponding_value))

  df1.replace(to_replace='MM',value = 0,inplace=True)


#### Output Testing

In [7]:
df1.head(5)

Unnamed: 0,Year,Month,Day,Hour,Minute,Wind_Dir,Wind_Direction,Wind_Speed,Wind_Speed_MPH,Wind_Gust,Wind_Gust_MPH,Wave_Height,Wave_Height_FT,Dominant_Wave_Prd,Average_Wave_Prd,Dominant_Wave_Dir,Wave_Direction,Sea_Level_Pres,Air_Temperature,Air_Temp_Deg_F,Surface_Water_Temperature,Surface_Water_Temp_Deg_F,Dew_Point_Temp,DewPoint_Temp_Deg_F,Station_Visibility,Pressure_Tendency,TIDE,Timestamp,yyyy-mm-dd
1,2024,11,25,16,0,120,ESE,2.0,4.474,3.0,6.711,0.1,0.3281,0,0,273,W,1009.9,10.7,51.26,9.8,49.64,6.9,44.42,0,0.3,0,2024-11-25 16:00,2024-11-25
2,2024,11,25,15,50,140,SE,2.0,4.474,4.0,8.948,0.1,0.3281,0,0,287,WNW,1009.7,10.7,51.26,9.9,49.82,6.6,43.88,0,0.0,0,2024-11-25 15:50,2024-11-25
3,2024,11,25,15,40,160,SSE,4.0,8.948,5.0,11.185,0.2,0.6562,0,0,157,SSE,1009.8,10.7,51.26,9.8,49.64,6.4,43.52,0,0.0,0,2024-11-25 15:40,2024-11-25
4,2024,11,25,15,30,160,SSE,4.0,8.948,5.0,11.185,0.2,0.6562,0,0,153,SSE,1009.8,10.6,51.08,9.7,49.46,6.4,43.52,0,0.0,0,2024-11-25 15:30,2024-11-25
5,2024,11,25,15,20,160,SSE,4.0,8.948,5.0,11.185,0.2,0.6562,0,0,153,SSE,1009.9,10.5,50.9,9.7,49.46,6.2,43.16,0,0.0,0,2024-11-25 15:20,2024-11-25


### Determining Correletion between Wind, Wave, and Air temps to influence water surface temp

In [12]:
df2 = df1[[
    'Wind_Dir',
    'Wind_Speed_MPH',
    'Wind_Gust_MPH',
    'Dominant_Wave_Dir',
    'Air_Temp_Deg_F',
    'DewPoint_Temp_Deg_F',
    'Wave_Height_FT',
    'Surface_Water_Temp_Deg_F'
]]

# correlation coefficient matrix
corr = df2.corr(method='pearson')
#corr output
df2.corr(method='pearson')

Unnamed: 0,Wind_Dir,Wind_Speed_MPH,Wind_Gust_MPH,Dominant_Wave_Dir,Air_Temp_Deg_F,DewPoint_Temp_Deg_F,Wave_Height_FT,Surface_Water_Temp_Deg_F
Wind_Dir,1.0,0.214536,0.216241,0.203839,0.029186,0.050938,0.312045,-0.158441
Wind_Speed_MPH,0.214536,1.0,0.977538,-0.040182,-0.052922,0.058248,0.648483,-0.147994
Wind_Gust_MPH,0.216241,0.977538,1.0,0.008342,-0.080268,0.028354,0.692249,-0.149173
Dominant_Wave_Dir,0.203839,-0.040182,0.008342,1.0,-0.1919,-0.183929,0.473169,0.100312
Air_Temp_Deg_F,0.029186,-0.052922,-0.080268,-0.1919,1.0,0.837274,-0.294815,0.599926
DewPoint_Temp_Deg_F,0.050938,0.058248,0.028354,-0.183929,0.837274,1.0,-0.189824,0.35399
Wave_Height_FT,0.312045,0.648483,0.692249,0.473169,-0.294815,-0.189824,1.0,-0.129813
Surface_Water_Temp_Deg_F,-0.158441,-0.147994,-0.149173,0.100312,0.599926,0.35399,-0.129813,1.0


#### Linear Graph
* Averaging surface water temps over period of time

In [84]:
#time_avg = df1['yyyy-mm-dd']
# grouped df
sp_df = df1.groupby(['yyyy-mm-dd','Surface_Water_Temp_Deg_F']).agg(avg_temp = ('Surface_Water_Temp_Deg_F','mean'))
sp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,avg_temp
yyyy-mm-dd,Surface_Water_Temp_Deg_F,Unnamed: 2_level_1
2024-10-11,65.12,65.12
2024-10-11,65.30,65.30
2024-10-11,65.48,65.48
2024-10-11,65.66,65.66
2024-10-11,65.84,65.84
...,...,...
2024-11-24,49.64,49.64
2024-11-25,49.28,49.28
2024-11-25,49.46,49.46
2024-11-25,49.64,49.64


In [38]:
# surface water temp shape
sp_df.describe()

Unnamed: 0,Surface_Water_Temp_Deg_F
count,261.0
mean,59.078621
std,4.968158
min,49.28
25%,55.22
50%,59.72
75%,63.14
max,67.28


In [86]:
#left off here
import seaborn as sns
import matplotlib.pyplot as plt
plt.scatter(sp_df[''],sp_df['avg_temp']) 

In [36]:
# Entire lookback
import seaborn as sns
import matplotlib.pyplot as plt
sns.scatterplot(
    x=sp_df['yyyy-mm-dd'],
    y=sp_df['Surface_Water_Temp_Deg_F']
    ,data=corr)

KeyError: 'yyyy-mm-dd'

### Notes
---

Appears that surface water temperature is more influenced by air temperature, which has the strongest correlation,
followed by the dewpoint temperature and then the dominant wave direction (the point of highest wave energy direction that wave is traveling from);
I wonder if this can mean, when surface water is warmer, if there is a slight influence when wind direction is from shore (where wind direction is from the east)....does dominant wind direction ever come from the shore? I'll need to check this out.  Could see if I can pull stationary thermostat temps maybe at local weather station to correlte with the buoy ir temp

#### CSV Output

In [33]:
# writing output to csv file 
df1.to_csv(r'C:/Users/errol/Documents/test_output3.csv')