### Buoy Data
* Connecting to buoy data via csv url
* Used pandas dataframe to stage data

In [42]:
# Connection
import pandas as pd
import numpy as np
import re #importing regex for string parsing
pd.set_option('display.max_columns', None)

#reading website data as csv
buoy_data = pd.read_csv(
    'https://www.ndbc.noaa.gov/data/realtime2/45026.txt',
        
    sep = '\s+',
    header = 0,
    index_col = None
)

#dataframe to drop first record since first record represents units of measurements for fields within dataset
df = buoy_data.iloc[1:]
df1 = pd.DataFrame(df)

#### Manipulation

In [43]:
# renaming fields
df1.rename(columns = {
    '#YY':'Year','MM':'Month','DD':'Day','hh':'Hour','mm':'Minute','WDIR':'Wind_Dir','WSPD':'Wind_Speed','GST':'Wind_Gust','WVHT':'Wave_Height',
    'DPD':'Dominant_Wave_Prd','APD':'Average_Wave_Prd','MWD':'Dominant_Wave_Dir','PRES':'Sea_Level_Pres','ATMP':'Air_Temperature',
    'WTMP':'Wave_Temperature','DEWP':'Dew_Point_Temp','VIS':'Station_Visibility','PTDY':'Pressure_Tendency'
},inplace=True)

# Find and Replace all 'MM' values as null as this is likely a nullable value from source data - from observation
df1.replace(to_replace='MM',value = 0,inplace=True)

# appending datetime stamp field to existing df
df1.insert(19,'Timestamp',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']+ ' ' + df1['Hour'] + ':' + df1['Minute']).astype('str'),True)
df1.insert(20,'yyyy-mm-dd',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']).astype('str'))

#grouping and sorting dataframe by timestamp field| ensure most current record at top of file
df1.sort_values(by = 'Timestamp',ascending = False, inplace = True)

#############################################
#conversions
df1.insert(14,'Air_Temp_Deg_F',((df1['Air_Temperature'].astype('float')*(9/5))+32)) #converting air temp to fahrenheit
df1.insert(16,'Wave_Temp_Deg_F',((df1['Wave_Temperature'].astype('float')*(9/5))+32)) # converting wave temp to fahrenheit
df1.insert(18,'DewPoint_Temp_Deg_F',((df1['Dew_Point_Temp'].astype('float')*(9/5))+32)) # converting dewpoint temp to fahrenheit
df1.insert(7,'Wind_Speed_MPH',(df1['Wind_Speed'].astype('float')*2.237)) # converting windspeed to MPH
df1.insert(9,'Wind_Gust_MPH',(df1['Wind_Gust'].astype('float')*2.237)) # converting wind gust to MPH
df1.insert(11,'Wave_Height_FT',(df1['Wave_Height'].astype('float')*3.281)) # converting wave height to feet


###########################################
# appending condition for Wind and Wave Direction values
# thinking I may need to create a dictionary first and then compare values with column range in dictionary



In [88]:
# testing
dir_dict = {
    'min_val':[0],
    'max_val':[11.24],
    'direction':['N']
}

pd_df = pd.DataFrame(dir_dict)
pd_df

Unnamed: 0,min_val,max_val,direction
0,0,11.24,N


In [90]:
test = np.where(
    ((df1['Dominant_Wave_Dir'].astype('float')>=dir_dict['min_val'])&
    (df1['Dominant_Wave_Dir'].astype('float')>=dir_dict['max_val'])),pd_df['direction'])
# can't seem to figure out a way to code this comparison condition so that I can append to df

ValueError: ('Lengths must match to compare', (6360,), (1,))

#### Output Testing

In [44]:
df1.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Wind_Dir,Wind_Speed,Wind_Speed_MPH,Wind_Gust,Wind_Gust_MPH,Wave_Height,Wave_Height_FT,Dominant_Wave_Prd,Average_Wave_Prd,Dominant_Wave_Dir,Sea_Level_Pres,Air_Temperature,Air_Temp_Deg_F,Wave_Temperature,Wave_Temp_Deg_F,Dew_Point_Temp,DewPoint_Temp_Deg_F,Station_Visibility,Pressure_Tendency,TIDE,Timestamp,yyyy-mm-dd
1,2024,10,27,1,30,20,5.0,11.185,9.0,20.133,1.0,3.281,6,0,357,1031.5,11.2,52.16,16.6,61.88,1.6,34.88,0,0.0,0,2024-10-27 01:30,2024-10-27
2,2024,10,27,1,20,30,5.0,11.185,7.0,15.659,1.0,3.281,5,0,354,1031.4,11.1,51.98,16.6,61.88,1.2,34.16,0,0.0,0,2024-10-27 01:20,2024-10-27
3,2024,10,27,1,10,10,5.0,11.185,10.0,22.37,1.1,3.6091,5,0,352,1031.4,11.1,51.98,16.6,61.88,1.8,35.24,0,0.0,0,2024-10-27 01:10,2024-10-27
4,2024,10,27,1,0,10,5.0,11.185,8.0,17.896,1.0,3.281,5,0,359,1031.4,11.1,51.98,16.6,61.88,1.5,34.7,0,0.4,0,2024-10-27 01:00,2024-10-27
5,2024,10,27,0,50,20,5.0,11.185,8.0,17.896,1.0,3.281,6,0,352,1031.4,11.1,51.98,16.6,61.88,1.8,35.24,0,0.0,0,2024-10-27 00:50,2024-10-27


#### CSV Output

In [5]:
# writing output to csv file 
df1.to_csv(r'C:/Users/errol/Documents/test_output.csv')