### Buoy Data
* Connecting to buoy data via csv url
* Used pandas dataframe to stage data

In [25]:
# Connection
import pandas as pd
import numpy as np
import re #importing regex for string parsing

#reading website data as csv
buoy_data = pd.read_csv(
    'https://www.ndbc.noaa.gov/data/realtime2/45026.txt',
        
    sep = '\s+',
    header = 0,
    index_col = None
)

#dataframe to drop first record since first record represents units of measurements for fields within dataset
df = buoy_data.iloc[1:]
df1 = pd.DataFrame(df)

#### Manipulation

In [26]:
# renaming fields
df1.rename(columns = {
    '#YY':'Year','MM':'Month','DD':'Day','hh':'Hour','mm':'Minute','WDIR':'Wind_Dir','WSPD':'Wind_Speed','GST':'Wind_Gust','WVHT':'Wave_Height',
    'DPD':'Dominant_Wave_Prd','APD':'Average_Wave_Prd','MWD':'Dominant_Wave_Dir','PRES':'Sea_Level_Pres','ATMP':'Air_Temperature',
    'WTMP':'Wave_Temperature','DEWP':'Dew_Point_Temp','VIS':'Station_Visibility','PTDY':'Pressure_Tendency'
},inplace=True)

# Find and Replace all 'MM' values as null as this is likely a nullable value from source data - from observation
df1.replace(to_replace='MM',value = 0,inplace=True)

# appending datetime stamp field to existing df
df1.insert(19,'Timestamp',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']+ ' ' + df1['Hour'] + ':' + df1['Minute']).astype('str'),True)
df1.insert(20,'yyyy-mm-dd',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']).astype('str'))

#grouping and sorting dataframe by timestamp field| ensure most current record at top of file
df1.sort_values(by = 'Timestamp',ascending = False, inplace = True)

#############################################
#conversions
df1.insert(14,'Air_Temp_Deg_F',((df1['Air_Temperature'].astype('float')*(9/5))+32)) #converting air temp to fahrenheit
df1.insert(16,'Wave_Temp_Deg_F',((df1['Wave_Temperature'].astype('float')*(9/5))+32)) # converting wave temp to fahrenheit
df1.insert(18,'DewPoint_Temp_Deg_F',((df1['Dew_Point_Temp'].astype('float')*(9/5))+32)) # converting dewpoint temp to fahrenheit
df1.insert(7,'Wind_Speed_MPH',(df1['Wind_Speed'].astype('float')*2.237).astype('float')) # converting windspeed to MPH
df1.insert(9,'Wind_Gust_MPH',(df1['Wind_Gust'].astype('float')*2.237).astype('float')) # converting wind gust to MPH


#### Output Testing

In [27]:
df1

Unnamed: 0,Year,Month,Day,Hour,Minute,Wind_Dir,Wind_Speed,Wind_Speed_MPH,Wind_Gust,Wind_Gust_MPH,...,Air_Temp_Deg_F,Wave_Temperature,Wave_Temp_Deg_F,Dew_Point_Temp,DewPoint_Temp_Deg_F,Station_Visibility,Pressure_Tendency,TIDE,Timestamp,yyyy-mm-dd
1,2024,10,26,22,20,360,6.0,13.422,9.0,20.133,...,52.34,16.7,62.06,2.4,36.32,0,0,0,2024-10-26 22:20,2024-10-26
2,2024,10,26,22,10,10,6.0,13.422,10.0,22.370,...,52.34,16.7,62.06,2.4,36.32,0,0,0,2024-10-26 22:10,2024-10-26
3,2024,10,26,22,00,360,6.0,13.422,9.0,20.133,...,52.34,16.7,62.06,2.4,36.32,0,-0.4,0,2024-10-26 22:00,2024-10-26
4,2024,10,26,21,50,20,7.0,15.659,10.0,22.370,...,52.52,16.7,62.06,2.6,36.68,0,0,0,2024-10-26 21:50,2024-10-26
5,2024,10,26,21,40,20,7.0,15.659,9.0,20.133,...,52.52,16.7,62.06,2.4,36.32,0,0,0,2024-10-26 21:40,2024-10-26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6467,2024,09,11,00,40,130,3.0,6.711,4.0,8.948,...,71.78,21.4,70.52,15.4,59.72,0,0,0,2024-09-11 00:40,2024-09-11
6468,2024,09,11,00,30,130,2.0,4.474,3.0,6.711,...,71.78,21.5,70.70,16.3,61.34,0,0,0,2024-09-11 00:30,2024-09-11
6469,2024,09,11,00,20,120,2.0,4.474,3.0,6.711,...,71.78,21.5,70.70,16.4,61.52,0,0,0,2024-09-11 00:20,2024-09-11
6470,2024,09,11,00,10,100,2.0,4.474,3.0,6.711,...,71.42,21.5,70.70,16.2,61.16,0,0,0,2024-09-11 00:10,2024-09-11


#### CSV Output

In [5]:
# writing output to csv file 
df1.to_csv(r'C:/Users/errol/Documents/test_output.csv')