### Pandas Dataframe

In [226]:
import pandas as pd
import numpy as np
import re #importing regex for string parsing

#reading website data as csv
buoy_data = pd.read_csv(
    'https://www.ndbc.noaa.gov/data/realtime2/45026.txt',
        
    sep = '\s+',
    header = 0,
    index_col = None
)

#dataframe to drop first record since first record represents units of measurements for fields within dataset
df = buoy_data.iloc[1:]
df1 = pd.DataFrame(df)

# renaming fields
df1.rename(columns = {
    '#YY':'Year','MM':'Month','DD':'Day','hh':'Hour','mm':'Minute','WDIR':'Wind_Dir','WSPD':'Wind_Speed','GST':'Wind_Gust','WVHT':'Wave_Height',
    'DPD':'Dominant_Wave_Prd','APD':'Average_Wave_Prd','MWD':'Dominant_Wave_Dir','PRES':'Sea_Level_Pres','ATMP':'Air_Temperature',
    'WTMP':'Wave_Temperature','DEWP':'Dew_Point_Temp','VIS':'Station_Visibility','PTDY':'Pressure_Tendency'
},inplace=True)

# Find and Replace all 'MM' values as null as this is likely a nullable value from source data - from observation
df1.replace(to_replace='MM',value = 0,inplace=True)

# appending datetime stamp field to existing df
df1.insert(19,'Timestamp',(df1['Year'] + '-' + df1['Month'] + '-' + df1['Day']+ ' ' + df1['Hour'] + ':' + df1['Minute']).astype('str'),True)

#grouping and sorting dataframe by timestamp field| ensure most current record at top of file
df1.sort_values(by = 'Timestamp',ascending = False, inplace = True)

#appending Farenheight field since water and air temp is in degree celcius
df1.insert(14,'Air_Temp_Deg_F',((df1['Air_Temperature'].astype('float')*(9/5))+32))
df1.insert(16,'Wave_Temp_Deg_F',((df1['Wave_Temperature'].astype('float')*(9/5))+32))


In [227]:
df1

Unnamed: 0,Year,Month,Day,Hour,Minute,Wind_Dir,Wind_Speed,Wind_Gust,Wave_Height,Dominant_Wave_Prd,Average_Wave_Prd,Dominant_Wave_Dir,Sea_Level_Pres,Air_Temperature,Air_Temp_Deg_F,Wave_Temperature,Wave_Temp_Deg_F,Dew_Point_Temp,Station_Visibility,Pressure_Tendency,TIDE,Timestamp
1,2024,10,23,21,30,340,8.0,11.0,1.8,6,0,338,1021.1,14.6,58.28,17.1,62.78,6.8,0,0,0,2024-10-23 21:30
2,2024,10,23,21,10,350,9.0,12.0,1.7,6,0,333,1021.0,14.7,58.46,17.1,62.78,7.5,0,0,0,2024-10-23 21:10
3,2024,10,23,21,00,340,8.0,11.0,1.6,6,0,335,1020.9,14.7,58.46,17.1,62.78,7.4,0,+1.1,0,2024-10-23 21:00
4,2024,10,23,20,50,340,8.0,11.0,1.5,6,0,333,1020.8,14.5,58.10,17.1,62.78,7.5,0,0,0,2024-10-23 20:50
5,2024,10,23,20,40,350,8.0,11.0,1.7,6,0,331,1020.8,14.6,58.28,17.1,62.78,7.8,0,0,0,2024-10-23 20:40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6458,2024,09,08,00,40,10,9.0,12.0,1.5,6,0,356,1021.9,15.6,60.08,19.9,67.82,8.4,0,0,0,2024-09-08 00:40
6459,2024,09,08,00,30,350,8.0,13.0,1.8,6,0,354,1022.0,15.6,60.08,19.9,67.82,8.0,0,0,0,2024-09-08 00:30
6460,2024,09,08,00,20,360,9.0,13.0,1.6,6,0,355,1022.0,15.6,60.08,19.8,67.64,8.1,0,0,0,2024-09-08 00:20
6461,2024,09,08,00,10,360,9.0,12.0,1.7,6,0,352,1022.0,15.7,60.26,19.8,67.64,8.4,0,0,0,2024-09-08 00:10


In [228]:
# writing output to csv file 
df1.to_csv(r'C:/Users/errol/Documents/test_output.csv')

NameError: name 'stop' is not defined