BB detection at LWSD (and BRND)
A date at any station that satisfies 2 or more criteria is counted as a SB day.
Variables: air temperature, wind speed, wind direction

In [7]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import datetime
import glob

In [8]:
# Set the directory containing the buoy data files
data_directory = 'G:/My Drive/Summer Research/BB detection/NDBC_Buoys'

# Output directory for saving the data
output_directory = r'G:/My Drive/Summer Research/BB detection/NDBC_Buoys/NDBC_passages'

Change Station as needed

In [9]:
# List of file paths for different station data files
#file_paths = glob.glob(data_directory + '/lwsd1h20*.txt')
file_paths = glob.glob(data_directory + '/lwsd1h2021.txt')

In [11]:
# Criteria thresholds
temperature_threshold = 1
direction_change_threshold = 45 
wind_speed_change_threshold = 1.5  

In [12]:
# Create data frames to store passage information
passages_df = pd.DataFrame(columns=['DateTime', 'ATMP', 'WSPD', 'WDIR'])
    
# Iterate through the data files
for file_path in file_paths:
    # Load data from the text file
    data = pd.read_csv(file_path, delimiter='\s+', skiprows=[1],
                   usecols=['#YY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'PRES',  'ATMP',  'WTMP' ],
                   na_filter=True, 
                   na_values=['99.0', '99.00', '999.0', '9999.0'])  # Adjust delimiter as per your file format
    
    # Rename the '#YY', 'MM' , 'DD', 'hh', 'mm' columns
    data.rename(columns={'#YY':'Year', 'MM':'Month', 'DD':'Day', 'hh':'Hour', 'mm':'Minute'}, inplace=True)
    
    # Concatenate the Year, Month, Day, Hour, Minute in one column and set this column as Index
    data['DateTime'] = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])

    data.index = data.DateTime
    
    # Filter the data for the desired date range
    start_date = pd.to_datetime('2021-5-1')
    end_date = pd.to_datetime('2021-10-01')
    data_range = data[(data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)]

    # Filter the data for the desired time range (12 PM to 2 AM next day)
    filtered_data = data_range[
        ((data_range['Hour'] >= 12) & (data_range['Hour'] <= 23)) #|
        #((data_range['Hour'] >= 0) & (data_range['Hour'] <= 2))
    ]
    
    # Iterate through the data to detect sea breeze passages
    # Compare to 30 minutes after each time points
    for i in range(len(filtered_data) - 5):
        curr_row = filtered_data.iloc[i]
        next_row = filtered_data.iloc[i + 5]
        cur_row = filtered_data.iloc[i]
        nex_row = filtered_data.iloc[i+5]

        # Check if any one station meets any two of the three criteria
        num_criteria_met = 0

        # Check air temperature threshold
        if cur_row['ATMP'] - nex_row['ATMP'] > temperature_threshold:
            num_criteria_met += 1

        # Check wind speed is greater than 1
        if curr_row['WSPD'] > 1 and next_row['WSPD'] > 1:
            # Check wind speed change threshold
            #speed_change = next_row['WSPD'] - curr_row['WSPD']
            #if speed_change > wind_speed_change_threshold:
            num_criteria_met += 1

        # Check wind direction change threshold
        direction_change = curr_row['WDIR'] - next_row['WDIR']
        #if abs(direction_change) > direction_change_threshold:
        #   num_criteria_met += 1
        
        #if file_path == r'G:\My Drive\Summer Research\BB detection\NDBC_Buoys\lwsd1h2021.txt':
        if abs(direction_change) > direction_change_threshold and 0 < next_row['WDIR'] < 180:
           num_criteria_met += 1
        #elif 320 < curr_row['WDIR'] < 360 and 45 < next_row['WDIR'] < 100 and abs(direction_change) > direction_change_threshold:
        #    num_criteria_met += 1

        # Check if any two criteria are met
        if num_criteria_met >= 3:
            passage_time = curr_row['DateTime']  # Time of sea breeze passage
            temperature = curr_row['ATMP']  # Air temperature at the passage time
            windspeed = curr_row['WSPD'] # Wind speed at the passage time
            wind_direction = curr_row['WDIR']  # Wind direction at the passage time

            # Append the passage information to the data frame
            passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
                                             ignore_index=True)
print(passages_df)

  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
  passages_df = passages_df.append({'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSP

                DateTime  ATMP WSPD   WDIR
0    2021-05-04 16:06:00  25.2  5.9  299.0
1    2021-05-04 16:12:00  24.1  5.4  302.0
2    2021-05-04 16:24:00  23.6  4.9  309.0
3    2021-05-04 16:30:00  25.5  7.3  295.0
4    2021-05-04 16:42:00  21.8  3.5   10.0
..                   ...   ...  ...    ...
176  2021-09-18 23:48:00  25.9  2.9  262.0
177  2021-09-28 16:12:00  26.6  6.2  305.0
178  2021-09-28 23:42:00  24.7  4.2  277.0
179  2021-09-28 23:48:00  24.7  4.3  272.0
180  2021-09-28 23:54:00  24.6  4.5  277.0

[181 rows x 4 columns]


In [292]:
# Save the passages as a text file
passages_df.to_csv(f'{output_directory}/lwsd_passages_2021.csv', index=False)

In [13]:
# Convert 'DateTime' column to datetime format
passages_df['DateTime'] = pd.to_datetime(passages_df['DateTime'])

# Create a dataframe to store the first passage after 12pm for each day
first_passage_df = pd.DataFrame(columns=['FirstPassageTime', 'ATMP', 'WDIR', 'WSPD'])

# Iterate through the detected sea breeze passages to find the first passage after 12pm for each day
for passage_date in passages_df['DateTime'].dt.date.unique():
    passages_date_filtered = passages_df[passages_df['DateTime'].dt.date == passage_date]
    
    # Filter out passages before 12 pm
    passages_date_filtered = passages_date_filtered[passages_date_filtered['DateTime'].dt.hour >= 12]
    
    if not passages_date_filtered.empty:
        first_passage = passages_date_filtered.iloc[0]
        
        # Append the first passage information to the dataframe
        first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
        # 'Date': passage_date, 
first_passage_df.index = first_passage_df.FirstPassageTime
# Print the first passage dataframe
print(first_passage_df)

  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage

                        FirstPassageTime  ATMP   WDIR  WSPD
FirstPassageTime                                           
2021-05-04 16:06:00  2021-05-04 16:06:00  25.2  299.0   5.9
2021-05-05 15:48:00  2021-05-05 15:48:00  22.7  215.0   2.9
2021-05-06 23:30:00  2021-05-06 23:30:00  15.0  209.0   3.5
2021-05-12 23:30:00  2021-05-12 23:30:00  17.3  318.0   2.7
2021-05-15 13:30:00  2021-05-15 13:30:00  18.7  209.0   1.2
2021-05-19 23:30:00  2021-05-19 23:30:00  26.7  265.0   2.5
2021-05-20 23:30:00  2021-05-20 23:30:00  16.9  131.0   2.9
2021-05-22 17:42:00  2021-05-22 17:42:00  27.1  330.0   4.4
2021-05-23 23:30:00  2021-05-23 23:30:00  25.5  314.0   4.7
2021-05-26 17:00:00  2021-05-26 17:00:00  30.7  196.0   3.8
2021-05-27 23:30:00  2021-05-27 23:30:00  24.2  145.0   1.7
2021-06-01 15:00:00  2021-06-01 15:00:00  21.9  222.0   2.2
2021-06-03 18:42:00  2021-06-03 18:42:00  27.3  250.0   5.0
2021-06-05 16:06:00  2021-06-05 16:06:00  29.5  291.0   2.8
2021-06-06 15:48:00  2021-06-06 15:48:00

  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)
  first_passage_df = first_passage_df.append({'FirstPassageTime': first_passage['DateTime'], 'ATMP': first_passage['ATMP'], 'WDIR': first_passage['WDIR'], 'WSPD': first_passage['WSPD']}, ignore_index=True)


In [14]:
# Save the first passages as a text file
first_passage_df.to_csv(f'{output_directory}/lwsd_first_passage_2021_ns.csv', index=False)

Unsure if the following works, need to check

In [None]:
# Create a dataframe to store the first passage after 12pm for each day and each file
first_passage_df = pd.DataFrame(columns=['File', 'FirstPassageTime', 'ATMP', 'WDIR', 'WSPD'])

# Iterate through the data files
for file_path in file_paths:
    # Load data from the text file
    data = pd.read_csv(file_path, delimiter='\s+', skiprows=[1],
                   usecols=['#YY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'PRES',  'ATMP',  'WTMP'],
                   na_filter=True, 
                   na_values=['99.0', '99.00', '999.0', '9999.0'])  # Adjust delimiter as per your file format
    
    # Rename the '#YY', 'MM' , 'DD', 'hh', 'mm' columns
    data.rename(columns={'#YY':'Year', 'MM':'Month', 'DD':'Day', 'hh':'Hour', 'mm':'Minute'}, inplace=True)
    
    # Concatenate the Year, Month, Day, Hour, Minute in one column and set this column as Index
    data['DateTime'] = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
    data.index = data.DateTime
    
    # Filter the data for the desired date range
    start_date = pd.to_datetime('2021-04-30')
    end_date = pd.to_datetime('2021-10-01')
    data_range = data[(data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)]

    # Filter the data for the desired time range (12 PM to 3 AM next day)
    filtered_data = data_range[
        ((data_range['Hour'] >= 12) & (data_range['Hour'] <= 23)) |
        ((data_range['Hour'] >= 0) & (data_range['Hour'] <= 2))
    ]
    
    # Iterate through the data to detect sea breeze passages
    for i in range(len(filtered_data) - 5):
        curr_row = filtered_data.iloc[i]
        next_row = filtered_data.iloc[i + 5]
        cur_row = filtered_data.iloc[i]
        nex_row = filtered_data.iloc[i+5]

        # Check if any one station meets any two of the three criteria
        num_criteria_met = 0

        # Check temperature threshold
        if cur_row['ATMP'] - nex_row['ATMP'] > temperature_threshold:
            num_criteria_met += 1

        # Check wind speed is less than 6.5
        if curr_row['WSPD'] < 6.5:
            # Check wind speed change threshold
            speed_change = next_row['WSPD'] - curr_row['WSPD']
            if speed_change > wind_speed_change_threshold:
                num_criteria_met += 1

        # Check wind direction change threshold
        direction_change = curr_row['WDIR'] - next_row['WDIR']
        #if abs(direction_change) > direction_change_threshold:
        #    num_criteria_met += 1
        
        #if file_path == r'G:\My Drive\Summer Research\BB detection\NDBC_Buoys\lwsd1h2021.txt':
        if 320 < curr_row['WDIR'] < 360 and 180 < next_row['WDIR'] < 240 and abs(direction_change) > direction_change_threshold:
            num_criteria_met += 1
        #elif 320 < curr_row['WDIR'] < 360 and 45 < next_row['WDIR'] < 100 and abs(direction_change) > direction_change_threshold:
        #    num_criteria_met += 1

        # Check if any two criteria are met
        if num_criteria_met >= 2:
            passage_time = curr_row['DateTime']  # Time of sea breeze passage
            temperature = curr_row['ATMP']  # Air temperature at the passage time
            windspeed = curr_row['WSPD'] # Wind speed at the passage time
            wind_direction = curr_row['WDIR']  # Wind direction at the passage time

            # Extract the name "lwsd" from the file_path and assign it to the 'File' column
            file_name = file_path.split('\\')[-1]
            # Append the passage information to the data frame
            passages_df = passages_df.append({'File': file_name, 'DateTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
                                             ignore_index=True)

            # Append the first passage information to the dataframe
            first_passage_df = first_passage_df.append({'File': file_name, 'FirstPassageTime': passage_time, 'ATMP': temperature, 'WDIR': wind_direction, 'WSPD': windspeed},
                                                       ignore_index=True)

        print(passages_df)
