In [2]:
import pandas as pd
from scipy.spatial import cKDTree

# Load wind_data and wave_data from CSV files
wind_data = pd.read_csv("wind_data.csv")
wave_data = pd.read_csv("wave_data.csv")

# Convert wind_data and wave_data coordinates to arrays
wind_coords = wind_data[['lat', 'lon']].values
wave_coords = wave_data[['lat', 'lon']].values

# Build a KDTree using wind_data coordinates
tree = cKDTree(wind_coords)

# Find the index of the nearest wave_data point for each wind_data point
_, idx = tree.query(wave_coords)
# The query method with wave_coords will find the nearest wind_data point for each wave_data point. 

# Add the nearest wave_data information to wind_data
nearest_wave_data = wave_data.loc[idx].reset_index(drop=True)
merged_data = pd.concat([wind_data, nearest_wave_data], axis=1)

# Now merged_data contains the wave_data with the corresponding nearest wind_data information


In [12]:
print(merged_data.head())

       lat      lon   V10PCT_0  V10PCT_22  V10PCT_45  V10PCT_67  V10PCT_90  \
0  56.3125  13.9375   5.740000   6.593333  10.159000   8.482308   8.122500   
1  54.5625  15.4375   9.560000  11.627647  11.312432  11.404286  10.509310   
2  54.6875  15.5625   9.055000  10.330667  11.656857  10.435000  10.643043   
3  54.8125  15.6875  11.300000   9.430714  10.500238   9.960476  10.598667   
4  54.9375  15.8125   9.115556  10.095714  10.445000  11.572222  10.423103   

   V10PCT_112  V10PCT_135  V10PCT_157  ...  Wav_PPCT135  Wav_PPCT157  \
0    6.058182    7.372857    6.646667  ...     0.054272     0.045256   
1   10.080000   10.512500    7.903333  ...     0.044600     0.042902   
2   10.403750   11.817500    8.545000  ...     0.037156     0.041178   
3   10.263636   11.571250   10.460000  ...     0.041570     0.048203   
4   10.522727   11.608000   11.580000  ...     0.043094     0.045542   

   Wav_PPCT180  Wav_PPCT202  Wav_PPCT225  Wav_PPCT247  Wav_PPCT270  \
0     0.032807     0.030302 

In [13]:
# Save the merged data to a new CSV file
merged_data.to_csv("windwave_data2.csv", index=False)


In [None]:
# Test if 1 and 2 are different

import pandas as pd

# Load the two datasets into pandas DataFrames
data1 = pd.read_csv("windwave_data1.csv")
data2 = pd.read_csv("windwave_data2.csv")

# Step 2: Check if the shape of the two DataFrames is the same
if data1.shape == data2.shape:
    print("The two datasets have the same shape.")
else:
    print("The two datasets have different shapes and are not the same.")
    exit()

# Step 3: Compare the values in each corresponding cell of the two DataFrames
are_equal = data1.equals(data2)

# Step 4: Check for any missing values (NaN) in both DataFrames
has_missing_values = data1.isna().sum().sum() != data2.isna().sum().sum()

if are_equal and not has_missing_values:
    print("The two datasets are the same.")
else:
    print("The two datasets are not exactly the same.")


Removing NA and 0 values from data:

In [11]:
windwave_data2a = pd.read_csv("windwave_data2a.csv")

windwave_data2a.drop(windwave_data2a[(windwave_data2a['lat'] == 0) & (windwave_data2a['lon'] == 0)].index, inplace=True)
windwave_data2a.dropna(subset=['lat', 'lon'], inplace=True)

print(windwave_data2a.tail())

windwave_data2a.to_csv("windwave_data2b.csv", index=False)

         lat      lon  V10PCT_0  V10PCT_22  V10PCT_45  V10PCT_67  V10PCT_90  \
805  54.3125  20.6875       0.0        0.0        0.0       0.00        0.0   
806  56.3125  13.6875       0.0        0.0        0.0       0.00        0.0   
807  53.8125  20.1875       0.0        0.0        0.0       7.12        0.0   
808  55.1875  14.0625       0.0        0.0        0.0       0.00        0.0   
809  55.5625  13.4375       0.0        0.0        0.0       0.00        0.0   

     V10PCT_112  V10PCT_135  V10PCT_157  ...  Wav_PPCT135  Wav_PPCT157  \
805         0.0         0.0         0.0  ...     0.075348     0.052031   
806         0.0         0.0         0.0  ...     0.075348     0.052031   
807         0.0         0.0         0.0  ...     0.075348     0.052031   
808         0.0         0.0         0.0  ...     0.075348     0.052031   
809         0.0         0.0         0.0  ...     0.075348     0.052031   

     Wav_PPCT180  Wav_PPCT202  Wav_PPCT225  Wav_PPCT247  Wav_PPCT270  \
805     