SST DATA CROSS CORRELATION:

In [2]:
import numpy as np
import pandas as pd
import netCDF4 as nc

# Cross-correlate yearly sea-surface-temperature grids for 2004, 2013 and 2023
# (AQUA MODIS L3m yearly 4 km products, per the filenames below).

# Sentinel that marks missing pixels in the arrays read below; swapped for NaN.
# NOTE(review): this equality test relies on the arrays returned by netCDF4
# still carrying -32767.0 at missing pixels after conversion with np.array --
# confirm against the files' _FillValue attribute.
fill_value = -32767.0

# Each file is opened in a `with` block so the handle is released even when a
# variable lookup raises; a bare open()/close() pair leaks it on that path.

# 2004 -- this file also supplies the lat/lon axes used for every year.
# NOTE(review): assumes the 2013 and 2023 files share the same grid -- confirm.
with nc.Dataset('AQUA_MODIS.20040101_20041231.L3m.YR.SST.sst.4km.nc', 'r') as nc_file_1:
    sst_data_1 = np.array(nc_file_1.variables['sst'][:])
    latitudes = np.array(nc_file_1.variables['lat'][:])
    longitudes = np.array(nc_file_1.variables['lon'][:])
sst_data_1 = np.where(sst_data_1 == fill_value, np.nan, sst_data_1)

# 2013
with nc.Dataset('AQUA_MODIS.20130101_20131231.L3m.YR.SST.sst.4km.nc', 'r') as nc_file_2:
    sst_data_2 = np.array(nc_file_2.variables['sst'][:])
sst_data_2 = np.where(sst_data_2 == fill_value, np.nan, sst_data_2)

# 2023
with nc.Dataset('AQUA_MODIS.20230101_20231231.L3m.YR.SST.sst.4km.nc', 'r') as nc_file_3:
    sst_data_3 = np.array(nc_file_3.variables['sst'][:])
sst_data_3 = np.where(sst_data_3 == fill_value, np.nan, sst_data_3)

# Expand the 1-D axes into per-pixel coordinates, then flatten everything so
# each DataFrame row is one grid cell.
lon_grid, lat_grid = np.meshgrid(longitudes, latitudes)
lat_flat = lat_grid.flatten()
lon_flat = lon_grid.flatten()

sst_data_1 = sst_data_1.flatten()
sst_data_2 = sst_data_2.flatten()
sst_data_3 = sst_data_3.flatten()

# One row per grid cell, one column per year.
year_columns = ['2004 Data', '2013 Data', '2023 Data']
df = pd.DataFrame({
    'Latitude': lat_flat,
    'Longitude': lon_flat,
    '2004 Data': sst_data_1,
    '2013 Data': sst_data_2,
    '2023 Data': sst_data_3
})

# Regional subset (bounds are inclusive on both ends).
lat_range = (-30, 30)
lon_range = (30, 120)
in_region = (
    df['Latitude'].between(lat_range[0], lat_range[1])
    & df['Longitude'].between(lon_range[0], lon_range[1])
)
filtered_df = df[in_region]

# `df` keeps only cells where all three years have a value.
df = df.dropna(how='any')
# The regional frame instead has its gaps filled from neighbouring rows
# (forward, then backward, in flattened-grid row order).
filtered_df = filtered_df.ffill().bfill()

# NOTE(review): `filtered_df` and `data_1..3` are not used by the correlation
# below, which runs on `df` (every row that is valid in all three years, not
# just the lat/lon box). If a regional correlation was intended, it has to be
# computed from the filtered rows -- confirm the intent.
data_1 = np.array(filtered_df['2004 Data']).flatten()
data_2 = np.array(filtered_df['2013 Data']).flatten()
data_3 = np.array(filtered_df['2023 Data']).flatten()

mean_2004, mean_2013, mean_2023 = (df[col].mean() for col in year_columns)

# Anomalies: subtract each year's own mean. Pearson correlation is unchanged
# by subtracting a constant, so this step does not alter the printed values
# beyond floating-point rounding.
for col, col_mean in zip(year_columns, (mean_2004, mean_2013, mean_2023)):
    df[col] = df[col] - col_mean

# Pairwise correlation between the yearly anomaly columns (Series.corr).
for first, second in (('2004', '2013'), ('2013', '2023'), ('2004', '2023')):
    correlation = df[f'{first} Data'].corr(df[f'{second} Data'])
    print(f"Cross-correlation for {first}-{second}:", correlation)

Cross-correlation for 2004-2013: 0.9957854756947677
Cross-correlation for 2013-2023: 0.9957723597506591
Cross-correlation for 2004-2023: 0.9952029049846566


Cross-correlation for 2004-2013: 0.9957854756864062  ---
Cross-correlation for 2013-2023: 0.9957723597422223  ---
Cross-correlation for 2004-2023: 0.9952029049732524  ---

In [6]:
import numpy as np
import pandas as pd
import netCDF4 as nc

# Cross-correlate yearly chlorophyll-a grids for 2004, 2013 and 2023
# (AQUA MODIS L3m yearly 4 km CHL products, per the filenames below).
#
# NOTE(review): the arrays keep the `sst_data_*` names this cell was written
# with, although they hold the `chlor_a` variable here, so that anything
# referring to those names keeps working. Consider renaming once callers are
# checked.

# Sentinel that marks missing pixels in the arrays read below; swapped for NaN.
# NOTE(review): this equality test relies on the arrays returned by netCDF4
# still carrying -32767.0 at missing pixels after conversion with np.array --
# confirm against the files' _FillValue attribute.
fill_value = -32767.0

# Each file is opened in a `with` block so the handle is released even when a
# variable lookup raises; a bare open()/close() pair leaks it on that path.

# 2004 -- this file also supplies the lat/lon axes used for every year.
# NOTE(review): assumes the 2013 and 2023 files share the same grid -- confirm.
with nc.Dataset('AQUA_MODIS.20040101_20041231.L3m.YR.CHL.chlor_a.4km.nc', 'r') as nc_file_1:
    sst_data_1 = np.array(nc_file_1.variables['chlor_a'][:])
    latitudes = np.array(nc_file_1.variables['lat'][:])
    longitudes = np.array(nc_file_1.variables['lon'][:])
sst_data_1 = np.where(sst_data_1 == fill_value, np.nan, sst_data_1)

# 2013
with nc.Dataset('AQUA_MODIS.20130101_20131231.L3m.YR.CHL.chlor_a.4km.nc', 'r') as nc_file_2:
    sst_data_2 = np.array(nc_file_2.variables['chlor_a'][:])
sst_data_2 = np.where(sst_data_2 == fill_value, np.nan, sst_data_2)

# 2023
with nc.Dataset('AQUA_MODIS.20230101_20231231.L3m.YR.CHL.chlor_a.4km.nc', 'r') as nc_file_3:
    sst_data_3 = np.array(nc_file_3.variables['chlor_a'][:])
sst_data_3 = np.where(sst_data_3 == fill_value, np.nan, sst_data_3)

# Expand the 1-D axes into per-pixel coordinates, then flatten everything so
# each DataFrame row is one grid cell.
lon_grid, lat_grid = np.meshgrid(longitudes, latitudes)
lat_flat = lat_grid.flatten()
lon_flat = lon_grid.flatten()

# Flatten the chlorophyll arrays (see the naming note at the top of the cell).
sst_data_1 = sst_data_1.flatten()
sst_data_2 = sst_data_2.flatten()
sst_data_3 = sst_data_3.flatten()

# One row per grid cell, one column per year.
year_columns = ['2004 Data', '2013 Data', '2023 Data']
df = pd.DataFrame({
    'Latitude': lat_flat,
    'Longitude': lon_flat,
    '2004 Data': sst_data_1,
    '2013 Data': sst_data_2,
    '2023 Data': sst_data_3
})

# Regional subset (bounds are inclusive on both ends).
lat_range = (-30, 30)
lon_range = (30, 120)
in_region = (
    df['Latitude'].between(lat_range[0], lat_range[1])
    & df['Longitude'].between(lon_range[0], lon_range[1])
)
filtered_df = df[in_region]

# `df` keeps only cells where all three years have a value.
df = df.dropna(how='any')
# The regional frame instead has its gaps filled from neighbouring rows
# (forward, then backward, in flattened-grid row order).
filtered_df = filtered_df.ffill().bfill()

# NOTE(review): `filtered_df` and `data_1..3` are not used by the correlation
# below, which runs on `df` (every row that is valid in all three years, not
# just the lat/lon box). If a regional correlation was intended, it has to be
# computed from the filtered rows -- confirm the intent.
data_1 = np.array(filtered_df['2004 Data']).flatten()
data_2 = np.array(filtered_df['2013 Data']).flatten()
data_3 = np.array(filtered_df['2023 Data']).flatten()

mean_2004, mean_2013, mean_2023 = (df[col].mean() for col in year_columns)

# Anomalies: subtract each year's own mean. Pearson correlation is unchanged
# by subtracting a constant, so this step does not alter the printed values
# beyond floating-point rounding.
for col, col_mean in zip(year_columns, (mean_2004, mean_2013, mean_2023)):
    df[col] = df[col] - col_mean

# Pairwise correlation between the yearly anomaly columns (Series.corr).
for first, second in (('2004', '2013'), ('2013', '2023'), ('2004', '2023')):
    correlation = df[f'{first} Data'].corr(df[f'{second} Data'])
    print(f"Cross-correlation for {first}-{second}:", correlation)

Cross-correlation for 2004-2013: 0.8639783498470676
Cross-correlation for 2013-2023: 0.8609767186365438
Cross-correlation for 2004-2023: 0.8558952523089858
