In [1]:
import xarray as xr
import numpy as np
import time as t
import pandas as pd

#### Local time versus UTC

In [10]:
# Load the two tropical ISCCP tables: the edge-filtered one and its local-time counterpart.
filtered_edges = np.load( 'CT-allDataFilteredEdges_tropical.npy' )
local_time = np.load( 'CT-allDataLocalTimes_tropical.npy' )

# Report the shape of each table, edge-filtered first.
for table in ( filtered_edges, local_time ):
    print( table.shape )
print( '~~~~~~~~~~~~~~' )

# Column 10 of the edge-filtered table is inspected as a time value:
# list every entry that is NOT a multiple of three.
times = filtered_edges[:,10]
remainder = np.mod( times, 3 )
not_multiples_of_three = times[ remainder != 0 ]
print( not_multiples_of_three )

(803075, 49)
(803075, 49)
~~~~~~~~~~~~~~
[]


#### This cell copies fields from ERA-Interim files to ERA-5 files.

In [None]:
# Variables taken from each ERA-Interim collocation file and attached to the
# ERA5 collocation dataset of the same year.
var = [ 'z', 'lifetime' ]

# Only 1983 is processed at the moment; the full end year (2009) is kept as a reminder.
for i in np.arange( 1983, 1984 ):  # 2009 ):
    print( i )
    erai_path = '/groups/sylvia/JAS-MCS-rain/ERAI/colloc_' + str(i) + '.nc'
    era5_path = '/groups/sylvia/JAS-MCS-rain/ERA5/colloc5_' + str(i) + '.nc'

    # Context managers release both file handles at the end of every iteration,
    # which matters once the loop covers the full range of years.
    with xr.open_dataset( erai_path ) as file1, xr.open_dataset( era5_path ) as file2:
        for v in var:
            file2[v] = file1[v]
        # NOTE(review): the copy only exists in memory; nothing is written back
        # to the ERA5 file here. Confirm whether a to_netcdf step is intended.
        print(file2)

#### This cell extracts only the ISCCP data for a tropical domain.

In [3]:
# Inclusive latitude bounds of the band to keep.
lowlat, hilat = -10, 10

# Input table for this run. The local-time table can be processed the same way by
# swapping in "CT-allDataLocalTimes.npy" here and "CT-allDataLocalTimes_tropical.npy" below.
ISCCP_data = np.load( "CT-allDataFilteredEdges.npy" )

# Column 12 is used as the latitude; print its range before filtering.
CT_lat = ISCCP_data[:,12]
print( CT_lat.min(), CT_lat.max() )

# Keep only the rows whose latitude lies within [lowlat, hilat] and save them.
inside_band = (CT_lat <= hilat) & (CT_lat >= lowlat)
filtered_CT_data = ISCCP_data[inside_band]
np.save( "CT-allDataFilteredEdges_tropical.npy", filtered_CT_data )

# Print the latitude range of the saved subset as a check.
kept_lat = filtered_CT_data[:,12]
print( kept_lat.min(), kept_lat.max() )

-51.0 51.9
-10.0 10.0


#### Testing / debugging capeCollocate_ERA5.py

In [2]:
# Load the tropical local-time table and isolate the rows whose column 7 equals 1983.
filtered_data = np.load( "CT-allDataLocalTimes_tropical.npy" )
years = filtered_data[:,7]
is_1983 = np.equal( years, 1983. )
data_1983 = filtered_data[is_1983]

# Show column 8 of the 1983 subset (presumably the month -- TODO confirm).
print( data_1983[:,8] )

[ 8.  8.  8. ... 12. 12. 12.]


In [None]:
# Open the tropical ERA5 qv file. Alternatives kept for reference:
#   - the CAPE file:  '/xdisk/sylvia/ERA5_output/ERA5_cape_tropical.nc'
#   - wrapping longitudes into [0, 360):
#     era_data = era_data.assign_coords( longitude=((era_data['longitude'] + 360) % 360) )
era_data = xr.open_dataset( '/xdisk/sylvia/ERA5_output/ERA5_qv_tropical.nc' )

# Round the valid times to whole seconds. The lowercase alias "s" is used because
# the uppercase "S" frequency alias is deprecated in recent pandas releases.
time = era_data['valid_time'].dt.round("s")
longitude = era_data['longitude']
display( time )

In [None]:
# Print every longitude next to the same value taken modulo 360.
for lon in longitude:
    raw_value = lon.values
    print( raw_value )
    print( raw_value % 360 )
    print( '~~~~~~~~~~~~~~~~~~`' )

#### Look at csv file values

In [43]:
# Inspect the 'qv_mean' column of the month-01 qv statistics file for one year.
year = 2000
csv_path = f"output/CT_qv_statistics_{year}_01.csv"
df = pd.read_csv( csv_path )
print( df['qv_mean'] )

0       0.007076
1            NaN
2       0.007436
3       0.007021
4       0.006983
          ...   
2868    0.006947
2869    0.006802
2870    0.007184
2871    0.006706
2872    0.006745
Name: qv_mean, Length: 2873, dtype: float64


#### Combine the per-variable csv files with one another, and then with the MCS precip values

In [34]:
# Merge the per-variable monthly statistics CSVs into a single CSV per month.
# The qv table is the base; the <var>_mean and <var>_99 columns of every other
# variable are appended to it.
basedir1 = '/groups/sylvia/JAS-MCS-rain/ISCCP/output/'
#var_list = [ 'qv', 'temperature', 'qc', 'qi', 'w' ]   # 'w' is currently left out
var_list = [ 'temperature', 'qc', 'qi' ]
year = 2000

# 'cape' is merged in addition to var_list, exactly as before.
merge_vars = var_list + [ 'cape' ]

for month in np.arange( 1, 13 ):
    formatted_month = f"{month:02d}"
    suffix = "_statistics_" + str(year) + "_" + formatted_month + ".csv"

    df1 = pd.read_csv( basedir1 + "CT_qv" + suffix )
    lengths = [ len(df1) ]

    for v in merge_vars:
        other = pd.read_csv( basedir1 + "CT_" + v + suffix )
        lengths.append( len(other) )

        # Column assignment aligns on the row index, so a table of a different
        # length would silently lose rows or introduce NaN. Fail loudly instead.
        if len(other) != len(df1):
            raise ValueError(
                f"Row count mismatch for {year}-{formatted_month}: "
                f"qv has {len(df1)} rows, {v} has {len(other)}"
            )

        for stat in ( 'mean', '99' ):
            column = v + "_" + stat
            df1[column] = other[column]

    # NOTE(review): this writes to the current working directory (and includes the
    # index column), while the collocation cell reads "CT_statistics_*" from
    # basedir1 -- confirm the intended output location.
    df1.to_csv( "CT_statistics_" + str(year) + "_" + formatted_month + ".csv" )

    # Row counts of the qv table followed by the var_list tables (same log format as before).
    print( *lengths[:1 + len(var_list)] )
    

2873 2873 2873 2873
2694 2694 2694 2694
3523 3523 3523 3523
4196 4196 4196 4196
3849 3849 3849 3849
2591 2591 2591 2591
2837 2837 2837 2837
2544 2544 2544 2544
2846 2846 2846 2846
3492 3492 3492 3492
3272 3272 3272 3272
2531 2531 2531 2531


In [40]:
# Collocate the merged monthly statistics CSVs with the ERA-Interim collocation
# file of the same year: rows are matched on six MCS properties, the matched
# 'pacc' / 'pmax' values are attached, and one NetCDF file per year is written.
# NOTE: `year` is not set in this cell; it must already be defined by an earlier cell.
basedir1 = '/groups/sylvia/JAS-MCS-rain/ISCCP/output/'
basedir2 = '/groups/sylvia/JAS-MCS-rain/ERAI/'

# Matching key: variable names in the NetCDF file and the corresponding CSV
# columns, listed in the same order.
vars_to_collocate = [ 'day', 'rad', 'ctt', 'lifetime', 'minctt', 'maxrad' ]
csv_key_columns = [ 'day', 'cs_radius', 'cs_temp', 'lifetime', 'min_temp', 'max_radius' ]
tolerance = 1e-6

# Read everything needed from the NetCDF file up front so the handle can be released.
# Assumes all of these variables are 1-D along the same dimension -- TODO confirm.
with xr.open_dataset( basedir2 + 'colloc_' + str(year) + '_NZ.nc' ) as efile:
    efile_month = efile['month'].values
    efile_keys = np.column_stack( [ efile[v].values for v in vars_to_collocate ] ).astype( np.float64 )
    efile_pacc = efile['pacc'].values
    efile_pmax = efile['pmax'].values

# Per-month results; concatenated once after the loop.
monthly_frames = []

# Running total of matched row pairs over the year.
m = 0
for month in np.arange( 1, 13 ):
    formatted_month = f"{month:02d}"

    # Positions, within the full-year file, of the entries belonging to this month
    month_rows = np.flatnonzero( efile_month == month )
    ecolloc_mcs_vals = efile_keys[month_rows]

    filename = "CT_statistics_" + str(year) + "_" + formatted_month + ".csv"
    df = pd.read_csv(basedir1 + filename)

    # Same six properties taken from the csv file
    ncolloc_mcs_vals = df[csv_key_columns].to_numpy().astype( np.float64 )

    # Row-wise matching: entry [a, b] is True when all six properties of file row a
    # and csv row b agree within the tolerance (rows containing NaN never match).
    match_matrix = np.all(np.abs(ecolloc_mcs_vals[:, None, :] - ncolloc_mcs_vals[None, :, :]) < tolerance, axis=2)

    # Two-column array of (row in this month's subset, row in the csv)
    matching_indices = np.argwhere( match_matrix )
    m = m + matching_indices.shape[0]

    # Retain only the csv rows that found a match
    reduced_df = df.iloc[matching_indices[:, 1]].copy()

    # matching_indices[:, 0] counts rows within this month's subset, so translate
    # it back to positions in the full-year arrays before reading pacc / pmax.
    # (Indexing the full-year arrays with the subset positions directly would
    # take the values from the wrong rows.)
    efile_rows = month_rows[matching_indices[:, 0]]
    reduced_df['pacc'] = efile_pacc[efile_rows]
    reduced_df['pmax'] = efile_pmax[efile_rows]

    monthly_frames.append( reduced_df )

# Build the yearly dataframe in one step instead of growing it inside the loop
yearly_reduced_df = pd.concat( monthly_frames, ignore_index=True )

# Print yearly dataframe shape
print("Shape of yearly reduced dataframe:", yearly_reduced_df.shape)

# Convert yearly dataframe to xarray Dataset
yearly_ds = yearly_reduced_df.to_xarray()

# Save the yearly dataset to a NetCDF file
output_filename = f"colloc5_{year}_NZ.nc"
yearly_ds.to_netcdf(basedir1 + output_filename)
print( m )

Shape of yearly reduced dataframe: (17818, 33)
17818


In [None]:
# Scratch template: append one month of simulated data to a NetCDF file along
# the "occurrence" dimension, creating the file if it does not exist yet.
# NOTE(review): `file_path` and `month` are not defined in this cell; they must
# be set beforehand (this looks like the body of a per-month loop -- confirm).

# Simulate new data for this month (occurrence, altitude)
new_occurrences = 10  # Example occurrences per month
altitudes = 5  # Example altitude levels

new_data = xr.Dataset(
    {
        "variable_1D": (("occurrence"), np.random.rand(new_occurrences)),  # 1D
        "variable_2D": (("occurrence", "altitude"), np.random.rand(new_occurrences, altitudes)),  # 2D
    },
    coords={
        "occurrence": np.arange(new_occurrences),
        "altitude": np.arange(altitudes),
    },
)

try:
    # load_dataset reads the whole file into memory and closes it again, so the
    # same path can safely be overwritten below (open_dataset would stay lazy
    # and keep the file open).
    existing_data = xr.load_dataset(file_path)
except FileNotFoundError:
    # If the file does not exist, create a new one
    new_data.to_netcdf(file_path, mode="w")
    print(f"Created new NetCDF file for month {month}")
else:
    # Concatenate along the occurrence dimension
    combined_data = xr.concat([existing_data, new_data], dim="occurrence")

    # Save back to NetCDF (mode "w" overwrites the existing file)
    combined_data.to_netcdf(file_path, mode="w")

    print(f"Appended data for month {month}")