In [3]:
from load_trajectories import get_timestamps, load_trajectories
# Using the NPL 2024a kernel
import os
import os.path

#for calculating the air temperature
from metpy.units import units
from metpy.calc import temperature_from_potential_temperature

#time
from datetime import timedelta

import dask
import dask.dataframe as dd
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import pyplot as plt



# Folder 1 (using one netcdf file)

In [4]:
# Build the full path to the netCDF file analysed in this section and echo it
# so the reader can see exactly which file is being read.
dirpath = "/glade/derecho/scratch/klamb/superdroplets/outsdm_iceball_nowind_rhod_dist_min200_sgs_1024_poly_trj_5400_7200"
fn = "cm1out_only_upto35.nc"

# Directory + file name -> single path string.
newncfilepath = os.path.join(dirpath, fn)
print(newncfilepath)

/glade/derecho/scratch/klamb/superdroplets/outsdm_iceball_nowind_rhod_dist_min200_sgs_1024_poly_trj_5400_7200/cm1out_only_upto35.nc


In [5]:
# Open the netCDF file at `newncfilepath` as an xarray Dataset; the bare
# name on the last line shows the Dataset summary as the cell output.
nc = xr.open_dataset(filename_or_obj=newncfilepath)
nc

In [7]:
# Show the raw values of the 'z' coordinate as a NumPy array.
# NOTE(review): presumably heights of the vertical levels — confirm units in the file attributes.
nc["z"].values

array([ 0.05      ,  0.15      ,  0.25      ,  0.35000002,  0.45000002,
        0.55      ,  0.65000004,  0.75000006,  0.85      ,  0.95000005,
        1.0500001 ,  1.1500001 ,  1.25      ,  1.35      ,  1.45      ,
        1.5500001 ,  1.6500001 ,  1.7500001 ,  1.8500001 ,  1.95      ,
        2.0500002 ,  2.15      ,  2.25      ,  2.3500001 ,  2.45      ,
        2.5500002 ,  2.65      ,  2.7500002 ,  2.8500001 ,  2.95      ,
        3.0500002 ,  3.15      ,  3.2500002 ,  3.3500001 ,  3.45      ,
        3.5500002 ,  3.65      ,  3.7500002 ,  3.8500001 ,  3.9500003 ,
        4.05      ,  4.15      ,  4.25      ,  4.3500004 ,  4.4500003 ,
        4.55      ,  4.65      ,  4.75      ,  4.8500004 ,  4.9500003 ,
        5.05      ,  5.15      ,  5.2500005 ,  5.3500004 ,  5.4500003 ,
        5.55      ,  5.65      ,  5.7500005 ,  5.8500004 ,  5.9500003 ,
        6.05      ,  6.15      ,  6.2500005 ,  6.3500004 ,  6.4500003 ,
        6.55      ,  6.65      ,  6.7500005 ,  6.8500004 ,  6.95

In [22]:
# Horizontal grid coordinates in the file:
#   xf / yf -> x / y positions of the cell edges
#   xh / yh -> x / y positions of the cell midpoints
# Display the x edge positions scaled by 1000.
# NOTE(review): the factor 1000 looks like a km -> m conversion — confirm the units of 'xf'.
nc["xf"].values * 1000

array([    0.     ,   100.     ,   200.     ,   300.     ,   400.     ,
         500.     ,   600.     ,   700.00006,   800.     ,   900.00006,
        1000.     ,  1100.     ,  1200.     ,  1300.0001 ,  1400.0001 ,
        1500.0001 ,  1600.     ,  1700.     ,  1800.0001 ,  1900.0001 ,
        2000.     ,  2100.0002 ,  2200.     ,  2300.0002 ,  2400.     ,
        2500.     ,  2600.0002 ,  2700.     ,  2800.0002 ,  2900.     ,
        3000.0002 ,  3100.0002 ,  3200.     ,  3300.0002 ,  3400.     ,
        3500.0002 ,  3600.0002 ,  3700.0002 ,  3800.0002 ,  3900.     ,
        4000.     ,  4100.0005 ,  4200.0005 ,  4300.     ,  4400.     ,
        4500.     ,  4600.0005 ,  4700.0005 ,  4800.     ,  4900.     ,
        5000.     ,  5100.0005 ,  5200.0005 ,  5300.     ,  5400.     ,
        5500.0005 ,  5600.0005 ,  5700.0005 ,  5800.     ,  5900.     ,
        6000.0005 ,  6100.0005 ,  6200.0005 ,  6300.     ,  6400.     ,
        6500.0005 ,  6600.0005 ,  6700.0005 ,  6800.     ,  6900

In [5]:
# Express the 'time' coordinate as a plain number of seconds: dividing the
# timedelta64 values by a one-second timedelta yields floats.
one_second = np.timedelta64(1, "s")
time_coords = nc["time"].values / one_second

# Display the values as 32-bit integers (display only; `time_coords` itself
# stays floating point because the cast result is not assigned back).
time_coords.astype(np.int32)

array([5160, 5220, 5280, 5340, 5400, 5460, 5520, 5580, 5640, 5700, 5760,
       5820, 5880, 5940, 6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420,
       6480, 6540, 6600, 6660, 6720, 6780, 6840, 6900, 6960, 7020, 7080,
       7140, 7200, 3660, 3720, 3780, 3840, 3900, 3960, 4020, 4080, 4140,
       4200, 4260, 4320, 4380, 4440, 4500, 4560, 4620, 4680, 4740, 4800,
       4860, 4920, 4980, 5040, 5100, 5160, 5220, 5280, 5340, 5400, 5460,
       5520, 5580, 5640, 5700, 5760, 5820, 5880, 5940, 6000, 6060, 6120,
       6180, 6240, 6300, 6360, 6420, 6480, 6540, 6600, 6660, 6720, 6780,
       6840, 6900, 6960, 7020, 7080, 7140, 7200, 7260, 7320, 7380, 7440,
       7500, 7560, 7620, 7680, 7740, 7800], dtype=int32)

In [6]:
# Restrict the Dataset to the variables used in the rest of the analysis.
selected_vars = [
    'rh', 'th', 'prs',
    'uinterp', 'vinterp', 'winterp',
    'out8', 'out9', 'out10', 'out11', 'out12', 'out13', 'out14',
    'deactrat',
]

# Indexing a Dataset with a list of names returns a Dataset holding only those variables.
selected_data = nc[selected_vars]
selected_data

In [9]:
# Add the elapsed time in whole seconds as a variable of `selected_data`.
#
# The division is done on the 'time' DataArray itself rather than on its bare
# `.values`: assigning an unlabeled 1-D NumPy array to a Dataset key makes
# xarray create a separate dimension named after the new variable
# ('time_seconds'), so the result would no longer be aligned with 'time'.
# Working with the DataArray keeps the 'time' dimension attached.
#
# timedelta64 / one-second timedelta -> float seconds, then cast to int32.
selected_data['time_seconds'] = (nc['time'] / np.timedelta64(1, 's')).astype(np.int32)
selected_data

In [None]:
# #Converting a data set to a dataframe & renaming the column names
# df = selected_data.to_dataframe()
# renamed_table = df.rename(columns={
#     'rh': 'Relative Humidity',
#     'th': 'Potential Temperature (K)',
#     'prs': 'Pressure (pa)',
#     'uinterp': 'U Interpolated to Scalar Points (Velocity Field)',
#     'vinterp': 'V Interpolated to Scalar Points (Velocity Field)',
#     'winterp': 'W Interpolated to Scalar Points (Velocity Field)',
#     'out8': 'out8: mean size in um (a)',
#     'out9': 'out9: mean size in um (c)',
#     'out10': 'out10: ice number mixing ratio (#/kg)',
#     'out11': 'out11: mean settling velocity (m/s)',
#     'out12': 'out12: mean ice density (kg/m^3)',
#     'out13': 'out13: standard deviation of size in um (a_std)',
#     'out14': 'out14: standard deviation of size in um (c_std)',
#     'deactrat': 'deactrat: mean deposition/sublimation rate (kg/kg/s)',
#     }
# )
# df_indexed = renamed_table.reset_index()
# df_indexed

# #.astype (convert float to int)

In [None]:
# #adding a column for temperature (calculating from potential temperature)
# potential_temp = selected_data['th'].values * units.kelvin
# pressure = (selected_data['prs'].values / 100) * units.hPa
# selected_data['air_temp'] = temperature_from_potential_temperature(pressure, potential_temp)
# selected_data

In [10]:
# Air temperature from potential temperature via Poisson's equation:
#     T = theta * (p / p0) ** kappa
# The reference pressure p0 in the definition of potential temperature is
# 1000 hPa = 100000 Pa. The previous value, 101325 Pa, is the standard
# sea-level pressure and biases every temperature low by roughly 0.4 %.
# NOTE(review): assumes 'th' is in K and 'prs' in Pa — confirm against the file attributes.
REFERENCE_PRESSURE_PA = 100000.0
KAPPA = .286  # approx. Rd / cp for dry air

selected_data['air_temp'] = selected_data['th'] * (selected_data['prs'] / REFERENCE_PRESSURE_PA) ** KAPPA
selected_data

In [21]:
# Compare how many time stamps there are in total with how many are distinct.
time = selected_data['time_seconds'].values
sorted_time = time  # same array; kept under both names

# Total number of time stamps, duplicates included (105 in the recorded run).
n_time_total = sorted_time.size

# Number of distinct time stamps (70 in the recorded run); np.unique already
# returns its result sorted, so no separate sort is needed.
n_time_unique = np.unique(sorted_time).size
n_time_unique

70

In [None]:
# Inspect the distinct values of 'out10' (labelled "ice number mixing ratio
# (#/kg)" in the rename map kept in the commented-out DataFrame cell).
#
# The values are read straight from `selected_data`: the DataFrame
# `df_indexed` is only created in a commented-out cell, so referring to it
# raises NameError on a fresh kernel.
# Swap 'out10' for 'out8', 'out9', 'out11', 'out12', 'out13' or 'out14' to
# inspect the other diagnostic fields the same way.
np.unique(selected_data['out10'].values)

# Folder 1 (another netCDF file for comparison)

In [None]:
# First file: the run directory under the `joko` scratch space.
path1 = '/glade/derecho/scratch/joko/outsdm_iceball_nowind_rhod_dist_min200_sgs_1024_poly'
fn1 = "cm1out.nc"
path1file = os.path.join(path1, fn1)

# Second file: the same file that was opened at the top of the notebook.
path2 = '/glade/derecho/scratch/klamb/superdroplets/outsdm_iceball_nowind_rhod_dist_min200_sgs_1024_poly_trj_5400_7200'
fn2 = "cm1out_only_upto35.nc"
path2file = os.path.join(path2, fn2)

# Open both files as xarray Datasets, in the same order as before.
ds1, ds2 = (xr.open_dataset(ncpath) for ncpath in (path1file, path2file))

In [None]:
# First step towards intersecting the two datasets along 'time':
# where a time stamp occurs more than once in ds2, keep only its first occurrence.
ds2 = ds2.drop_duplicates('time', keep='first')

In [None]:
# Boolean mask over ds1's time stamps: True where the stamp also occurs in ds2.
time_idx = np.isin(ds1['time'].values, ds2['time'].values)

In [None]:
# Keep only the ds1 time steps flagged in `time_idx`, and put ds2 in
# ascending time order.
ds1 = ds1.sel(time=time_idx, drop=True)
ds2 = ds2.sortby('time')

# 1) Do the two time coordinates agree?
timestamps_agree = ds1['time'].equals(ds2['time'])
print('timestamps match' if timestamps_agree else 'timestamps do not match')

# 2) Does the chosen variable agree between the two datasets?
var = 'out8'  # variable to compare
data_agree = ds1[var].equals(ds2[var])
print(f'Data match for variable {var}' if data_agree else f'Data do NOT match for variable {var}')