In [15]:
import warnings

import pandas as pd
import xarray as xr

In [21]:
# --- Run configuration ---
# File locations (relative paths).
input_data = "../../qtrack_dummy_test_data/mpas_data_name_lat.nc"  # file opened for preparation
output_data = "../prepped_data_for_tracking.nc"  # intended destination of the prepared data

# Processing switches and options.
# NOTE(review): none of these flags is referenced by the cells shown in this notebook.
adjust_coords, adjust_time = True, False
adjust_level, adjust_resolution = False, False
append_additional_data = False
appended_time = None

In [22]:
# Dimension names that are recognised for each axis of the input file.
lon_names = ["longitude", "lon", "lons"]
lat_names = ["latitude", "lat", "lats"]
lev_names = ["lev", "level", "levels"]

# Data variables that are kept from the input file.
wind_names = ["u", "v"]

# Value selected along the level dimension when the input is not yet cut to one level.
# NOTE(review): presumably in Pa (i.e. 700 hPa) - confirm against the input file's units.
level_to_cut = 70000

In [28]:
## First, load in the dataset
# NOTE(review): correct_n_dims is not referenced by the cells shown in this notebook;
# presumably it records the target dimensionality (time, latitude, longitude) - confirm.
correct_n_dims = 3
# Open the file named by `input_data` as an xarray Dataset.
data_xr = xr.open_dataset(input_data)

In [24]:
#### FIRST CHECK THE VARIABLES
### Every wind component listed in `wind_names` must be present in the file.
var_list = list(data_xr.keys())
for wind_var in wind_names:
    if wind_var not in var_list:
        raise Exception("Missing variable "+str(wind_var)+" in provided file.")

### CUT DOWN TO JUST THE VARIABLES NEEDED
data_xr = data_xr[wind_names]

### Now, check the dimensions of each wind variable and note whether a level still has to be cut
needs_level_cut = False
for var in wind_names:
    n_dims = len(data_xr[var].dims)
    if n_dims < 3:
        raise Exception("Not enough dimensions in dataset. Check that you have at least time, longitude, and latitude included.")
    elif n_dims == 4:
        print('Possibility of uncut level. Check names of dimensions.')
        needs_level_cut = True
    elif n_dims > 4:
        raise Exception("Too many coordinates. Please cut your input data down to just time, latitude, longitude, and level (unless already cut to specified level")

### Cut the level once for the whole dataset, so every variable and the level
### coordinate itself end up consistent.
if needs_level_cut:
    lev_key = [i for i in lev_names if i in list(data_xr.dims)]

    if len(lev_key) != 1:
        raise Exception("More than three dimensions specified, but 'level/lev/levs' not found. Please cut your input data down to just time, latitude, longitude, and level coordinates.")

    print('We have a level file, slice!')
    # rename() returns a new object rather than modifying in place, so the result
    # must be kept; otherwise the .sel(level=...) below fails for 'lev'/'levels'.
    if lev_key[0] != 'level':
        data_xr = data_xr.rename({lev_key[0]: 'level'})
    data_xr = data_xr.sel(level=level_to_cut)


### Next, check that exactly one longitude and one latitude dimension is present
lon_key = [i for i in lon_names if i in list(data_xr.dims)]
lat_key = [i for i in lat_names if i in list(data_xr.dims)]

if len(lon_key) == 0:
    raise Exception("No valid longitude coordinates found in data.")
if len(lat_key) == 0:
    raise Exception("No valid latitude coordinates found in data.")
if len(lat_key) > 1:
    raise Exception("Multiple named latitude coordinates.")
if len(lon_key) > 1:
    raise Exception("Multiple named longitude coordinates.")

### Finally, rename the longitude and latitude dimensions to their canonical names
if lon_key[0] != 'longitude':
    data_xr = data_xr.rename({lon_key[0]: 'longitude'})

if lat_key[0] != 'latitude':
    data_xr = data_xr.rename({lat_key[0]: 'latitude'})

## REQUIREMENTS: Latitude goes from positive to negative. Longitude is in -180 to +180 (and not 0 to 360)

Possibility of uncut level. Check names of dimensions.
We have a level file, slice!
Possibility of uncut level. Check names of dimensions.
We have a level file, slice!


In [25]:
### Renaming and level slicing has been done. Now make sure latitude runs from positive to
### negative and longitude is increasing. If not, flip the offending axis.
#### Only the first step along each axis is inspected; an axis with a single point has no
#### ordering to check, so it is left alone.
if data_xr['latitude'].size > 1 and data_xr['latitude'].diff(dim='latitude').values[0] >= 0: ## Latitude increasing... we need to switch
    data_xr = data_xr.isel(latitude=slice(None, None, -1))
if data_xr['longitude'].size > 1 and data_xr['longitude'].diff(dim='longitude').values[0] <= 0: ## Longitude decreasing... we need to switch
    data_xr = data_xr.isel(longitude=slice(None, None, -1))

### Finally, check if the longitude is 0 to 360
min_lon = data_xr['longitude'].min().values
max_lon = data_xr['longitude'].max().values

if max_lon > 180:
    warnings.warn("WARNING: LONGITUDE VALUE EXCEEDS 180. Assuming longitude data is formatted in absolute (0 to 360) and adjusting to W/E degrees.")
    warnings.warn("Please double check your data to ensure you have the correct coordinate system.")
    # Map [0, 360) onto [-180, 180), then re-sort so longitude is increasing again.
    data_xr.coords['longitude'] = (data_xr.coords['longitude'] + 180) % 360 - 180
    data_xr = data_xr.sortby(data_xr.longitude)


In [29]:
### FINALLY, NEED TO CUT TIME TO EVERY 6 HOURS
def check_and_resample(input_data, time_var='time', interval='6h'):
    """Validate the time spacing of a dataset and resample it to a fixed interval.

    Parameters
    ----------
    input_data : xarray.Dataset
        Dataset that contains `time_var`.
    time_var : str, default 'time'
        Name of the time coordinate to check and resample along.
    interval : str, default '6h'
        Target spacing, given as a string accepted by ``pandas.Timedelta``.

    Returns
    -------
    The result of ``input_data.resample({time_var: interval}).nearest()``.

    Raises
    ------
    ValueError
        If `time_var` is not in `input_data`, or if any two consecutive time
        steps are further apart than `interval`.

    Warns
    -----
    UserWarning
        Only when at least one time step is finer than `interval`, i.e. when
        data will actually be trimmed.
    """
    if time_var not in input_data:
        raise ValueError(f"{time_var} not found in dataset variables")

    time_index = pd.to_datetime(input_data[time_var].values)
    time_diffs = time_index[1:] - time_index[:-1]
    expected_diff = pd.Timedelta(interval)

    # Data coarser than the target interval cannot be thinned onto it.
    if not (time_diffs <= expected_diff).all():
        raise ValueError(f"Time steps must be spaced no more than {interval} apart.")

    # Warn only when data is really finer than the target and will be trimmed.
    if (time_diffs < expected_diff).any():
        warnings.warn(f"Warning: sub-{interval} data identified. Trimming to {interval} data for best use in tracker.")

    return input_data.resample({time_var: interval}).nearest()

# Check the time spacing and resample onto a 6-hourly time axis (arguments spelled out explicitly).
data_xr = check_and_resample(data_xr, time_var='time', interval='6h')



In [27]:
# Display the prepared dataset as the cell output for a final visual check.
data_xr
# The save step is currently disabled; uncomment to write the dataset to `output_data`.
#data_xr.to_netcdf(output_data)

In [22]:
#data_xr.time