# Demonstration of selecting MCS locations at a given time

This notebook shows how to select the latitude, longitude and track_id of every MCS (mesoscale convective system) track point that occurs at a given time.

In [1]:
import datetime as dt
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Edit these based on where these are on your system.
# statsdir is searched below for the per-period track statistics files (mcs_tracks_final_extc_*.nc).
# pixeldir is not used by the cells shown here; presumably it holds the pixel-level tracking output -- confirm.
statsdir = Path('/gws/nopw/j04/mcs_prime/mmuetz/data/MCS_Global/stats')
pixeldir = Path('/gws/nopw/j04/mcs_prime/mmuetz/data/MCS_Global/mcstracking')

In [3]:
# Collect the track statistics files. The file names embed the start and end date of each
# tracking period, so sorting by name also sorts them chronologically.
stats_paths = sorted(statsdir.glob('mcs_tracks_final_extc_????????.0000_????????.0000.nc'))
if not stats_paths:
    # Fail with an explicit message rather than an IndexError on the lines below.
    raise FileNotFoundError(
        f'No mcs_tracks_final_extc_*.nc files found in {statsdir}; check that statsdir is set correctly.'
    )
# Show the first and last file found.
print(stats_paths[0])
print(stats_paths[-1])

/gws/nopw/j04/mcs_prime/mmuetz/data/MCS_Global/stats/mcs_tracks_final_extc_20000601.0000_20010101.0000.nc
/gws/nopw/j04/mcs_prime/mmuetz/data/MCS_Global/stats/mcs_tracks_final_extc_20200101.0000_20210101.0000.nc


In [4]:
# A single year can be opened using:
# stats_paths[-1] is the last file in sorted order (the 20200101-20210101 file in the listing above).
dstracks_2020 = xr.open_dataset(stats_paths[-1])

In [5]:
# The times have a small offset from the exact times -- e.g. 34500 ns off. Correct this.
# This is *really important* if you want to select by time.
def round_times_to_nearest_second(dstracks, fields):
    """Round the datetime values of the given fields to the nearest second, in place.

    The stored times carry a small offset from the exact times (e.g. 34500 ns),
    which makes exact equality comparisons against a chosen time fail.

    Args:
        dstracks: Mapping-like dataset (e.g. an ``xarray.Dataset``) whose items
            provide ``.load()`` and a writable datetime64 ``.values`` array.
        fields: Iterable of field names in ``dstracks`` to correct.

    Returns:
        None. The arrays behind ``dstracks[field].values`` are modified in place.
        NaT entries are left untouched.
    """
    ns_per_second = 1_000_000_000

    def remove_time_inaccuracy(t):
        # Work on int64 nanoseconds explicitly: plain ``int`` can be 32 bits wide on
        # some platforms, and the input is not guaranteed to have ns resolution.
        nanoseconds = t.astype("datetime64[ns]").astype("int64")
        whole_seconds = np.round(nanoseconds / 1e9).astype("int64")
        # Integer multiplication keeps the result exact.
        return (whole_seconds * ns_per_second).astype("datetime64[ns]")

    for field in fields:
        # Make sure the data is in memory so the in-place write below sticks.
        dstracks[field].load()
        values = dstracks[field].values
        # NaT marks missing times; it must be excluded from the integer arithmetic.
        tmask = ~np.isnat(values)
        # Cast back to the array's own dtype so the assignment never needs an implicit cast.
        values[tmask] = remove_time_inaccuracy(values[tmask]).astype(values.dtype)

In [6]:
# Apply the correction to the three time fields, so that exact time comparisons work later on.
round_times_to_nearest_second(dstracks_2020, ['base_time', 'start_basetime', 'end_basetime'])

In [7]:
# Display a summary of the dataset (its dimensions, coordinates and variables).
dstracks_2020

In [8]:
# Note, times that are NaT (not a time) are equivalent to nans.
# base_time is an xarray object; its .values attribute is a 2d numpy array.
# Each row contains the times for a given MCS track, with the first column being the first time etc.
dstracks_2020.base_time

In [11]:
# Pick a datetime that you are interested in. Must be 30mins past the hour.
# Using pandas makes it easy to convert between different time formats.
pd_datetime = pd.Timestamp('2020-06-01 12:30')
np_datetime = pd_datetime.to_numpy()
py_datetime = pd_datetime.to_pydatetime()

# This selects all times that match the chosen time. Need a numpy datetime for comparison.
# time_mask is a 2d array, and its shape matches base_time.
# It is True where there is a match.
time_mask = (dstracks_2020.base_time.values == np_datetime)
print(f'MCS track points that match {pd_datetime}: {time_mask.sum()}')

# Row (track) and column (time step) index of every matching track point, in row-major order.
# Using the same indices for the track id, lat and lon keeps the three arrays aligned
# one-to-one, even if a track were to match the chosen time more than once.
track_idx, time_idx = np.nonzero(time_mask)
track_ids = dstracks_2020.tracks.values[track_idx]
lat_at_time = dstracks_2020.meanlat.values[track_idx, time_idx]
lon_at_time = dstracks_2020.meanlon.values[track_idx, time_idx]
# You can select other properties with e.g. dstracks_2020.area.values[track_idx, time_idx]
# (or equivalently dstracks_2020.area.values[time_mask]).
for track_id, lat, lon in zip(track_ids, lat_at_time, lon_at_time):
    # Here you could do a comparison to calculate the distance from the lat/lon of a given MCS to tornadoes.
    # Note the print order: track id, then longitude, then latitude.
    print(track_id, lon, lat)
    

MCS track points that match 2020-06-01 12:30:00: 83
12396 50.27713 15.502525
12430 69.842415 8.555687
12440 -19.419847 7.6213827
12452 -32.165646 -39.200024
12482 -127.16564 -40.336716
12492 117.88728 26.110088
12494 104.10861 2.3603446
12505 -66.51667 8.03889
12511 -50.844612 -2.6311448
12521 -57.801994 -2.5045166
12523 161.58029 3.3009884
12526 120.0826 7.508861
12530 151.63557 -9.947532
12532 118.57864 42.708866
12533 2.992444 5.9287124
12535 -174.2736 -14.401964
12536 -62.623177 27.794443
12541 -81.72333 22.013811
12548 99.23261 1.845652
12549 -92.472 12.672942
12550 86.69207 9.033995
12553 -88.91038 19.213747
12555 -115.91988 8.608996
12557 -69.07915 -4.4049816
12559 -167.78352 -30.771395
12560 -91.24787 56.009357
12562 1.0320408 3.7495918
12563 -71.77625 -2.3574646
12564 159.07274 -37.803032
12565 -56.609867 46.76264
12566 -130.97847 8.289191
12567 104.08873 33.32616
12569 6.0586133 9.305855
12570 -62.8446 1.9253778
12571 -82.597694 5.3654532
12572 115.67156 5.6284413
12573 -63.6

In [12]:
# A single track can be selected from its track number:
# track_ids[0] is the first track found at the chosen time above; this raises an IndexError
# if no track matched that time.
track = dstracks_2020.sel(tracks=track_ids[0])
# Display the selected track.
track