# Script to extract files according to different dates/times

In [38]:
import pandas as pd
import pytz
import os
from shapely import wkt
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import xarray as xr

In [13]:
def LonTo360(dlon):
    """Convert longitude(s) in degrees to the 0-360 degree convention.

    The value is wrapped, shifted by one full turn and wrapped again;
    the input only needs to support ``%`` and ``+`` with integers.
    """
    wrapped = dlon % 360
    return (wrapped + 360) % 360

In [14]:
def get_by_name(name: str, year: int, ibtrack_file: str):
    """Return the IBTrACS CSV rows for one named storm in one calendar year.

    Parameters
    ----------
    name : storm name, compared for exact equality with the ``NAME`` column.
    year : calendar year; rows with ``year-01-01 <= ISO_TIME < (year+1)-01-01``
        are kept.
    ibtrack_file : path to the IBTrACS CSV file.

    Returns
    -------
    pandas.DataFrame holding the track, intensity and wind-radii columns
    listed in ``extract_vars``, with ``ISO_TIME`` parsed to datetimes,
    ``LON`` converted to float in the 0-360 convention, and an extra
    ``LON_180`` column carrying the longitude values as read from the file.
    """
    data = pd.read_csv(ibtrack_file, low_memory=False)
    data = data.iloc[1:, :]  # remove the row of units

    # Explicit copy: the column assignment below must modify our own frame,
    # not a slice of the full table (avoids pandas' SettingWithCopyWarning).
    data = data[data['NAME'] == name].copy()
    data['ISO_TIME'] = pd.to_datetime(data['ISO_TIME'])

    year_date_of = pd.to_datetime(f'{year}')
    year_date_after = pd.to_datetime(f'{year+1}')
    in_year = (data['ISO_TIME'] >= year_date_of) & (data['ISO_TIME'] < year_date_after)

    extract_vars = [
        'NAME', 'ISO_TIME', 'WMO_WIND', 'WMO_PRES', 'LAT', 'LON',
        'USA_R34_NE', 'USA_R34_NW', 'USA_R34_SE', 'USA_R34_SW',
        'USA_R50_NE', 'USA_R50_NW', 'USA_R50_SE', 'USA_R50_SW',
        'USA_R64_NE', 'USA_R64_NW', 'USA_R64_SE', 'USA_R64_SW',
        'REUNION_R34_NE', 'REUNION_R34_NW', 'REUNION_R34_SE', 'REUNION_R34_SW',
        'REUNION_R50_NE', 'REUNION_R50_NW', 'REUNION_R50_SE', 'REUNION_R50_SW',
        'REUNION_R64_NE', 'REUNION_R64_NW', 'REUNION_R64_SE', 'REUNION_R64_SW',
        'BOM_R34_NE', 'BOM_R34_SE', 'BOM_R34_NW', 'BOM_R34_SW',
        'BOM_R50_NE', 'BOM_R50_SE', 'BOM_R50_NW', 'BOM_R50_SW',
        'BOM_R64_NE', 'BOM_R64_SE', 'BOM_R64_NW', 'BOM_R64_SW',
    ]
    data = data.loc[in_year, extract_vars]

    # Rows whose WMO wind or pressure is a single blank are dropped, except for 2021.
    # NOTE(review): presumably the 2021 exemption exists because WMO values were
    # not filled in for that season - confirm before reusing for other years.
    if year != 2021:
        has_wmo = (data['WMO_WIND'] != ' ') & (data['WMO_PRES'] != ' ')
        data = data[has_wmo]

    # Copy again so the new columns are written to an independent frame.
    data = data.copy()
    data['LON_180'] = data['LON']
    data['LON'] = data['LON'].astype(float).apply(LonTo360)
    return data

## User Settings
* change the "output_filename" or the "dir_path" if necessary

### Note on cell below: Pick the cut-off date with the "date_to_pick_for_files_before_this_date" variable; files dated before this date (or after it, depending on "before_or_after") are selected

In [52]:
### user settings
storm_name = "IDA"
storm_year = 2021

# Cut-off timestamp written as one integer: the first 4 digits are the year (2021),
# then the month (08), day (29), UTC hour (15), minute (00), second (00),
# followed by one extra trailing digit (0) so the value has 15 digits.
date_to_pick_for_files_before_this_date = 202108291500000
before_or_after = "before"  # the other option is "after"

# Input locations
dir_path = "/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/"  # alternatives: '/Users/sophiahu/Documents/MIRS_DATA/' or "nc/n20_atms/2021-08-29/"
ibt_file = '/Users/sophiahu/Downloads/IBTrACS.ALL.v04r00.nc'
ibt_file_csv = '/Users/sophiahu/Downloads/ibtracs.ALL.list.v04r00.csv'

# Output name records the storm, the cut-off and the selected side of it
output_filename = (
    f"{storm_name}_{storm_year}_08-29_all_data_c29_"
    f"{date_to_pick_for_files_before_this_date}_{before_or_after}.nc"
)

### Note for above:
* If you want to pick an afternoon time, you have to work out what the corresponding UTC time is

For example: let's say you want the morning CDT (e.g., 10am CDT). CDT is UTC-5, so that would be 1500 UTC. So, the value would be:
* 202108291500000 # this is 2021 Aug. 29 at 1500 UTC (15 digits, the same length as the timestamps in the file names)

Then the code below would select files that exist BEFORE this time (so only early morning Aug. 29). Hence the variable name "date_to_pick_for_files_before_this_date".

If you want afternoon files (as in files that have a date AFTER the chosen date), set "before_or_after" to "after" in the user settings cell. 

This switches the comparison used in the 2 cells below (the one below "Grab the IMG files that exist before the date above" and "Grab the SND files that exist before the date above"):

* "before" keeps the files whose timestamp is less than ("<") the chosen date

* "after" keeps the files whose timestamp is greater than (">") the chosen date

* The setting applies to both of the cells, so neither cell needs to be edited by hand




In [16]:
# SND variables carried into the merged file (unique from IMG)
snd_vars_to_keep = [
    "Player", "Plevel",
    "PTemp", "PVapor", "PClw", "PRain", "PGraupel",
]

# IMG variables left out of the merged file
img_vars_to_remove = [
    "Atm_type", "ChanSel", "SWP", "IWP",
    "Snow", "SWE", "SnowGS",
    "SIce", "SIce_MY", "SIce_FY", "SFR",
    "CldTop", "CldBase", "CldThick",
    "PrecipType", "RFlag", "SurfM",
    "WindSp", "WindDir", "WindU", "WindV",
    "Prob_SF", "quality_information",
]

# Grab files

In [17]:
# Every entry in the MIRS directory is a candidate input file
mirs_files = os.listdir(dir_path)

# Track rows for the requested storm name and year, read from the IBTrACS CSV
result = get_by_name(storm_name, storm_year, ibt_file_csv)
npts = len(result.index)  # number of rows in the result DataFrame

# list of MIRS files that contain TC
mirs_contains_tc = list()

In [18]:
# No per-file storm check is applied here: every file listed in the directory is kept
mirs_contains_tc = mirs_files

## define IMG and SND files

In [19]:
# Split the candidate files by product, using the file-name prefix
img_files_storm_final = [fname for fname in mirs_contains_tc if fname.startswith('NPR-MIRS-IMG')]
snd_files_storm_final = [fname for fname in mirs_contains_tc if fname.startswith('NPR-MIRS-SND')]

## recall that UTC to CDT is -5 hours

## Grab the IMG files that exist before the date above

In [54]:
### Select the IMG files whose timestamp is strictly BEFORE the chosen date
### (before_or_after == "before") or strictly AFTER it (any other value)

imgfiles = list(img_files_storm_final)
# Characters 24-38 of each file name are parsed as an integer timestamp; for names such as
# NPR-MIRS-IMG_v11r6_n20_s202108291514373_... this is the 15-digit field that follows "_s"
dates = [int(file[24:39]) for file in imgfiles]

# NOTE: despite its name, files_afterdate_img holds whichever side of the date was requested
if before_or_after == "before":
    files_afterdate_img = [
        file for file, stamp in zip(imgfiles, dates)
        if stamp < date_to_pick_for_files_before_this_date
    ]
else:
    files_afterdate_img = [
        file for file, stamp in zip(imgfiles, dates)
        if stamp > date_to_pick_for_files_before_this_date
    ]

In [21]:
# show the number of IMG files selected by the before/after date filter
len(files_afterdate_img)

29

# Grab the SND files that exist before the date above

In [55]:
### Select the SND files whose timestamp is strictly BEFORE the chosen date
### (before_or_after == "before") or strictly AFTER it (any other value)

sndfiles = list(snd_files_storm_final)
# Characters 24-38 of each file name are parsed as an integer timestamp; for names such as
# NPR-MIRS-SND_v11r6_n20_s202108291514373_... this is the 15-digit field that follows "_s"
dates = [int(file[24:39]) for file in sndfiles]

# NOTE: despite its name, files_afterdate_snd holds whichever side of the date was requested
if before_or_after == "before":
    files_afterdate_snd = [
        file for file, stamp in zip(sndfiles, dates)
        if stamp < date_to_pick_for_files_before_this_date
    ]
else:
    files_afterdate_snd = [
        file for file, stamp in zip(sndfiles, dates)
        if stamp > date_to_pick_for_files_before_this_date
    ]

In [24]:
# show the number of SND files selected by the before/after date filter
len(files_afterdate_snd)

29

# Now merge the IMG files

In [57]:
# Selected IMG files, ordered by file name. This is an alias, so the in-place
# sort also reorders files_afterdate_img.
img_files_storm_final_one_list = files_afterdate_img
img_files_storm_final_one_list.sort()

# Fail early with a clear message instead of an opaque error from xr.concat
if not img_files_storm_final_one_list:
    raise ValueError("No IMG files matched the date selection; nothing to concatenate")

ds_img_list = []
for file in img_files_storm_final_one_list:
    # os.path.join works whether or not dir_path ends with a separator
    img_path = os.path.join(dir_path, file)
    print(img_path)
    # load_dataset reads the file into memory and closes it, so no file handle
    # is left open for each granule
    ds_mirs = xr.load_dataset(img_path)
    ds_img_list.append(ds_mirs)

# Stack the granules along the scan-line dimension
ds_img_merged = xr.concat(ds_img_list, dim='Scanline')

Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291514373_e202108291515089_c20210830095407.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291515093_e202108291515409_c20210830095407.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291515413_e202108291516129_c20210830095407.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291516133_e202108291516449_c20210830095408.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291516453_e202108291517169_c20210830095408.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291517173_e202108291517489_c20210830095409.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291650373_e202108291651089_c20210830095516.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-IMG_v11r6_n20_s202108291651093_e20210829165

# Now merge the snd files

In [59]:
# Selected SND files, ordered by file name. This is an alias, so the in-place
# sort also reorders files_afterdate_snd.
snd_files_storm_final_one_list = files_afterdate_snd
snd_files_storm_final_one_list.sort()

# Fail early with a clear message instead of an opaque error from xr.concat
if not snd_files_storm_final_one_list:
    raise ValueError("No SND files matched the date selection; nothing to concatenate")

ds_snd_list = []
for file in snd_files_storm_final_one_list:
    # os.path.join works whether or not dir_path ends with a separator
    snd_path = os.path.join(dir_path, file)
    print(snd_path)
    # load_dataset reads the file into memory and closes it, so no file handle
    # is left open for each granule
    ds_mirs = xr.load_dataset(snd_path)
    ds_snd_list.append(ds_mirs)

# Stack the granules along the scan-line dimension
ds_snd_merged = xr.concat(ds_snd_list, dim='Scanline')

Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291514373_e202108291515089_c20210830095407.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291515093_e202108291515409_c20210830095407.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291515413_e202108291516129_c20210830095407.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291516133_e202108291516449_c20210830095408.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291516453_e202108291517169_c20210830095408.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291517173_e202108291517489_c20210830095409.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291650373_e202108291651089_c20210830095516.nc
Hello
/Users/sophiahu/Downloads/MIRS_IDA_2021-08-29/NPR-MIRS-SND_v11r6_n20_s202108291651093_e20210829165

# Now Process the files

### Keep certain SND variables and remove certain IMG variables

In [60]:
# From SND keep only the listed variables
ds_snd_merged_keep_vars = ds_snd_merged[snd_vars_to_keep]
# From IMG remove the listed variables; drop_vars replaces the deprecated
# Dataset.drop for removing variables by name
ds_img_merged_keep_vars = ds_img_merged.drop_vars(img_vars_to_remove)

### Read the IBTrACS data for the storm

In [61]:
# Open the IBTrACS netCDF file and narrow it to the requested storm
ds_ibt = xr.open_dataset(ibt_file)

# The storm name is compared as bytes
storm_name_bytes = storm_name.encode('UTF-8')
name_matches = ds_ibt.name == storm_name_bytes
ds_storm_all = ds_ibt.where(name_matches, drop=True)

# Of the storms with that name, keep the one from the requested season
season_matches = ds_storm_all.season == float(storm_year)
ds_storm = ds_storm_all.where(season_matches, drop=True)

### Merge the IBTrACs and MIRS data into one file

In [62]:
# Combine the IMG, SND and storm-track datasets into a single dataset
datasets_to_merge = [ds_img_merged_keep_vars, ds_snd_merged_keep_vars, ds_storm]
ds_merged_all = xr.merge(datasets_to_merge, compat='override')

# Describe the storm in the global attributes
storm_lat = ds_storm["lat"]
storm_lon = ds_storm["lon"]
ds_merged_all.attrs.update({
    "TC_name": storm_name,
    # first entry along the second axis of iso_time, decoded from bytes to text
    "TC_time_start": bytes.decode(ds_storm["iso_time"][:, 0].item()),
    # "TC_time_end" is not written: bytes.decode( ds_storm["iso_time"][:,-1].item() ) # len_iso_time = len(i)
    "TC_minimum_lat": round(float(storm_lat.min()), 2),
    "TC_minimum_lon": round(float(storm_lon.min()), 2),
    "TC_maximum_lat": round(float(storm_lat.max()), 2),
    "TC_maximum_lon": round(float(storm_lon.max()), 2),
})

# Write the merged dataset to the output netCDF file
ds_merged_all.to_netcdf(output_filename)

  ds_merged_all.to_netcdf(output_filename)


In [30]:
# Display the merged dataset as the cell output
ds_merged_all