<a href="https://colab.research.google.com/github/raquelcarmo/tropical_cyclones/blob/main/src/code/TC_best_tracks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## TC Best Tracks

This notebook associates each image of the dataset with the best track of the corresponding tropical cyclone, and retrieves the best-track data to be fed into the parametric model.

In [None]:
# Mount Google Drive at /content/drive (Colab only); the data directory used below lives there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Move into the data directory on the mounted Drive and list its contents.
#%ls
# Insert your own path here: every relative path used later in the notebook is resolved against it.
%cd /content/drive/My\ Drive/ESRIN_PhiLab/Tropical_Cyclones/data
%ls

In [None]:
# Package installation. Only netcdf4 is installed; the commented-out lines are
# optional packages that the active code in this notebook does not import.
# NOTE(review): the install is not version-pinned -- consider pinning for reproducibility.
#!pip install imgaug==0.2.6
#!pip uninstall shapely
#!pip install shapely --no-binary shapely
#!apt-get install libproj-dev proj-data proj-bin  
#!apt-get install libgeos-dev  
#!pip install cython  
#!pip install cartopy  
#!pip install geoviews
#!pip install rasterio
!pip install netcdf4
#!pip install rioxarray

In [None]:
# general import
import bokeh.io
bokeh.io.output_notebook()
#import geoviews as gv
#import geoviews.feature as gf
#gv.extension('bokeh','matplotlib')
import pandas as pd
import numpy as np
import xarray as xr
#import rasterio as rio
#import rioxarray # geospatial extension for xarray
import os
import matplotlib.pyplot as plt
#import cartopy.crs as ccrs
#import cartopy
#from tqdm.auto import tqdm
import netCDF4   # required: netCDF4.Dataset is used by the cells below
import glob
import argparse
import cv2
from google.colab.patches import cv2_imshow
import math 
from math import radians, cos, sin, asin, sqrt
from dateutil import parser
import datetime

General settings to be changed accordingly.

In [None]:
# Choose the cyclone category and the mission; the mission fixes the feature to extract
CATEGORY = "cat1"   # category folder name: "cat1", "cat2", "cat3", "cat4" or "cat5"
MISSION = "sar"     # "sar", "smap" or "smos"

# (prefix of the .nc file names, feature to save) for each mission;
# any mission that is not listed falls back to the smos settings
_MISSION_SETTINGS = {
  "sar": ("s1", "nrcs_detrend"),
  "smap": ("*smap", "wind_speed"),
}
KEY, FEATURE_TO_SAVE = _MISSION_SETTINGS.get(MISSION, ("SM", "wind_speed"))
if MISSION == "sar":
  # channels are only defined for SAR products
  CHANNELS = ["nrcs_detrend_cross", "nrcs_detrend_co"]

# directory holding the downloaded products, e.g. Tropical_Cyclones/data/cat1
download_path = CATEGORY

# directory holding the extracted images, e.g. cat1/nrcs_detrend_sar_cat1
SAVING_DIRECTORY = f"{CATEGORY}/{FEATURE_TO_SAVE}_{MISSION}_{CATEGORY}"

# directory holding the per-day best-track CSV files, e.g. cat1/sar_cat1_best_tracks
BEST_TRACK_PATH = f"{CATEGORY}/{MISSION}_{CATEGORY}_best_tracks"

# make sure all three directories exist
for _directory in (download_path, SAVING_DIRECTORY, BEST_TRACK_PATH):
  os.makedirs(_directory, exist_ok=True)

# format of the output images
SAVE_FORMAT = "png"

BB_WIDTH = 75              # half-width of the bounding box, in pixels
TIME_DELTA = 10            # hours
THRESHOLD_DISTANCE = 200   # km

Script to associate the information relative to the TC best track with the images from Ifremer. Information is stored in CSV files.

In [None]:
# load generic best tracks CSV file
TC_dataset = pd.read_csv("best_track/ibtracs.since1980.list.v04r00.csv", header=0)
iso_column = TC_dataset["ISO_TIME"]

file_list = glob.glob('{}/*.{}'.format(SAVING_DIRECTORY, SAVE_FORMAT))    # cat1/nrcs_detrend_sar_cat1/*.png

# The first 10 characters of each image file name are the ISO date (YYYY-MM-DD) of the
# acquisition. Several images can share a day, so collect the distinct days first and
# filter/write each CSV only once instead of once per image.
acquisition_days = sorted({os.path.basename(single_file)[:10] for single_file in file_list})

for file_name_iso in acquisition_days:
  # rows of the best-track table recorded on that day
  # (na=False: rows with a missing ISO_TIME never match instead of breaking the mask)
  img_info = TC_dataset.loc[iso_column.str.startswith(file_name_iso, na=False)]

  # save the relevant information in a CSV file that can be retrieved with pandas later on
  img_info.to_csv("{}/{}.csv".format(BEST_TRACK_PATH, file_name_iso))

In [None]:
# Helper functions
def point_distance(lon1, lat1, lon2, lat2, radius_km=6371):
  """
  Calculate the great circle distance between two points
  on the earth (specified in decimal degrees) with the haversine formula.

  Args:
    lon1, lat1: longitude and latitude of the first point, in decimal degrees.
    lon2, lat2: longitude and latitude of the second point, in decimal degrees.
    radius_km: radius of the sphere in kilometers (defaults to 6371, the Earth).

  Returns:
    The distance in kilometers. NaN inputs yield NaN.
  """
  # convert decimal degrees to radians
  lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
  # haversine formula
  dlon = lon2 - lon1
  dlat = lat2 - lat1
  a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
  # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
  # antipodal points), which would make sqrt/asin raise ValueError. Explicit
  # comparisons are used (rather than min/max) so that a NaN is left untouched.
  if a > 1.0:
    a = 1.0
  elif a < 0.0:
    a = 0.0
  c = 2 * asin(sqrt(a))
  km = radius_km * c
  return km

def knots_to_m_sec(kts):
  """ Converts a speed in knots (kt) to meters/second (m/s); a NaN input is returned as is """
  return kts if np.isnan(kts) else kts * 0.514444

def nmiles_to_km(nm):
  """ Converts a distance in Nautical Miles (nm) to kilometers (km); a NaN input is returned as is """
  return nm if np.isnan(nm) else nm / 0.53995680

In [None]:
# Collect the images, the NetCDF products and the per-day best-track CSV files to be matched
image_name_list = glob.glob("{}/*.{}".format(SAVING_DIRECTORY, SAVE_FORMAT))   # cat1/nrcs_detrend_sar_cat1/*.png
nc_name_list = glob.glob("{}/{}*.nc".format(CATEGORY, KEY))    # cat1/s1*.nc
best_track_name_list = glob.glob("{}/*.csv".format(BEST_TRACK_PATH))    # cat1/sar_cat1_best_tracks/*.csv

np.set_printoptions(threshold=1000)

# Index the best-track CSV files by their file name without extension (the acquisition day).
# setdefault keeps the first file found for a day, like a first-match search would.
best_track_by_day = {}
for best_track_name_i in best_track_name_list:
  best_track_by_day.setdefault(os.path.basename(best_track_name_i)[:-4], best_track_name_i)

# Index the NetCDF files by the timestamp stored in the units of their `time` variable.
# Every file is opened exactly once and closed again right away, instead of being
# re-opened for every image and never closed.
nc_by_timestamp = {}
for nc_name_i in nc_name_list:
  with netCDF4.Dataset(nc_name_i, mode='r') as nc_image_i:
    nc_by_timestamp.setdefault(nc_image_i.variables['time'].units[11:], nc_name_i)

for image_name in image_name_list:
  # image file name without extension, e.g. 2019-06-12 01:18:27
  image_stem = os.path.splitext(os.path.basename(image_name))[0]

  # ensure to have the right image, nc and csv files
  # (the best-track CSVs are named after the first 10 characters, i.e. the day)
  best_track_name = best_track_by_day.get(image_stem[:10])
  if best_track_name is None:
    print("best track not found")
    continue

  nc_name = nc_by_timestamp.get(image_stem)
  if nc_name is None:
    print("nc not found")
    continue

  # extract the longitude and latitude in decimal degrees from the nc file
  with netCDF4.Dataset(nc_name, mode='r') as nc_image:
    referement_lon_matrix = nc_image.variables["lon"][:]
    referement_lat_matrix = nc_image.variables["lat"][:]

  # get only the element in the middle as reference
  referement_lon = referement_lon_matrix[len(referement_lon_matrix)//2]
  referement_lat = referement_lat_matrix[len(referement_lat_matrix)//2]

  # load the best-track rows of that day
  best_track_info = pd.read_csv(best_track_name)

  # drop the rows whose time is too distant from the acquisition time
  referement_date = parser.parse(image_stem)
  best_track_info["ISO_TIME"] = best_track_info["ISO_TIME"].apply(parser.parse)
  best_track_info["ISO_TIME_DIFF"] = (referement_date - best_track_info["ISO_TIME"]).dt.total_seconds() / 3600

  # Compare the absolute difference: with the signed difference every track point
  # recorded AFTER the acquisition is negative and would always pass the filter.
  best_track_info = best_track_info[best_track_info.ISO_TIME_DIFF.abs() < TIME_DELTA]

  # delete null elements
  estimated_lon_df = pd.to_numeric(best_track_info.USA_LON, errors='coerce').dropna()
  estimated_lat_df = pd.to_numeric(best_track_info.USA_LAT, errors='coerce').dropna()

  concat_lon_lat = pd.concat([estimated_lon_df, estimated_lat_df], axis=1)
  if concat_lon_lat.empty:
    # no usable best-track position close enough in time
    print("TC not found")
    continue

  # compute distance between reference and estimated positions
  concat_lon_lat["distance_with_my_TC"] = concat_lon_lat.apply(
    lambda x: point_distance(x.USA_LON, x.USA_LAT, referement_lon, referement_lat), axis=1)

  # prune the values that are too far from the coordinates of our nc file
  concat_lon_lat = concat_lon_lat[concat_lon_lat["distance_with_my_TC"] < THRESHOLD_DISTANCE]
  if concat_lon_lat.empty:
    # no best-track position close enough in space
    print("TC not found")
    continue

  # get my estimated center
  estimated_lon = concat_lon_lat.USA_LON.mean()
  estimated_lat = concat_lon_lat.USA_LAT.mean()

  # get the indexes of the closest longitude and latitude in the image wrt the estimated values
  key_lon = min(range(len(referement_lon_matrix)), key=lambda i: abs(referement_lon_matrix[i]-estimated_lon))
  key_lat = min(range(len(referement_lat_matrix)), key=lambda i: abs(referement_lat_matrix[i]-estimated_lat))

  # If the closest sample is the first or the last one, the estimated center lies on or
  # beyond the border of the image. The last valid index is len - 1 (an index can never
  # be equal to len, so comparing against len never triggers).
  last_lon_index = len(referement_lon_matrix) - 1
  last_lat_index = len(referement_lat_matrix) - 1
  if key_lon in (0, last_lon_index) or key_lat in (0, last_lat_index):
    print("TC not found")
    continue

  ############################################
  # plot image and bounding box around the eye
  image = cv2.imread(image_name)
  if image is None:
    # cv2.imread returns None instead of raising when the file cannot be decoded
    print("image could not be read")
    continue

  # draw a square bounding box centered on the estimated eye position
  # NOTE(review): this uses the lon/lat indexes as pixel coordinates -- confirm that the
  # image has the same size and orientation as the lon/lat arrays.
  color = (0,0,255)
  cv2.rectangle(image, (key_lon - BB_WIDTH, key_lat - BB_WIDTH), (key_lon + BB_WIDTH, key_lat + BB_WIDTH), color, 2)
  cv2_imshow(image)

  #cv2.imwrite("./BB_TRIALS/{}".format(os.path.basename(image_name)), image)

  # TODO: create a .txt annotation file for each image
  # TODO: add empty .txt files for the background images
  # annotation format: object label x_center y_center width[0,1] height[0,1]

In [None]:
# load dataframe connecting .nc filename to TC Name and USA_ATCF_ID
nc_ID = pd.read_csv("best_track/Cyclobs_info_names.csv", header=0)


def _numeric_mean(frame, column):
  """Mean of `column` after coercing non-numeric entries to NaN (NaN when nothing is numeric)."""
  return pd.to_numeric(frame[column], errors='coerce').mean()


for CAT in range(1, 6):
  # change directories accordingly
  category_dir = "cat" + str(CAT)
  BEST_TRACK_DIRECTORY = "{}/{}_{}_best_tracks".format(category_dir, MISSION, category_dir)
  IMAGE_DIRECTORY = "{}/{}_{}_{}".format(category_dir, FEATURE_TO_SAVE, MISSION, category_dir)

  best_track_file_list = glob.glob("{}/*.csv".format(BEST_TRACK_DIRECTORY))
  nc_file_list = glob.glob("{}/{}*.nc".format(category_dir, KEY))    # cat1/s1*.nc
  image_file_list = glob.glob("{}/*.{}".format(IMAGE_DIRECTORY, SAVE_FORMAT))   # cat1/nrcs_detrend_sar_cat1/*.png

  # initialize list of values for each parameter
  Vt = list()   # Translation Speed
  Dt = list()   # Translation Direction
  R_Vm = list()   # Maximum Radius Winds
  Pcs = list()    # Central Pressure
  Pns = list()    # External Pressure
  Phi = list()    # Absolute Latitude
  Vmax = list()   # Maximum sustained wind speed

  if not (best_track_file_list and nc_file_list and image_file_list):
    print("No files found.")
    continue

  # Index the best-track CSV files by their file name without extension (the acquisition day).
  # setdefault keeps the first file found for a key, like a first-match search would.
  best_track_by_day = {}
  for best_track_filename in best_track_file_list:
    best_track_by_day.setdefault(os.path.basename(best_track_filename)[:-4], best_track_filename)

  # Index the NetCDF files by acquisition timestamp. Every file is opened once per
  # category and closed again right away, instead of being re-opened for every image
  # and never closed.
  nc_by_timestamp = {}
  for nc_filename in nc_file_list:
    with netCDF4.Dataset(nc_filename, mode='r') as nc_info:
      if MISSION == "sar":
        tmax_units = nc_info.variables['time'].units
      else: # smap or smos
        tmax_units = nc_info.measurement_start_date
    nc_by_timestamp.setdefault(tmax_units[11:], nc_filename)   # 2019-06-12 01:18:27

  for image_name in image_file_list:
    image_basename = os.path.basename(image_name)

    # ensure to have the right image, nc and csv files
    best_track_path = best_track_by_day.get(image_basename[:-13])   # 2019-06-12
    if best_track_path is None:
      print("best track not found!")
      continue
    best_track_name = os.path.basename(best_track_path)

    nc_path = nc_by_timestamp.get(image_basename[:-4])   # 2019-06-12 01:18:27
    if nc_path is None:
      print("nc file not found!")
      continue
    nc_name = os.path.basename(nc_path)

    print(nc_name, best_track_name)
    row = nc_ID.loc[nc_ID['data_url'] == nc_name]
    if row.empty:
      # without an entry in the lookup table the storm ID is unknown; skip instead of crashing
      print("no storm ID found for {}".format(nc_name))
      continue
    ID = np.unique(row['sid'])[0].upper()   # uppercase letters in ID to match the ones in best_track

    # load best track dataframe
    best_track_info = pd.read_csv(best_track_path)

    # filter dataframe to ID of TC and replace white spaces for np.nan
    df = best_track_info.loc[best_track_info['USA_ATCF_ID'] == ID].replace(r'^\s*$', np.nan, regex=True)
    if df.empty:
      continue

    # append mean of parameters' values to corresponding lists
    Vt.append(knots_to_m_sec(_numeric_mean(df, 'STORM_SPEED')))
    Dt.append(_numeric_mean(df, 'STORM_DIR'))
    R_Vm.append(nmiles_to_km(_numeric_mean(df, 'USA_RMW')))
    # NOTE(review): the central pressure comes from TOKYO_PRES while every other
    # parameter uses USA_* columns -- confirm this is intended.
    Pcs.append(_numeric_mean(df, 'TOKYO_PRES'))  # mb = hPa
    Pns.append(_numeric_mean(df, 'USA_POCI'))   # mb = hPa
    Phi.append(_numeric_mean(df, 'USA_LAT'))
    Vmax.append(knots_to_m_sec(_numeric_mean(df, 'USA_WIND')))
    # parameters missing: dPcst

  # save lists of values for each parameter for each category
  filename = "{}/parametric_values.csv".format(BEST_TRACK_DIRECTORY)   # cat1/sar_cat1_best_tracks/parametric_values.csv
  parameters_df = pd.DataFrame(data={"Vt (m/s)": Vt, "Dt (degrees)": Dt, "R_Vm (km)": R_Vm, "Pcs (hPa)": Pcs, "Pns (hPa)": Pns, "Phi (degrees)": Phi, "Vmax (m/s)": Vmax})
  parameters_df.to_csv(filename, sep=',', index = False)

In [None]:
from scipy.interpolate import interp1d   # currently unused in this cell

# load generic best tracks CSV file
TC_dataset = pd.read_csv("best_track/ibtracs.since1980.list.v04r00.csv", header=0)

# load dataframe connecting .nc filename to TC Name and USA_ATCF_ID
nc_ID = pd.read_csv("SAR_swath_nc/tc_dataframe.csv", header=0)

file_list = glob.glob('SAR_swath_nc/*/*.nc')
files = []
ids = []
cyclob_Vmax_list = []
ws_Vmax_list = []
best_track_Vmax_list = []
tmax_list = []

for single_file in file_list:

  nc_filename = os.path.basename(single_file)
  nc_rows = nc_ID[nc_ID["data"] == nc_filename]
  if nc_rows.empty:
    # the product is not listed in tc_dataframe.csv: storm ID and CyclObs Vmax are unknown
    print("{} not found in tc_dataframe.csv".format(nc_filename))
    continue
  cyclob_Vmax = nc_rows['vmax (m/s)'].values[0]
  sid = nc_rows["sid"].values[0]   # named `sid` so the builtin `id` is not shadowed

  # read what is needed and close the file again (the context manager releases the handle)
  with netCDF4.Dataset(single_file, mode='r') as full_info_image:
    tmax_units = datetime.datetime.strptime(full_info_image.measurementDate, '%Y-%m-%dT%H:%M:%SZ')
    feature_wind = full_info_image.variables["wind_speed"][:]
    ws_Vmax = np.max(feature_wind[0])
  tmax = tmax_units.strftime('%Y-%m-%d %H_%M_%S')
  day = tmax[:10]

  # best-track rows of the same storm recorded on the acquisition day
  # (na=False: rows with a missing ISO_TIME never match instead of breaking the mask)
  same_day = TC_dataset["ISO_TIME"].str.startswith(day, na=False)
  same_storm = TC_dataset["USA_ATCF_ID"] == sid.upper()
  img_info = TC_dataset.loc[same_day & same_storm]

  # Wind speed (knots) indexed by time, plus one empty slot at the acquisition time.
  # Built from plain lists so the filtered frame is never modified in place (no chained
  # assignment) and the removed DataFrame.append is not needed. Blank wind fields are
  # coerced to NaN instead of making a float conversion fail.
  track_times = list(pd.to_datetime(img_info['ISO_TIME'], format='%Y-%m-%d %H:%M:%S'))
  track_winds = list(pd.to_numeric(img_info['USA_WIND'], errors='coerce').astype(float))
  wind_by_time = pd.Series(
    track_winds + [np.nan],
    index=pd.DatetimeIndex(track_times + [tmax_units]),
    dtype=float,
  ).sort_index()

  # Time-weighted interpolation: the acquisition is generally not half-way between two
  # track points, so a purely positional (row-count based) interpolation would be biased.
  interp = wind_by_time.interpolate(method="time")
  best_track_Vmax = interp[interp.index == tmax_units].values[0]

  files.append(single_file)
  ids.append(sid)
  cyclob_Vmax_list.append(cyclob_Vmax)
  best_track_Vmax_list.append(knots_to_m_sec(best_track_Vmax))
  ws_Vmax_list.append(ws_Vmax)
  tmax_list.append(tmax)

data = {"nc": files, "sid": ids, "tmax": tmax_list, "Vmax_ws": ws_Vmax_list, "Vmax_cyclob": cyclob_Vmax_list, "Vmax_best_track": best_track_Vmax_list}
final_df = pd.DataFrame(data)
os.makedirs("SAR_swath_Vmax", exist_ok=True)   # to_csv does not create missing directories
final_df.to_csv("SAR_swath_Vmax/Vmax_info.csv", index = False)

# save relevant information in a csv file that could be retrived with pandas in the future
#img_info.to_csv("{}/{}.csv".format(BEST_TRACK_PATH, file_name_iso))

In [None]:
# Query the CyclObs API once per category band and remember which request returned each row.
CYCLOBS_API = "https://cyclobs.ifremer.fr/app/api/getData"
# (cat_min, cat_max) of each request; None means the request has no upper bound
CATEGORY_BANDS = [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]

frames = []
for cat_min, cat_max in CATEGORY_BANDS:
  request_url = "{}?cat_min=cat-{}".format(CYCLOBS_API, cat_min)
  if cat_max is not None:
    request_url += "&cat_max=cat-{}".format(cat_max)
  request_url += "&mission=S1B,S1A&product_type=swath&include_cols=all"

  frame = pd.read_csv(request_url)
  frame["request_url"] = request_url
  frames.append(frame)

result = pd.concat(frames)#.drop_duplicates(keep='last')

# Keep only the file name (last path component) of each product URL. Splitting from the
# right yields a Series and also works when the URLs do not all have the same depth.
result["data"] = result["data_url"].str.rsplit('/', n=1).str[-1]

#new_df = result[["data", "sid", "eye_in_acq", "acquisition_start_time", "maximum_cyclone_category", "vmax (m/s)"]]
new_df = result[["request_url", "data_url", "sid", "eye_in_acq", "acquisition_start_time", "maximum_cyclone_category", "vmax (m/s)"]]

# products returned by more than one request (adjacent bands share a category)
url = new_df["data_url"]
duplicates = new_df[url.isin(url[url.duplicated()])].sort_values(by=['data_url'])
duplicates.to_csv("SAR_swath_nc/duplicated_nc_dataframe.csv", index = False)


#single_nc = new_df.iloc[3]
#path = 'SAR_swath_nc/category5/'
#full_info_image = netCDF4.Dataset(path + single_nc.data, mode='r')

#tmax_units = datetime.datetime.strptime(full_info_image.measurementDate, '%Y-%m-%dT%H:%M:%SZ')
#tmax_units = tmax_units.strftime('%Y-%m-%d %H_%M_%S')

#single_nc['tmax'] = tmax_units
#print(single_nc)

#np.load("SAR_swath_Vmax/category5/" + tmax_units+"_Vmax.npy")
#feature_co = full_info_image.variables["nrcs_detrend_co"][:]
#bbox = single_nc.bounding_box

#import re
#points = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", bbox)
#points = np.array(points).astype(np.float).reshape((5,2))
#print(points)
#import matplotlib
#fig = plt.figure(figsize=(10,10))
#ax = fig.add_subplot(111)
#plt.imshow(feature_co[0])
#ax.add_patch(matplotlib.patches.Polygon(points, closed = True, color = 'red'))
#print(np.max(feature_co[0]), np.min(feature_co[0]))
#print("feature_co")
#plt.show()