In [1]:
import os
import dask
import numpy as np
from tqdm.dask import TqdmCallback
import pandas as pd
import geopandas as gpd
from pathlib import Path
from tqdm import tqdm
import seaborn as sns
from shapely.geometry import LineString
import matplotlib.pyplot as plt
import geopy.distance
from IPython.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
import sys
sys.path.append(r'C:\Users\tacuevas\Documents\GitHub\concorde\modules')
from plots import scatter_interpolate
from shapely.geometry import Point, Polygon, LineString
import warnings
warnings.filterwarnings("ignore")
import itertools
plt.style.use('seaborn-darkgrid')
plt.rcParams.update({'font.size': 14})
from matplotlib.offsetbox import AnchoredText

In [2]:
%%javascript
IPython.notebook.kernel.execute('nb_name = "' + IPython.notebook.notebook_name + '"')

<IPython.core.display.Javascript object>

###### Format Data

In [3]:
pathin = Path(r'T:\Projects\2016-2020-DHS-CRCoE\Tomas\Thesis\data\STORM\raw\STORM_data')
filesIn = [x for x in os.listdir(pathin) if '_NA_' in x]

In [4]:
columns = ['year', 'month', 'yr_tc_number', 'time_step', 'basin_id', 'lat', 'lon', 'min_press',
          'max_ws', 'rad_to_max_ws', 'cat', 'landfall', 'dist_to_land']

In [5]:
dfs = []
for ifile, file in tqdm(enumerate(filesIn)):
    df = pd.read_csv(pathin/file, header = None, names = columns)
    df['year'] = df['year'] + ifile * 1000
    dfs.append(df)
    
dfs = pd.concat(dfs, axis = 0)
dfs.index = range(len(dfs))

10it [00:20,  2.05s/it]


In [6]:
dfs['lon'] = np.mod(dfs['lon'].values - 180, 360.) - 180

In [7]:
index_start = np.where(dfs.time_step == 0)[0]
list0 = []
count = 0
for i, j in zip(index_start[:-1], index_start[1:]):
    list0.extend([count] * (j - i))
    count += 1
list0.extend([count] * (len(dfs) - j))
dfs['tc_number'] = list0

In [8]:
nc = (35.2, -76.8)

In [9]:
@dask.delayed
def dist2NC(y, x, pnt):
    return geopy.distance.geodesic(pnt, (y, x)).km

tasks = [dist2NC(lat, lon, nc) for lat, lon in zip(dfs.lat, dfs.lon)]

with TqdmCallback(desc = "Computing distance"):
    d = dask.compute(tasks, scheduler = 'threads')

dfs['dist_to_NC'] = d[0]

Computing distance:   0%|          | 0/3507558 [00:00<?, ?it/s]

In [10]:
def stormDir(df):
    ''' direction going to as currents
    '''
    x = (df.loc[:, 'lon'].shift(-1) - df.loc[:, 'lon']).values
    y = (df.loc[:, 'lat'].shift(-1) - df.loc[:, 'lat']).values
    d = np.arctan2(x, y)
    d2 = np.rad2deg(d)
    d2 = np.mod(d2, 360)
#     dmean = np.rad2deg(np.nanmean(d))
#     dmean = np.mod(dmean, 360)
    return d2

In [11]:
@dask.delayed
def get_trajectory(df, i):
    dfsub = df[df.tc_number == i]
    d = stormDir(dfsub)#[1]    
    return d

tasks = [get_trajectory(dfs, i) for i in dfs.tc_number.unique()]
# tasks = [get_trajectory(df, i) for i in [88467]]#df.tc_number.unique()]

with TqdmCallback(desc = "Computing distance"):
    aux = dask.compute(tasks, scheduler = 'threads')
    
auxM = list(itertools.chain(*aux))
auxM = list(itertools.chain(*auxM))

dfs['trajectory'] = auxM
dfs = dfs.fillna(method = 'ffill')

Computing distance:   0%|          | 0/109479 [00:00<?, ?it/s]

In [12]:
# @dask.delayed
# def get_mean_trajectory(df, i):
#     dfsub = df[df.tc_number == i]
#     d = stormMeanDir(dfsub)[0]    
#     return [d]*len(dfsub)

# tasks = [get_mean_trajectory(df, i) for i in df.tc_number.unique()]
# # tasks = [get_mean_trajectory(df, i) for i in [88467]]#df.tc_number.unique()]

# with TqdmCallback(desc = "Computing distance"):
#     aux = dask.compute(tasks, scheduler = 'threads')
    
# auxM = list(itertools.chain(*aux))
# auxM = list(itertools.chain(*auxM))

# df['mean_trajectory'] = auxM

In [12]:
def stormVel(df):
    '''
    '''
    vellist = [np.nan] 
    for i, j in zip(df.index[:-1], df.index[1:]):
        p1 = (df.loc[i, 'lat'], df.loc[i, 'lon'])
        p2 = (df.loc[j, 'lat'], df.loc[j, 'lon'])
        dist = geopy.distance.geodesic(p2, p1).km
        vel = dist/3 ##km/hr, time step is 3hr
        vellist.append(vel)
    return vellist

In [15]:
@dask.delayed
def get_stormVel(df, i):
    dfsub = df[df.tc_number == i]
    vel = stormVel(dfsub)#[1]    
    return vel

tasks = [get_stormVel(dfs, i) for i in dfs.tc_number.unique()]

with TqdmCallback(desc = "Computing distance"):
    aux = dask.compute(tasks, scheduler = 'threads')
    
auxM = list(itertools.chain(*aux))
auxM = list(itertools.chain(*auxM))

dfs['forwardSpeed'] = auxM
dfs = dfs.fillna(method = 'bfill')

Computing distance:   0%|          | 0/109479 [00:00<?, ?it/s]

In [26]:
# dfs.to_pickle(r'T:\Projects\2016-2020-DHS-CRCoE\Tomas\Thesis\data\STORM\processed\STORM_NA_R3.pkl')
dfs.to_pickle(r'C:\Users\tacuevas\NCSU\Research\Thesis\STORM_NA_R4.pkl')