In [1]:
%matplotlib widget
import numpy as np
import umap
import xarray as xr
import numba
import netCDF4
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py
import seaborn as sns
# fix random seed for reproducibility
random_state = 1234
np.random.seed(random_state)
# import labelencoder
from sklearn.preprocessing import LabelEncoder
# instantiate labelencoder object
le = LabelEncoder()

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
fn='umap_15_link_subset.nc'

In [4]:
ds=xr.open_dataset(fn).load()

In [5]:
ds

In [6]:
samples = ds.samples.values
radar = ds.radar.values
rain_rate = ds.rain_rate.values
sensor_id = ds.sensor_id.values
timestamp = ds.timestamp.values
samples.shape

(32478, 60)

In [7]:
from sklearn.utils import shuffle

# Apply one shared permutation to all per-sample arrays so that row i of every
# array still refers to the same sample afterwards.
# The seed is passed explicitly: the resulting order then depends only on
# `random_state`, not on how many numbers were already drawn from NumPy's
# global random stream before this cell ran.
samples, radar, rain_rate, sensor_id, timestamp = shuffle(
    samples, radar, rain_rate, sensor_id, timestamp,
    random_state=random_state,
)

In [8]:
sns.distplot(rain_rate, kde=False)
plt.yscale('log')
plt.ylabel('count []')
plt.xlabel('rain rate [mmh$^{-1}$]')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
len(np.unique(sensor_id))

15

In [10]:
min(timestamp)

numpy.datetime64('2017-08-01T02:51:00.000000000')

In [11]:
max(timestamp)

numpy.datetime64('2018-09-30T22:51:00.000000000')

In [12]:
%%time
k=4
trans = umap.UMAP(n_neighbors=k, # number of neighbours for manifold approximation
                      min_dist=0.1, # 
                      n_components=2, # dimension of low dimensional representation
                      metric='manhattan', # metric for distance between points in high dimensional space
                        set_op_mix_ratio=1,
                        init='spectral',
                        random_state=random_state,
                        verbose=1
                     ).fit(samples)

UMAP(a=None, angular_rp_forest=False, b=None, init='spectral',
     learning_rate=1.0, local_connectivity=1.0, metric='manhattan',
     metric_kwds=None, min_dist=0.1, n_components=2, n_epochs=None,
     n_neighbors=4, negative_sample_rate=5, random_state=1234,
     repulsion_strength=1.0, set_op_mix_ratio=1, spread=1.0,
     target_metric='categorical', target_metric_kwds=None,
     target_n_neighbors=-1, target_weight=0.5, transform_queue_size=4.0,
     transform_seed=42, verbose=1)
Construct fuzzy simplicial set
Tue Feb 18 14:57:25 2020 Finding Nearest Neighbors
Tue Feb 18 14:57:25 2020 Building RP forest with 14 trees
Tue Feb 18 14:57:27 2020 NN descent for 15 iterations
	 0  /  15
	 1  /  15
	 2  /  15
	 3  /  15
	 4  /  15
	 5  /  15
Tue Feb 18 14:57:29 2020 Finished Nearest Neighbor Search
Tue Feb 18 14:57:30 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epoch

In [13]:
%%time
embedding2d = trans.transform(samples)
xs2 = embedding2d[:,0]
ys2 = embedding2d[:,1]

CPU times: user 15.2 ms, sys: 435 µs, total: 15.7 ms
Wall time: 14.4 ms


In [25]:
def onpick(event):
    """Plot the time series behind the picked scatter points.

    Intended as a matplotlib 'pick_event' callback. Relies on the notebook
    globals `col` (the scatter collection), `fig`, `samples` and `radar`.

    Picks on any artist other than `col`, and picks without indices, are
    ignored. For at most the first four picked indices a subplot is added to
    `fig` showing `samples[i]`, annotated with the value of `radar[i]`.

    Always returns True.
    """
    if event.artist != col:
        return True

    if not len(event.ind):
        return True

    for subplotnum, dataind in enumerate(event.ind[:4]):
        # Slot 421 of the 4x2 grid is where the scatter plot itself is placed,
        # so the detail panels start at 422 instead of drawing over it.
        axs = fig.add_subplot(422 + subplotnum)
        axs.plot(np.arange(0, 60), samples[dataind])
        # Position the label relative to the new panel, not the scatter axes.
        axs.text(0.01, 0.5, s=radar[dataind],
                 transform=axs.transAxes, va='top')

    return True

In [26]:
%matplotlib widget
fig = plt.figure(figsize=(12,5))
ax = fig.add_subplot(421)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=radar, picker=5, s=1, cmap='RdYlGn')

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [27]:
import matplotlib.gridspec as gridspec

In [54]:
fig = plt.figure(figsize=(12,5))
ax = fig.add_subplot(321)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=radar, picker=5, s=1, cmap='RdYlGn')

def onpick(event):

    if event.artist!=col: return True

    N = len(event.ind)
    if not N: return True


    for subplotnum, dataind in enumerate(event.ind[:4]):
        axs = fig.add_subplot(322+subplotnum)
        axs.plot(np.arange(0,60), samples[dataind])
#         plt.ylabel('TRSL [dB]')
#         plt.xlabel('time [minutes]')
#         plt.xticks(np.arange(0,61,10))
#         plt.grid()
        axs.text(0.01,0.5, s=radar[dataind],
                transform=axs.transAxes, va='top')
        
    return True

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [81]:
fig, axs = plt.subplots(1,2,figsize=(12,5))
# picker=5: a click within 5 points of a marker fires a pick event
col = axs[0].scatter(xs2, ys2, c=radar, picker=5, s=1, cmap='bwr_r')

def onpick(event):
    """Show the time series of up to three picked points in the right panel.

    Intended as a 'pick_event' callback. Picks on artists other than the
    scatter collection `col`, and picks without indices, are ignored.
    The right panel is cleared on every valid pick; each curve is labelled
    with its sensor id and 'dry'/'wet' taken from `radar`.

    Always returns True.
    """
    if event.artist != col:
        return True

    if not len(event.ind):
        return True

    detail_ax = axs[1]
    detail_ax.clear()
    # One x value per column of `samples`, instead of a hard-coded length.
    minutes = np.arange(samples.shape[1])
    for dataind in event.ind[:3]:
        state = ['dry', 'wet'][int(radar[dataind])]
        detail_ax.plot(minutes, samples[dataind],
                       label=f"{sensor_id[dataind]} {state}")
    # Build the legend once, after all curves exist.
    detail_ax.legend()
    # Ask the canvas to repaint so the new curves become visible.
    event.canvas.draw_idle()

    return True

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col, ax=axs[0])
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [76]:
fig3 = plt.figure(figsize=(15,5))
gs = fig3.add_gridspec(3, 4)
f3_ax1 = fig3.add_subplot(gs[:, :2])
f3_ax1.set_title('click on a point')

# picker=5: a click within 5 points of a marker fires a pick event
col = f3_ax1.scatter(xs2, ys2, c=radar, picker=5, s=1, cmap='bwr_r')

# Detail panels keyed by grid row. They are created on first use and reused on
# later picks, so repeated clicks do not stack new axes on top of old ones.
detail_axes = {}

def onpick(event):
    """Plot up to three picked time series, one per grid row right of the scatter.

    Intended as a 'pick_event' callback on `fig3`. Picks on artists other
    than the scatter collection `col`, and picks without indices, are ignored.
    Each panel shows `samples[i]` and is annotated with `radar[i]`.

    Always returns True.
    """
    if event.artist != col:
        return True

    if not len(event.ind):
        return True

    # Wipe what the previous pick drew, including rows this pick will not use.
    for panel in detail_axes.values():
        panel.clear()

    for row, dataind in enumerate(event.ind[:3]):
        panel = detail_axes.get(row)
        if panel is None:
            panel = fig3.add_subplot(gs[row, 2:-1])
            detail_axes[row] = panel
        panel.plot(np.arange(0,60), samples[dataind])
        panel.text(0.01, 0.5, s=radar[dataind],
                   transform=panel.transAxes, va='top')

    fig3.canvas.draw_idle()
    return True

# Connect to the figure created in this cell (fig3), not a figure left over
# from another cell.
fig3.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col, ax=f3_ax1)
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
rain_rate_x=rain_rate.copy()
rain_rate_x[rain_rate_x<0.1]=0.0001

In [None]:
fig = plt.figure(figsize=(30,20))
ax = fig.add_subplot(111)
# ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=rain_rate_x, picker=5, s=3, cmap='viridis', norm=mpl.colors.LogNorm())

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
# plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
id_list_num=le.fit_transform(sensor_id)

In [77]:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
# ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=id_list_num, picker=5, s=3, cmap='tab20')

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
# plt.axis('off')
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

NameError: name 'id_list_num' is not defined

In [None]:
# Shift every time series so that its own minimum becomes 0.
# Broadcasting the per-row minimum keeps the result a single 2-D ndarray
# (same shape as `samples`) and leaves `samples` itself untouched.
samples_norm = samples - np.min(samples, axis=-1, keepdims=True)

In [None]:
def onpick(event):
    """Open a new figure with one stacked subplot per picked scatter point.

    Intended as a 'pick_event' callback. Uses the notebook globals `col`
    (the scatter collection), `samples_norm` and `radar`. Picks on other
    artists, and picks without indices, are ignored.

    Always returns True.
    """
    if event.artist != col:
        return True

    picked = event.ind
    n_picked = len(picked)
    if n_picked == 0:
        return True

    minutes = np.arange(0, 60)
    tick_positions = np.arange(0, 61, 10)

    figi = plt.figure(figsize=(20, 10))
    for row, dataind in enumerate(picked, start=1):
        ax = figi.add_subplot(n_picked, 1, row)
        ax.plot(minutes, samples_norm[dataind])
        ax.set_ylabel('TRSL [dB]')
        ax.set_xlabel('time [minutes]')
        ax.set_xticks(tick_positions)
        ax.grid()
        # Annotate the panel with the radar value of this sample.
        ax.text(0.01, 0.5, s=radar[dataind], transform=ax.transAxes, va='top')
    figi.show()
    return True

In [None]:
%%time
k=5
trans = umap.UMAP(n_neighbors=k, # number of neighbours for manifold approximation
                      min_dist=0.1, # 
                      n_components=2, # dimension of low dimensional representation
                      metric='manhattan', # metric for distance between points in high dimensional space
                        set_op_mix_ratio=1,
                        init='random',
                        random_state=random_state
                     ).fit(samples_norm)

In [None]:
%%time
embedding2d = trans.transform(samples_norm)
xs2 = embedding2d[:,0]
ys2 = embedding2d[:,1]

In [73]:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=radar, picker=5, s=5, cmap='bwr')

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=rain_rate_x, picker=5, s=3, cmap='viridis', norm=mpl.colors.LogNorm())

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
# plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=id_list_num, picker=5, s=3, cmap=plt.cm.get_cmap('viridis', 15))
fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
# plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Stand-alone picking demo on a line plot of 100 random values.
# NOTE(review): this cell rebinds the notebook-level names `fig`, `ax` and
# `onpick`. Cells further down pass `onpick` to `mpl_connect` for scatter
# plots; if this cell was run last they get the handler below, which calls
# get_xdata()/get_ydata() on the picked artist instead of plotting a time
# series. Re-run the intended `onpick` cell before those plots, or give this
# demo handler its own name.
# NOTE(review): np.random.rand draws from NumPy's global random stream.
import numpy as np
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('click on points')

line, = ax.plot(np.random.rand(100), 'o', picker=5)  # 5 points tolerance

def onpick(event):
    """Print the (x, y) coordinates of the picked points of a line artist."""
    thisline = event.artist
    xdata = thisline.get_xdata()
    ydata = thisline.get_ydata()
    # indices of the data points that were hit by the pick
    ind = event.ind
    points = tuple(zip(xdata[ind], ydata[ind]))
    print('onpick points:', points)

fig.canvas.mpl_connect('pick_event', onpick)

plt.show()


# Dynamic time warping cost function as a distance measure

In [None]:
@numba.njit()
def std_dif(x,y):
    """Return the absolute difference between the standard deviations of x and y."""
    spread_x = np.std(x)
    spread_y = np.std(y)
    return np.abs(spread_x - spread_y)

@numba.njit()
def path_cost_numba(x, y, accumulated_cost, distances):
    """Backtrack the warping path through `accumulated_cost` and sum its cost.

    Starts at the last cell (row len(y)-1, column len(x)-1) and walks back to
    cell (0, 0), at each step moving to the neighbouring predecessor (up, left
    or diagonal) with the smallest accumulated cost. Ties are resolved in the
    order up, left, diagonal.

    Parameters
    ----------
    x, y : sequences
        Only their lengths are used here; len(x) is the number of columns
        and len(y) the number of rows of the two matrices.
    accumulated_cost : 2-D array, shape (len(y), len(x))
    distances : 2-D array, shape (len(y), len(x))

    Returns
    -------
    path : 2-D float array, shape (n_steps, 2)
        Visited cells as [column, row], from the last cell back to (0, 0).
    cost : float
        Sum of `distances` over the cells on the path.
    """
    n_rows = len(y)
    n_cols = len(x)
    # A monotone path from the last cell to (0, 0) visits at most
    # n_rows + n_cols - 1 cells.
    path = np.zeros((n_rows + n_cols - 1, 2))
    i = n_rows - 1
    j = n_cols - 1
    path[0, 0] = j
    path[0, 1] = i
    n = 0
    # Keep walking until BOTH indices are 0. Stopping as soon as one of them
    # reaches 0 would drop the remaining cells along the border.
    while i > 0 or j > 0:
        n = n + 1
        if i == 0:
            j = j - 1
        elif j == 0:
            i = i - 1
        else:
            best = min(accumulated_cost[i-1, j-1],
                       accumulated_cost[i-1, j],
                       accumulated_cost[i, j-1])
            if accumulated_cost[i-1, j] == best:
                i = i - 1
            elif accumulated_cost[i, j-1] == best:
                j = j - 1
            else:
                i = i - 1
                j = j - 1
        path[n, 0] = j
        path[n, 1] = i
    # Exactly n + 1 cells were written; do not include unused zero rows, which
    # would add distances[0, 0] a second time.
    path = path[:n + 1]
    cost = 0.0
    for k in range(len(path)):
        cost = cost + distances[int(path[k, 1]), int(path[k, 0])]

    return path, cost

@numba.njit()
def comp_acc_cost(x, y, distances):
    """Fill the accumulated-cost matrix for a given local distance matrix.

    Cell (r, c) holds distances[r, c] plus the smallest accumulated cost among
    its upper, left and upper-left neighbours; the first row and first column
    are running sums of `distances`. The result has shape (len(y), len(x)).
    """
    n_rows = len(y)
    n_cols = len(x)
    acc = np.zeros((n_rows, n_cols))

    acc[0, 0] = distances[0, 0]
    # First row: only reachable from the left.
    for c in range(1, n_cols):
        acc[0, c] = acc[0, c - 1] + distances[0, c]

    for r in range(1, n_rows):
        # First column: only reachable from above.
        acc[r, 0] = acc[r - 1, 0] + distances[r, 0]
        for c in range(1, n_cols):
            cheapest = min(acc[r - 1, c - 1], acc[r - 1, c], acc[r, c - 1])
            acc[r, c] = cheapest + distances[r, c]

    return acc

@numba.njit()
def dtw_cost(x, y):
    """Return the path cost between x and y based on squared differences.

    Builds the (len(y), len(x)) matrix of squared element differences, turns
    it into an accumulated-cost matrix with `comp_acc_cost`, and returns the
    cost component reported by `path_cost_numba`.
    """
    n_rows = len(y)
    n_cols = len(x)
    sq_diff = np.zeros((n_rows, n_cols))
    for r in range(n_rows):
        for c in range(n_cols):
            delta = x[c] - y[r]
            sq_diff[r, c] = delta**2

    acc = comp_acc_cost(x, y, sq_diff)
    path_and_cost = path_cost_numba(x, y, acc, sq_diff)

    return path_and_cost[1]

In [None]:
%%time
k=10
trans = umap.UMAP(n_neighbors=k, # number of neighbours for manifold approximation
                      min_dist=0.1, # 
                      n_components=2, # dimension of low dimensional representation
                      metric=std_dif, # metric for distance between points in high dimensional space
                        set_op_mix_ratio=1,
                        init='random',
                        random_state=random_state
                     ).fit(samples_norm)

In [None]:
%%time
embedding2d = trans.transform(samples_norm)
xs2 = embedding2d[:,0]
ys2 = embedding2d[:,1]

In [None]:
fig = plt.figure(figsize=(30,20))
ax = fig.add_subplot(111)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=radar, picker=5, s=5, cmap='RdYlGn')

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
plt.tight_layout()
plt.show()

In [None]:
fig = plt.figure(figsize=(30,20))
ax = fig.add_subplot(111)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=rain_rate_x, picker=5, s=3, cmap='viridis', norm=mpl.colors.LogNorm())

fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
# plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
fig = plt.figure(figsize=(30,20))
ax = fig.add_subplot(111)
ax.set_title('click on point to plot time series')
  # 5 points tolerance
col = ax.scatter(xs2, ys2, c=id_list_num, picker=5, s=3, cmap=plt.cm.get_cmap('viridis', 15))
fig.canvas.mpl_connect('pick_event', onpick)
plt.colorbar(col)
# plt.axis('off')
plt.tight_layout()
plt.show()