## Big Data Visualization
Valkyrie and ICESat-2 data are by nature big data sets that require some special considerations when working with them. The main constraint, if you don't have a supercomputer, is memory. The average granule size is in the tens of megabytes for ICESat-2 and can reach gigabytes for Valkyrie, depending on the order/subsetting.
This is where libraries like Dask, Vaex and others come into play. This notebook shows some basic plotting techniques that use these libraries to work effectively with lidar data from Valkyrie (IceBridge) and ATL data from ICESat-2.


In [2]:
%%time
import vaex
from datetime import date
# Open the ATM1B HDF5 file ordered through Valkyrie as a Vaex DataFrame.
df = vaex.open('data/atm1b_data_2020-07-11T20-39.hdf5')

# Parse the utc_datetime column from Valkyrie into datetime64[ns], a data type
# that Vaex understands, and expose it as the `date` column.
utc_as_datetime64 = df.utc_datetime.values.astype('datetime64[ns]')
df['date'] = utc_as_datetime64

# Row-number column. vaex.vrange() is like numpy.arange but uses 0-memory
# no matter the length.
row_count = len(df)
df.add_column('index', vaex.vrange(0, row_count))

# "Decimated" dataframe: keep only every 100th row (1/100 of the original size)
# so the big picture can be plotted faster.
every_100th_row = df.index % 100 == 0
df_decimated = df[every_100th_row]

# Last expression of the cell: display the full dataframe.
df

CPU times: user 3.08 s, sys: 1.14 s, total: 4.22 s
Wall time: 3.31 s


#,azimuth,elevation,gps_pdop,gps_time,latitude,longitude,passive_footprint_latitude,passive_footprint_longitude,passive_footprint_synthesized_elevation,passive_signal,pitch,pulse_width,rcv_sigstr,rel_time,roll,utc_datetime,xmt_sigstr,date,index
0,217.554,786.135986,2.7,125130.719,69.140261,-48.969153,0.0,0.0,0.0,0,2.309,22,2232,267.507,-3.416,2016-05-16T12:51:13.719000,2532,2016-05-16 12:51:13.719000000,0.0
1,197.697,790.546997,2.7,125130.765,69.140006,-48.968392,0.0,0.0,0.0,0,2.313,20,1157,267.553,-3.432,2016-05-16T12:51:13.765000,2365,2016-05-16 12:51:13.765000000,1.0
2,202.556,793.505005,2.7,125130.766,69.14005,-48.968575,0.0,0.0,0.0,0,2.313,21,1379,267.55400000000003,-3.432,2016-05-16T12:51:13.766000,2480,2016-05-16 12:51:13.766000000,2.0
3,200.127,792.414001,2.7,125130.766,69.140027,-48.968483,0.0,0.0,0.0,0,2.313,25,1766,267.55400000000003,-3.432,2016-05-16T12:51:13.766000,2579,2016-05-16 12:51:13.766000000,3.0
4,212.276,795.609009,2.7,125130.767,69.140154,-48.968935,0.0,0.0,0.0,0,2.313,24,1831,267.555,-3.4330000000000003,2016-05-16T12:51:13.767000,2700,2016-05-16 12:51:13.767000000,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6676303,332.262,1048.177002,2.3000000000000003,152714.76200000002,68.843993,-49.354151,0.0,0.0,0.0,0,2.42,19,2093,382.034,-1.333,2016-05-16T15:26:57.762000,2436,2016-05-16 15:26:57.762000000,6676303.0
6676304,337.161,1048.296021,2.3000000000000003,152714.76200000002,68.84405,-49.354001,0.0,0.0,0.0,0,2.42,22,3213,382.034,-1.333,2016-05-16T15:26:57.762000,2754,2016-05-16 15:26:57.762000000,6676304.0
6676305,344.512,1048.343018,2.3000000000000003,152714.763,68.844122,-49.35375,0.0,0.0,0.0,0,2.42,20,3401,382.035,-1.333,2016-05-16T15:26:57.763000,2389,2016-05-16 15:26:57.763000000,6676305.0
6676306,342.062,1048.286011,2.3000000000000003,152714.763,68.8441,-49.353837,0.0,0.0,0.0,0,2.42,20,3462,382.035,-1.333,2016-05-16T15:26:57.763000,2291,2016-05-16 15:26:57.763000000,6676306.0


In [None]:
# Display Vaex's per-column summary of `df` as the cell output.
df.describe()

In [None]:
# Interactive heatmap widget: longitude on x, latitude on y, each bin coloured
# by the mean elevation of the points that fall into it.
mean_elevation = vaex.stat.mean(df.elevation)
df.widget.heatmap(
    df.longitude,
    df.latitude,
    what=mean_elevation,
    shape=1024,          # grid resolution passed to the heatmap
    limits='minmax',     # span the full min/max range of both axes
    colormap='plasma',
)

In [None]:
# import ipyvolume as ipv
%matplotlib widget
import vaex
import ipyvolume as ipv
import vaex.jupyter
from ipywidgets import widgets
import matplotlib.pyplot as plt
import h5py
import cartopy.crs as ccrs
# fs = h5py.File('data/atm1b_data_2020-07-11T20-39.hdf5')

# Open the full file again under its own name.
df_all = vaex.open('data/atm1b_data_2020-07-11T20-39.hdf5')
# vaex.vrange() is like numpy.arange but uses 0-memory no matter the length.
df_all.add_column('index', vaex.vrange(0, len(df_all)))
# Keep every 100th row. Note that this rebinds `df` to the decimated frame.
df = df_all[df_all.index % 100 == 0]

# The data coordinates are plain longitude/latitude, so both the extent and the
# scatter points are declared in PlateCarree and projected onto the polar map.
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(10, 8), dpi=90)
ax = fig.add_subplot(1, 1, 1, projection=ccrs.NorthPolarStereo(central_longitude=0))
ax.coastlines(resolution='50m', color='black', linewidth=1)
ax.set_extent([-60, -40, 60, 90], plate_carree)

# Colour each point by elevation, clipping the colour scale to [100, 1000].
# Alternative colormap: plt.cm.get_cmap('RdYlGn').reversed()
points = ax.scatter(
    df.longitude.values,
    df.latitude.values,
    c=df.elevation.values,
    cmap='viridis',
    vmin=100,
    vmax=1000,
    transform=plate_carree,
)
fig.colorbar(points, ax=ax, label='elevation', shrink=0.5, extend='both')
plt.show()

In [None]:
# NOTE(review): this passes the DataFrame itself as the argument to df.select().
# The intended selection expression cannot be determined from this notebook —
# confirm what should be selected here, or remove the cell.
df.select(df)

In [None]:
import vaex
import ipyvolume as ipv
import vaex.jupyter
%matplotlib inline

# Re-open the file; this rebinds `df` to the full (non-decimated) dataframe.
df = vaex.open('data/atm1b_data_2020-07-11T20-39.hdf5')

# Static heatmap: longitude vs. latitude, coloured by the mean elevation per bin.
mean_elevation = vaex.stat.mean(df.elevation)
df.plot(
    'longitude',
    'latitude',
    what=mean_elevation,
    figsize=(12, 8),
)

In [None]:
%matplotlib widget
from ipywidgets import widgets
from ipywidgets import interact, interactive, fixed
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt


# Figure with a single 3D axes; plot_func redraws into this global `ax`.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1, projection='3d')
# Initial camera position: 70 degrees elevation, 70 degrees azimuth.
ax.view_init(elev=70, azim=70)

def plot_func(freq):
    """Redraw the 3D scatter for the window of points selected by the slider.

    Reads the notebook-level globals ``df`` (the Vaex dataframe) and ``ax``
    (the 3D axes) and replaces whatever ``ax`` currently shows.

    Parameters
    ----------
    freq : float
        Slider value. The window starts at row ``round(freq * 1000)`` and
        covers at most 1000 rows.
    """
    step = 1000
    # round() rather than int(): float slider values carry rounding noise
    # (0.3 * 3 * 1000 == 899.9999999999999), which int() would truncate to 899.
    m = int(round(freq * step))

    # Slice the dataframe BEFORE materialising the columns, so only the rows of
    # this window are evaluated instead of every full column on each slider move.
    window = df[m:m + step]
    longitude = window.longitude.values
    latitude = window.latitude.values
    elevation = window.elevation.values  # used for both the z axis and the colour

    ax.clear()
    ax.scatter(longitude,
               latitude,
               elevation,
               c=elevation,
               cmap='viridis', s=1)
    # ax.clear() also wipes the axis labels, so set them again on every redraw.
    ax.set_xlabel('longitude')
    ax.set_ylabel('latitude')
    ax.set_zlabel('elevation')
    ax.axis('tight')
    
    

# Full-width slider that feeds plot_func's `freq` argument (0 to 60 in steps of 0.3).
freq_slider = widgets.FloatSlider(
    value=0,
    min=0,
    max=60,
    step=0.3,
    layout={'width': '100%'},
)
# Re-run plot_func every time the slider value changes.
interact(plot_func, freq=freq_slider)