<p style="float:right">
<img src="images/logos/cu.png" style="display:inline" />
<img src="images/logos/cires.png" style="display:inline" />
<img src="images/logos/nasa.png" style="display:inline" />
</p>

# Python, Jupyter & pandas: Module 4

## Using pandas for analysis

In [None]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt 

We can find a long-running, but spatially very coarse, dataset of Northern Hemisphere snow cover from David Robinson at Rutgers.

http://climate.rutgers.edu/snowcover/docs.php?target=datareq

_Robinson, David A., Estilow, Thomas W., and NOAA CDR Program (2012): NOAA Climate Data Record (CDR) of Northern Hemisphere (NH) Snow Cover Extent (SCE), Version 1. [indicate subset used]. NOAA National Climatic Data Center. doi:10.7289/V5N014G9 [access date]._

In [None]:
import netCDF4

In [None]:
# OPeNDAP (THREDDS `dodsC`) URL of the snow-cover NetCDF file; it is opened
# remotely below rather than downloaded first.
snowcover_url = 'http://www.ncdc.noaa.gov/thredds/dodsC/cdr/snowcover/nhsce_v01r01_19661004_latest.nc'

Open and connect to the OPeNDAP endpoint.

In [None]:
%%time 
# Open the remote dataset; %%time reports how long the connection takes.
ds = netCDF4.Dataset(snowcover_url)

In [None]:
# Names of the dataset's global attributes.
ds.ncattrs()

In [None]:
# Global attributes can be read as plain Python attributes; show the title.
ds.title

In [None]:
# Names of the variables the dataset exposes.
ds.variables.keys()

In [None]:
%%time
# Slicing with [:, :, :] reads the whole three-dimensional variable into a
# local array; %%time shows the cost of that transfer.
all_data = ds.variables['snow_cover_extent'][:, :, :]

In [None]:
# No slicing here, so these names hold the variable objects themselves;
# values are read later wherever they are indexed with [:].
latitude = ds.variables['latitude']
longitude = ds.variables['longitude']


In [None]:
# Same pattern for the 'land' variable.
land = ds.variables['land']

In [None]:
# Quick look at the land variable in raw array (row/column) space.
plt.imshow(land[:], cmap='Accent')

In [None]:
# Shape of the longitude array once its values are read.
longitude[:].shape

In [None]:
# Total element count and shape of the array loaded above.
print(all_data.size, all_data.shape)

In [None]:
# Plot the first slice along the leading axis; 'nearest' disables smoothing
# so individual grid cells stay visible.
plt.imshow(all_data[0,:,:], cmap='Accent', interpolation='nearest')

In [None]:
from mpl_toolkits.basemap import Basemap

In [None]:
# Draw the land variable on a north-polar stereographic map ('npstere')
# bounded at latitude 30 with lon_0=-105.
plt.figure(figsize=(10, 10))
m = Basemap(projection='npstere', boundinglat=30, lon_0=-105)
m.drawcoastlines()
# latlon=True marks the first two arguments as longitude/latitude values
# rather than map-projection coordinates.
# NOTE(review): `land` is passed unsliced here, unlike `land[:]` in the earlier
# imshow cell -- confirm pcolor accepts the variable object directly.
m.pcolor(longitude[:], latitude[:], land, latlon=True, cmap='Accent')
plt.draw()



In [None]:
# Handle to the 'time' variable; its values are read with [:] below.
time = ds.variables['time']

In [None]:
# Convert the numeric time values to date objects using the variable's own
# `units` attribute.
# NOTE(review): depending on the installed netCDF4/cftime version, num2date may
# return cftime objects, which pd.DatetimeIndex (used further down) might not
# accept -- confirm against the environment in use.
times = netCDF4.num2date(time[:], time.units)

In [None]:
# Sum over the leading axis (the one later labelled 'time'), leaving one
# accumulated value per grid cell. The original cell referenced `a`, which is
# not defined until much later in the notebook (and is an unrelated 1-D
# array there), so it failed on a fresh kernel; `all_data` is the
# three-dimensional array loaded above.
total_cells = all_data.sum(axis=0)

In [None]:
# Apply a 10x10 figure size only within this block; rc_context restores the
# previous rc settings when the `with` exits.
with mpl.rc_context({'figure.figsize': (10,10)}):
    plt.imshow(total_cells, interpolation='nearest')

In [None]:
# pandas supplies the datetime index and xarray the labelled N-d containers
# used in the remaining cells.
import pandas as pd
import xarray as xr

In [None]:
# Check the array's dimensions before attaching names to them.
all_data.shape

In [None]:
# Wrap the raw array in a labelled Dataset: a single variable 'snowcover'
# with dimensions (time, row, col), plus a datetime coordinate on 'time'.
dset = xr.Dataset(
    data_vars={'snowcover': (('time', 'row', 'col'), all_data)},
    coords={'time': pd.DatetimeIndex(times)},
)

In [None]:
# Dimension names and their lengths.
dset.dims

In [None]:
# First entry of the time coordinate.
dset.time[0]

In [None]:
# Label-based selection on the time coordinate; the label is passed as a
# one-element list.
# NOTE(review): assumes '1966-10-10' matches a timestamp in the index exactly
# -- confirm against the actual time values.
dset.sel(time=['1966-10-10'])

In [None]:
# A DataArray built from a bare 2x3 random array, with no dimension names or
# coordinates supplied.
xr.DataArray(np.random.randn(2, 3))


In [None]:
# 2x3 array holding 0..5 with named dimensions and coordinate labels:
# x -> ['a', 'b'], y -> [-2, 0, 2].
data = xr.DataArray(np.arange(6.).reshape(2, 3), [('x', ['a', 'b']), ('y', [-2, 0, 2])])


In [None]:
data

In [None]:
# A DataArray can also be built directly from a pandas Series (here named
# 'foo' and indexed by 'a', 'b', 'c').
xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo'))


In [None]:
# Metadata dictionary; nothing was supplied for it when `data` was created.
data.attrs

In [None]:
data[:,[0,1,2]]                      # positional indexing: every x, y positions 0, 1 and 2

In [None]:
# .loc indexes by coordinate label; two full slices select everything.
data.loc[:,:]

In [None]:
# Label slice along x from 'b' to 'a' with step -1, i.e. x in reverse order.
data.loc['b':'a':-1]

In [None]:
# isel selects by integer position, addressed by dimension name.
data.isel(x=slice(0,2,1))

In [None]:
# sel selects by coordinate label, addressed by dimension name.
data.sel(x='a')

In [None]:
# Three random values that reuse `data`'s y coordinate.
# NOTE(review): the random values are unseeded, so displayed numbers differ
# from run to run.
a = xr.DataArray(np.random.randn(3), [data.coords['y']])

In [None]:
# Four random values on a separate dimension named 'z'.
b = xr.DataArray(np.random.randn(4), dims='z')


In [None]:
a

In [None]:
b

In [None]:
# `a` lives on 'y' and `b` on 'z'; arithmetic broadcasts by dimension name,
# so the result spans both dimensions.
a + b

In [None]:
# Operands are matched by dimension name rather than axis order, so the
# transpose lines up with the original.
data.T - data

In [None]:
# x has two entries, so both slices keep only the first one; arithmetic
# between DataArrays aligns on coordinate labels.
data[:-1] - data[:1]

In [None]:
# Group labels along y: y=-2 -> 'E', y=0 -> 'F', y=2 -> 'E'.
labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels')


In [None]:
labels

In [None]:
# Which positions belong to each label.
data.groupby(labels).groups

In [None]:
data

In [None]:
# Minimum over 'y' within each label group, converted to a pandas Series.
data.groupby(labels).min('y').to_series()

In [None]:
data

In [None]:
# Convert the whole DataArray to a pandas Series.
data.to_series()

In [None]:
# Promote the DataArray to a Dataset, storing it under the variable name 'foo'.
dsex = data.to_dataset(name='foo')


In [None]:
dsex

In [None]:
# Write the Dataset to 'example.nc' (a relative path, so it lands in the
# current working directory).
dsex.to_netcdf('example.nc')

In [None]:
# Open the same remote URL through xarray instead of netCDF4.
ds2 = xr.open_dataset(snowcover_url)

In [None]:
ds2

In [None]:
# Attribute-style access on the Dataset.
# NOTE(review): presumably `coord_system` is a variable or global attribute
# provided by the remote file -- confirm it exists there.
ds2.coord_system