In [35]:
import numpy as np
import pandas as pd
import xarray as xr

## I. Applying UFunctions On Pandas & Numpy

### Numpy

In [9]:
x1 = np.arange(9.0).reshape((3, 3))

In [18]:
x2 = np.arange(3.0)

In [19]:
x1

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

In [20]:
x2

array([0., 1., 2.])

In [23]:
#Arithmatic Function Operation On Numpy Arrays
x3 = np.add(x1, x2)
x3

array([[ 0.,  2.,  4.],
       [ 3.,  5.,  7.],
       [ 6.,  8., 10.]])

In [28]:
#Trignometric Function Operation On Numpy Arrays
x4 = np.square(x2,x1)
x4

array([[0., 1., 4.],
       [0., 1., 4.],
       [0., 1., 4.]])

In [29]:
#Comparison Function Operation On Numpy Arrays
np.greater_equal([4, 2, 1], [2, 2, 2])

array([ True,  True, False])

In [35]:
np.greater([x2], [x1])

array([[[False, False, False],
        [False, False, False],
        [False, False, False]]])

In [43]:
x5 = x1.astype(float)

In [50]:
def log_fact(n):
    n = np.asarray(n)
    m = np.max(n)
    return np.take(np.cumsum(np.log(np.arange(1, m+1))), n-1)

In [51]:
log_fact(3)

1.791759469228055

In [52]:
log_fact([[10, 15, 23], [14, 15, 8]])

array([[15.10441257, 27.89927138, 51.60667557],
       [25.19122118, 27.89927138, 10.6046029 ]])

### Pandas

In [69]:
#Initialization Of Pandas Series
series = pd.Series([20, 21, 12], index=['London','New York','Helsinki'])

In [70]:
series

London      20
New York    21
Helsinki    12
dtype: int64

In [71]:
#Default Function Initialization.
def square(x):
        return x**2

In [72]:
#Function Call.
series.apply(square)

London      400
New York    441
Helsinki    144
dtype: int64

In [73]:
#Lambda Function Initialization.
series.apply(lambda x: x**2)

London      400
New York    441
Helsinki    144
dtype: int64

In [74]:
#Custom Function Initialization With Arguments.
def add_custom_values(x, **kwargs):
...     for month in kwargs:
...         x+=kwargs[month]
...     return x

In [75]:
#Custom Function Call With Arguments.
series.apply(add_custom_values, june=30)

London      50
New York    51
Helsinki    42
dtype: int64

In [76]:
#Numpy Function Initialization.
series.apply(np.log)

London      2.995732
New York    3.044522
Helsinki    2.484907
dtype: float64

## II.  Import .hdf File With netCDF4

In [1]:
!pip install netcdf4

Collecting netcdf4
  Using cached https://files.pythonhosted.org/packages/eb/aa/b067f3b1a2561f29f5c282d8a0f0f4bba5b13e9bdaa5fcd29005d226c448/netCDF4-1.4.2.tar.gz
    Complete output from command python setup.py egg_info:
    reading from setup.cfg...
    
        HDF5_DIR environment variable not set, checking some standard locations ..
    checking /Users/saviosebastian ...
    checking /usr/local ...
    checking /sw ...
    checking /opt ...
    checking /opt/local ...
    checking /usr ...
      config = configparser.SafeConfigParser()
    Traceback (most recent call last):
      File "<string>", line 1, in <module>
      File "/private/var/folders/wd/6mcc6nzd4qx7cd_y1brbrsqc0000gn/T/pip-install-kx8dzkh7/netcdf4/setup.py", line 371, in <module>
        _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs)
      File "/private/var/folders/wd/6mcc6nzd4qx7cd_y1brbrsqc0000gn/T/pip-install-kx8dzkh7/netcdf4/setup.py", line 319, in _populate_hdf5_info
        raise ValueError('did n

In [2]:
!conda install netcdf4 -y

Solving environment: done

## Package Plan ##

  environment location: /Users/saviosebastian/anaconda3

  added / updated specs: 
    - netcdf4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    libnetcdf-4.6.1            |       h4e6abe9_1         1.2 MB
    netcdf4-1.4.2              |   py37h08833f9_0         437 KB
    cftime-1.0.2.1             |   py37h1d22016_0         247 KB
    ------------------------------------------------------------
                                           Total:         1.9 MB

The following NEW packages will be INSTALLED:

    cftime:    1.0.2.1-py37h1d22016_0
    hdf4:      4.2.13-h39711bb_2     
    libnetcdf: 4.6.1-h4e6abe9_1      
    netcdf4:   1.4.2-py37h08833f9_0  


Downloading and Extracting Packages
libnetcdf-4.6.1      | 1.2 MB    | ##################################### | 100% 
netcdf4-1.4.2        | 437 KB    | ###############################

In [3]:
from netCDF4 import Dataset

In [4]:
import sys

In [6]:
mod03_name = '/Users/saviosebastian/Documents/Project/CMAC/MYD03.A2002185.0000.061.2017362174430.hdf'
mod06_name = '/Users/saviosebastian/Documents/Project/CMAC/MYD06_L2.A2002185.0000.061.2018003215042.hdf'
print(mod03_name)
print(mod06_name)

/Users/saviosebastian/Documents/Project/CMAC/MYD03.A2002185.0000.061.2017362174430.hdf
/Users/saviosebastian/Documents/Project/CMAC/MYD06_L2.A2002185.0000.061.2018003215042.hdf


In [7]:
rootgrp = Dataset(mod03_name, "r", format="NETCDF3")

In [8]:
latitude = rootgrp.variables["Latitude"][:,:] 
longitude = rootgrp.variables["Longitude"][:,:]

In [9]:
latitude

masked_array(
  data=[[--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        ...,
        [30.023902893066406, 30.0213565826416, 30.018844604492188, ...,
         26.660354614257812, 26.649608612060547, 26.63867950439453],
        [30.005863189697266, 30.003368377685547, 30.000919342041016, ...,
         26.643115997314453, 26.632293701171875, 26.621292114257812],
        [29.987855911254883, 29.985397338867188, 29.982986450195312, ...,
         26.62582778930664, 26.614887237548828, 26.603618621826172]],
  mask=[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],
  fill_value=-999.0,
  dtype=float32)

In [15]:
data_lt = latitude.shape
data_lt

(2040, 1354)

In [16]:
data_lon = longitude.shape
data_lon

(2040, 1354)

In [19]:
print(rootgrp.variables)

OrderedDict([('Latitude', <class 'netCDF4._netCDF4.Variable'>
>f4 Latitude(nscans*10:MODIS_Swath_Type_GEO, mframes:MODIS_Swath_Type_GEO)
    units: degrees
    valid_range: [-90.  90.]
    _FillValue: -999.0
unlimited dimensions: 
current shape = (2040, 1354)
filling on), ('Longitude', <class 'netCDF4._netCDF4.Variable'>
>f4 Longitude(nscans*10:MODIS_Swath_Type_GEO, mframes:MODIS_Swath_Type_GEO)
    units: degrees
    valid_range: [-180.  180.]
    _FillValue: -999.0
unlimited dimensions: 
current shape = (2040, 1354)
filling on), ('Scan Offset', <class 'netCDF4._netCDF4.Variable'>
int8 Scan Offset(nscans*20:MODIS_Swath_Type_GEO, mframes*2:MODIS_Swath_Type_GEO)
    units: km IFOV
    valid_range: [-127  127]
    _FillValue: -128
    scale_factor: 0.006
unlimited dimensions: 
current shape = (4080, 2708)
filling on), ('Track Offset', <class 'netCDF4._netCDF4.Variable'>
int8 Track Offset(nscans*20:MODIS_Swath_Type_GEO, mframes*2:MODIS_Swath_Type_GEO)
    units: km IFOV
    valid_range: [

In [20]:
print(rootgrp.variables.keys())

odict_keys(['Latitude', 'Longitude', 'Scan Offset', 'Track Offset', 'Height Offset', 'Height', 'SensorZenith', 'SensorAzimuth', 'Range', 'SolarZenith', 'SolarAzimuth', 'Land/SeaMask', 'WaterPresent', 'gflags', 'Scan number', 'EV frames', 'SD frames', 'SV frames', 'EV start time', 'SD start time', 'SV start time', 'EV center time', 'Mirror side', 'SD Sun zenith', 'SD Sun azimuth', 'Moon Vector', 'L1 scan quality', 'Geo scan quality', 'orb_pos', 'orb_vel', 'T_inst2ECR', 'attitude_angles', 'sun_ref', 'num_impulse', 'impulse_enc', 'impulse_time', 'Scan Type', 'thermal_correction', 'attitude_quality', 'ephemeris_quality', 'Focal_length', 'band_position', 'detector_space', 'detector_offsets', 'T_offset', 'num_samples'])


In [22]:
for name, variable in rootgrp.variables.items():
    print("=== VARNAME: ", name, " =======")
    for attrname in variable.ncattrs():
        print("{} -- {}".format(attrname, getattr(variable, attrname)))

units -- degrees
valid_range -- [-90.  90.]
_FillValue -- -999.0
units -- degrees
valid_range -- [-180.  180.]
_FillValue -- -999.0
units -- km IFOV
valid_range -- [-127  127]
_FillValue -- -128
scale_factor -- 0.006
units -- km IFOV
valid_range -- [-127  127]
_FillValue -- -128
scale_factor -- 0.006
units -- km
valid_range -- [-127  127]
_FillValue -- -128
scale_factor -- 0.006
units -- meters
valid_range -- [ -400 10000]
_FillValue -- -32767
units -- degrees
valid_range -- [    0 18000]
_FillValue -- -32767
scale_factor -- 0.01
units -- degrees
valid_range -- [-18000  18000]
_FillValue -- -32767
scale_factor -- 0.01
units -- meters
valid_range -- [27000 65535]
_FillValue -- 0
scale_factor -- 25.0
units -- degrees
valid_range -- [    0 18000]
_FillValue -- -32767
scale_factor -- 0.01
units -- degrees
valid_range -- [-18000  18000]
_FillValue -- -32767
scale_factor -- 0.01
valid_range -- [0 7]
_FillValue -- 221
valid_range -- [0 8]
_FillValue -- 255
_FillValue -- 255
_FillValue -- 0
va

In [23]:
rootgrp1 = Dataset(mod06_name, "r", format="NETCDF3")

In [24]:
cloud_mask_allbytes = rootgrp1.variables["Cloud_Mask_1km"][:,:,:] 

In [27]:
cloud_mask_allbytes

masked_array(
  data=[[[--, --],
         [--, --],
         [--, --],
         ...,
         [--, --],
         [--, --],
         [--, --]],

        [[--, --],
         [--, --],
         [--, --],
         ...,
         [--, --],
         [--, --],
         [--, --]],

        [[--, --],
         [--, --],
         [--, --],
         ...,
         [--, --],
         [--, --],
         [--, --]],

        ...,

        [[--, --],
         [--, --],
         [--, --],
         ...,
         [--, --],
         [--, --],
         [--, --]],

        [[--, --],
         [--, --],
         [--, --],
         ...,
         [--, --],
         [--, --],
         [--, --]],

        [[--, --],
         [--, --],
         [--, --],
         ...,
         [--, --],
         [--, --],
         [--, --]]],
  mask=[[[ True,  True],
         [ True,  True],
         [ True,  True],
         ...,
         [ True,  True],
         [ True,  True],
         [ True,  True]],

        [[ True,  True],
 

In [28]:
print(cloud_mask_allbytes.data)

[[[  0   0]
  [  0   0]
  [  0   0]
  ...
  [  0   0]
  [  0   0]
  [  0   0]]

 [[  0   0]
  [  0   0]
  [  0   0]
  ...
  [  0   0]
  [  0   0]
  [  0   0]]

 [[  0   0]
  [  0   0]
  [  0   0]
  ...
  [  0   0]
  [  0   0]
  [  0   0]]

 ...

 [[-73   4]
  [-73   4]
  [-73   4]
  ...
  [-73   4]
  [-73   4]
  [-73   4]]

 [[-73   4]
  [-73   4]
  [-73   4]
  ...
  [-73   4]
  [-73   4]
  [-73   4]]

 [[-73   4]
  [-73   4]
  [-73   4]
  ...
  [-73   4]
  [-73   4]
  [-73   4]]]


In [29]:
print(cloud_mask_allbytes.dtype)

int8


## III. StackOverflow : XArray 

### Rolling With Pandas

In [13]:
da = xr.DataArray(np.linspace(0, 11, num=12),
                      coords=[pd.date_range('15/12/1999',periods=12, freq=pd.DateOffset(months=1))],
                      dims='time')

In [4]:
da

<xarray.DataArray (time: 12)>
array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])
Coordinates:
  * time     (time) datetime64[ns] 1999-12-15 2000-01-15 ... 2000-11-15

In [5]:
da['time']

<xarray.DataArray 'time' (time: 12)>
array(['1999-12-15T00:00:00.000000000', '2000-01-15T00:00:00.000000000',
       '2000-02-15T00:00:00.000000000', '2000-03-15T00:00:00.000000000',
       '2000-04-15T00:00:00.000000000', '2000-05-15T00:00:00.000000000',
       '2000-06-15T00:00:00.000000000', '2000-07-15T00:00:00.000000000',
       '2000-08-15T00:00:00.000000000', '2000-09-15T00:00:00.000000000',
       '2000-10-15T00:00:00.000000000', '2000-11-15T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 1999-12-15 2000-01-15 ... 2000-11-15

In [6]:
da.rolling(time=3).mean()

<xarray.DataArray (time: 12)>
array([nan, nan,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])
Coordinates:
  * time     (time) datetime64[ns] 1999-12-15 2000-01-15 ... 2000-11-15

In [7]:
da.rolling(time=3).sum()

<xarray.DataArray (time: 12)>
array([nan, nan,  3.,  6.,  9., 12., 15., 18., 21., 24., 27., 30.])
Coordinates:
  * time     (time) datetime64[ns] 1999-12-15 2000-01-15 ... 2000-11-15

In [8]:
da

<xarray.DataArray (time: 12)>
array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])
Coordinates:
  * time     (time) datetime64[ns] 1999-12-15 2000-01-15 ... 2000-11-15

In [77]:
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})

In [78]:
df

Unnamed: 0,B
0,0.0
1,1.0
2,2.0
3,
4,4.0


In [79]:
 df.rolling(2)

Rolling [window=2,center=False,axis=0]

In [81]:
df.rolling(2).sum()

Unnamed: 0,B
0,
1,1.0
2,3.0
3,
4,


In [84]:
df2 = pd.DataFrame({'B': [0, 1, 2, 3, 4]})
df2

Unnamed: 0,B
0,0
1,1
2,2
3,3
4,4


In [83]:
 df2.rolling(2).sum()

Unnamed: 0,B
0,
1,1.0
2,3.0
3,5.0
4,7.0


### Rolling With XArray

In [85]:
a = np.array(np.random.randint(1, 90+1,(4,4)),dtype=np.float64)
b = np.array(np.random.randint(1, 360+1,(4,4)),dtype=np.float64)
c = np.random.random_sample(16,)
c = c.reshape(4,4)

In [86]:
dsa = xr.Dataset()

In [87]:
dsa['CloudFraction'] = (('x', 'y'), c)
dsa.coords['latitude'] = (('x', 'y'), a)
dsa.coords['longitude'] = (('x', 'y'), b)

In [88]:
dsa

<xarray.Dataset>
Dimensions:        (x: 4, y: 4)
Coordinates:
    latitude       (x, y) float64 68.0 90.0 50.0 33.0 ... 7.0 12.0 24.0 53.0
    longitude      (x, y) float64 176.0 58.0 91.0 146.0 ... 122.0 207.0 337.0
Dimensions without coordinates: x, y
Data variables:
    CloudFraction  (x, y) float64 0.7131 0.6826 0.8046 ... 0.2672 0.2851 0.8247

In [98]:
dsa.dims

Frozen(SortedKeysDict({'x': 4, 'y': 4}))

In [116]:
dsa['CloudFraction']

<xarray.DataArray 'CloudFraction' (x: 4, y: 4)>
array([[0.713111, 0.682574, 0.804627, 0.856438],
       [0.656098, 0.500684, 0.137913, 0.165222],
       [0.495402, 0.834268, 0.863503, 0.699731],
       [0.009223, 0.267156, 0.285056, 0.824703]])
Coordinates:
    latitude   (x, y) float64 68.0 90.0 50.0 33.0 10.0 ... 7.0 12.0 24.0 53.0
    longitude  (x, y) float64 176.0 58.0 91.0 146.0 ... 162.0 122.0 207.0 337.0
Dimensions without coordinates: x, y

In [110]:
dsa1 = dsa.rolling(x=2).sum()
dsa1

<xarray.Dataset>
Dimensions:        (x: 4, y: 4)
Coordinates:
    latitude       (x, y) float64 68.0 90.0 50.0 33.0 ... 7.0 12.0 24.0 53.0
    longitude      (x, y) float64 176.0 58.0 91.0 146.0 ... 122.0 207.0 337.0
Dimensions without coordinates: x, y
Data variables:
    CloudFraction  (x, y) float64 nan nan nan nan ... 0.5046 1.101 1.149 1.524

In [115]:
dsa1['CloudFraction']

<xarray.DataArray 'CloudFraction' (x: 4, y: 4)>
array([[     nan,      nan,      nan,      nan],
       [1.369209, 1.183258, 0.942541, 1.02166 ],
       [1.1515  , 1.334952, 1.001417, 0.864953],
       [0.504625, 1.101424, 1.148559, 1.524434]])
Coordinates:
    latitude   (x, y) float64 68.0 90.0 50.0 33.0 10.0 ... 7.0 12.0 24.0 53.0
    longitude  (x, y) float64 176.0 58.0 91.0 146.0 ... 162.0 122.0 207.0 337.0
Dimensions without coordinates: x, y

In [121]:
dsa1['CloudFraction'][1][:]

<xarray.DataArray 'CloudFraction' (y: 4)>
array([1.369209, 1.183258, 0.942541, 1.02166 ])
Coordinates:
    latitude   (y) float64 10.0 3.0 9.0 90.0
    longitude  (y) float64 357.0 40.0 81.0 358.0
Dimensions without coordinates: y

In [108]:
dsa.rolling(x=2,y=2).sum()

ValueError: exactly one dim/window should be provided

In [114]:
dsa2 = dsa1.rolling(y=2).sum()
dsa2['CloudFraction']

<xarray.DataArray 'CloudFraction' (x: 4, y: 4)>
array([[     nan,      nan,      nan,      nan],
       [     nan, 2.552467, 2.125799, 1.964201],
       [     nan, 2.486452, 2.336368, 1.86637 ],
       [     nan, 1.606049, 2.249983, 2.672993]])
Coordinates:
    latitude   (x, y) float64 68.0 90.0 50.0 33.0 10.0 ... 7.0 12.0 24.0 53.0
    longitude  (x, y) float64 176.0 58.0 91.0 146.0 ... 162.0 122.0 207.0 337.0
Dimensions without coordinates: x, y

In [33]:
longitude

masked_array(
  data=[[--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        ...,
        [15.015028953552246, 15.064740180969238, 15.11345100402832, ...,
         38.32621383666992, 38.37139129638672, 38.41727066040039],
        [15.014087677001953, 15.063920021057129, 15.112410545349121, ...,
         38.32081985473633, 38.366085052490234, 38.41202926635742],
        [15.01251220703125, 15.062713623046875, 15.111570358276367, ...,
         38.31563949584961, 38.36115646362305, 38.4079704284668]],
  mask=[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],
  fill_value=-999.0,
  dtype=float32)

In [38]:
d = {'Latitude' :  pd.Series(latitude)}

Exception: Data must be 1-dimensional

In [43]:
df1 = pd.DataFrame(latitude)
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1344,1345,1346,1347,1348,1349,1350,1351,1352,1353
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [62]:
sdf = df1.iloc[2010:2014,1:5]
sdf

Unnamed: 0,1,2,3,4
2010,30.324301,30.321409,30.31852,30.315639
2011,30.306293,30.303467,30.300634,30.29781
2012,30.288284,30.285524,30.282747,30.279968
2013,30.270271,30.267582,30.264862,30.262133


In [67]:
sdf = sdf.astype(float)
sdf

Unnamed: 0,1,2,3,4
2010,30.324301,30.321409,30.31852,30.315639
2011,30.306293,30.303467,30.300634,30.29781
2012,30.288284,30.285524,30.282747,30.279968
2013,30.270271,30.267582,30.264862,30.262133


In [68]:
sdf.dtypes

1    float64
2    float64
3    float64
4    float64
dtype: object

In [None]:
df = pd.DataFrame(d, columns=['CatIndex','Latitude', 'Longitude'])