# Manipulating data and metadata in cf-python (version 2)

## Homepage: https://cfpython.bitbucket.io
## Online, searchable documentation: https://cfpython.bitbucket.io/docs/latest

## Contents:

### 1. Read, inspect, write netCDF files
### 2. Subspace
### 3. Data
### 4. Calculate statistics
### 5. Other file formats

In [None]:
import cf
cf.__version__

# 1. Read, inspect and write files
https://cfpython.bitbucket.io/docs/latest/generated/cf.read.html

In [None]:
f = cf.read('ncas_data/IPSL-CM5A-LR_r1i1p1_tas_n96_rcp45_mnth.nc')[0]

In [None]:
f

In [None]:
print f

In [None]:
f.dump()

### Properties

In [None]:
f.properties()

In [None]:
f.getprop('project_id')

In [None]:
f.setprop('project_id', 'banana')
f.getprop('project_id')

In [None]:
# Remove the property, then try to read it back without a default.
# NOTE(review): presumably the lookup fails on purpose here, to motivate
# the default-value form of getprop -- confirm.
f.delprop('project_id')
f.getprop('project_id')

In [None]:
f.getprop('project_id', 'UNSET')

### Shorthand for named CF properties
http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html#attribute-appendix

In [None]:
# Named CF properties can be read, assigned and deleted as plain attributes.
# Print the current value, overwrite it, and print it again.
print f.standard_name
f.standard_name = 'banana'
print f.standard_name
# Delete the attribute, then assign a new value so that it exists again
del f.standard_name
f.standard_name = 'air_temperature'
print f.standard_name

### Reading many files

In [None]:
fl = cf.read('ncas_data/data[2-7].nc')
fl

In [None]:
# Print a one-line summary (name, shape and units) of each field in the list
for x in fl:
    print 'NAME:', x.name(), 'SHAPE:', x.shape, 'UNITS:', x.units

### Select by list position

In [None]:
g = fl[0]
g

In [None]:
fl[4:]

### Select by metadata
https://cfpython.bitbucket.io/docs/latest/generated/cf.FieldList.select.html

In [None]:
fl.select('air_temperature')

In [None]:
help(fl.select)

In [None]:
fl.select('northward_wind')

In [None]:
fl.select({'units': 'km h-1'})

In [None]:
fl.select('(east|north)ward_wind')

# Write fields to a netCDF file
https://cfpython.bitbucket.io/docs/latest/generated/cf.write.html

In [None]:
cf.write(f, 'new_file.nc')

In [None]:
g = cf.read('new_file.nc')[0]
f.equals(g)

# 2. Subspace a field

https://cfpython.bitbucket.io/docs/latest/generated/cf.Field.subspace.html

### Index-space: [square brackets]

In [None]:
f

In [None]:
print f.subspace[0, 0, 0]

In [None]:
f[0, 0, 0] # shorthand method - leave out the .subspace

In [None]:
f.subspace[0:6, :, :]

### Metadata-space: (round brackets)

In [None]:
print f

In [None]:
print f.subspace(longitude=180) # No shorthand for the "round brackets" form

#### `cf.lt(30)` is a "query" that means *less than 30*

https://cfpython.bitbucket.io/docs/latest/function.html#comparison

In [None]:
print f.subspace(latitude=cf.lt(30)) 

#### `cf.wi(90, 270)` is a query that means *within the range [90, 270]*

In [None]:
print f.subspace(longitude=cf.wi(90, 270))

In [None]:
g = f.subspace(time=cf.dt('1965-11-16'))
print g

In [None]:
# Show images in-line, within the notebook
%matplotlib inline
# Turn off all warnings for the rest of the session
import warnings
warnings.filterwarnings("ignore")

# Plot g, the field that was subspaced in the previous cell, with cf-plot
import cfplot as cfp
cfp.con(g)

#### `T` is shorthand for *time*

In [None]:
print f.subspace(T=cf.ge(cf.dt('1967-2-18')))

In [None]:
print f.subspace(T=cf.month(4))

In [None]:
print f.subspace(time=cf.dt('1965-11-16'), Y=cf.gt(30))

# 3. The field's data

In [None]:
f.data

#### Get the data as a `numpy` array

In [None]:
f.array # This is a numpy array

In [None]:
print type(f.array)
f.array[-1, 2, -3]

In [None]:
f.subspace[-1, 2, -3].array

In [None]:
# Work on a copy, x, rather than on f itself
x = f.copy()
# Assign a value to a single indexed element, then read that element back
x.subspace[-1, -1, -1] = -999
x.subspace[-1, -1, -1].array

In [None]:
x.subspace[-1, ...] = 888
x.subspace[-1, ...].array

In [None]:
# The assigned value may itself be computed from another subspace of the field
x.subspace[0, ...] = x.subspace[-1, ...] - 111
x.subspace[0, ...].array

### Modify the data where a condition is met
https://cfpython.bitbucket.io/docs/latest/generated/cf.Field.where.html

In [None]:
f.min(), f.mean(), f.max()

#### Set values below 290 to missing data

In [None]:
# Where the cf.lt(290) query is satisfied, replace the value with cf.masked
x = f.where(cf.lt(290), cf.masked)
print x.min(), x.mean(), x.max()
# Plot the first element along the leading axis of the modified field
cfp.con(x.subspace[0])

### Manipulate the axes

In [None]:
f.transpose(['X', 'T', 'Y'])

### Modifying the units

In [None]:
f = cf.read('ncas_data/IPSL-CM5A-LR_r1i1p1_tas_n96_rcp45_mnth.nc')[0]
f.units, f.mean()

In [None]:
f.units = 'degC'
f.units, f.mean()

In [None]:
f.Units # Upper case "U" gives a units object that we can manipulate

In [None]:
f.Units += 273.15
f.Units, f.units, f.mean()

### Field arithmetic

In [None]:
f

In [None]:
f.min(), f.mean(), f.max()

In [None]:
g = f + 2
g

In [None]:
g.min(), g.mean(), g.max()

In [None]:
g = f - f
g

In [None]:
g.min(), g.mean(), g.max()

In [None]:
x = f.copy()
x.units = 'degC'
x.data

#### Subtract the Celsius field from the Kelvin field and check that the result is zero

In [None]:
(f - x).mean()

In [None]:
g = f * f
g

#### Find the anomalies relative to the first time

In [None]:
# Select the first element along the leading axis of f
first_time = f.subspace[0]
# Reorder the axes of the selection.
# NOTE(review): presumably this is done to show that the subtraction in the
# next cell does not depend on the two fields sharing an axis order -- confirm.
first_time = first_time.transpose(['Y', 'T', 'X'])
first_time

In [None]:
g  = f - first_time
g

In [None]:
g.min(), g.mean(), g.max()

# 4. Statistical operations
https://cfpython.bitbucket.io/docs/latest/generated/cf.Field.collapse.html

In [None]:
g = f.collapse('max')
g

In [None]:
g.data

In [None]:
g = f.collapse('T: mean')
print g
print 'data values:\n', g.data
print '\ntime bounds:\n', g.coord('T').bounds.dtarray

#### Collapse multiple axes simultaneously

In [None]:
g = f.collapse('X: Y: sd')
g

#### Collapse an axis into groups, rather than a single value

In [None]:
g = f.collapse('T: mean', group=cf.seasons())
print g

#### `cf.seasons()` is a list of queries, each of which defines a range of months

In [None]:
cf.seasons()

#### By default, collapses are **not** weighted

In [None]:
g = f.collapse('area: mean', weights='area') # Area mean for each time
g = g.collapse('T: max')                     # Time maximum of the area means
# NOTE(review): this bare expression is not the last line of the cell, so its
# value is presumably not displayed -- confirm whether a print was intended.
g.data
print g

### File aggregation

#### Create a sequence of files on disk, each of which contains one year

In [None]:
f = cf.read('ncas_data/IPSL-CM5A-LR_r1i1p1_tas_n96_rcp45_mnth.nc')[0]
print f
# Write ten files, each holding one consecutive 12-element slice of f
for i in range(10):
    # Elements 12*i to 12*(i+1)-1 along the leading axis
    g = f.subspace[12*i:12*(i+1)]
    # Name the file after the year of the slice's first 'T' coordinate value
    year = g.coord('T').year.array[0]
    new_file = 'air_temperature_'+str(year)+'.nc'
    cf.write(g, new_file)
    print '    ',new_file

#### In IPython, `!` precedes a shell command

In [None]:
!ls -o air_temperature_*.nc

In [None]:
f2 = cf.read('air_temperature_*.nc')
print f2

In [None]:
f.equals(f2[0])

In [None]:
f3 = cf.read('air_temperature_*.nc', aggregate=False)
f3

# 5. PP and UM files

In [None]:
x = cf.read('ncas_data/aaaaoa.pmh8dec.pp')
x

In [None]:
print x[1]

In [None]:
cf.write(x, 'aaaaoa.pmh8dec.nc')