# 20220323: Explore the use of Xarray -> Pandas interface
I want to convert gridded climate projection data from a NetCDF file into a table holding the variables for each lat, lon and time step.

In [7]:
import xarray as xr
import pandas as pd
import iris #just incase
import numpy as np
import cartopy.crs as ccrs

In [2]:
# NetCDF input; the file name suggests daily `pr` for RCP8.5 on a 12 km UK
# grid, 2020-12-01 to 2030-11-30 -- confirm against the file's metadata.
fname = "pr_rcp85_land-rcm_uk_12km_01_day_20201201-20301130.nc"
# Open the file as an xarray Dataset.
ds = xr.open_dataset(fname)
# Show the Dataset summary as the cell output.
ds

## Transform into Pandas dataframe

In [4]:
%%time
# Flatten the whole Dataset into a table and turn the index levels into
# ordinary columns.
# NOTE(review): the saved run of this cell ended in KeyboardInterrupt, so `df`
# was not created by this execution.
df = ds.to_dataframe().reset_index()

KeyboardInterrupt: 

In [5]:
# Preview the first rows of the flattened table.
df.head()

NameError: name 'df' is not defined

In [None]:
# Inspect the grid_latitude column on its own.
df['grid_latitude']

In [None]:
# Inspect the latitude/longitude pair of every row (the original selected
# 'grid_latitude' twice, which only repeats the same column).
df[['grid_latitude', 'grid_longitude']]

In [21]:
%%time
# Keep only the columns needed for the precipitation table.
pr = df.loc[:, [
    'yyyymmdd', 'month_number', 'year',
    'pr', 'grid_latitude', 'grid_longitude', 'ensemble_member',
]]

CPU times: user 3.05 s, sys: 9.45 s, total: 12.5 s
Wall time: 15.1 s


In [22]:
%%time
# Drop repeated rows and renumber the index from 0. The result is only
# displayed as the cell output; `pr` itself is not reassigned.
pr.drop_duplicates(ignore_index=True)

CPU times: user 51.1 s, sys: 38.2 s, total: 1min 29s
Wall time: 1min 49s


Unnamed: 0,yyyymmdd,month_number,year,pr,grid_latitude,grid_longitude,ensemble_member
0,b'20201201 ...,12,2020,0.690706,1.432699,-18.241456,1
1,b'20201201 ...,12,2020,0.853663,1.404462,-18.138041,1
2,b'20201201 ...,12,2020,1.088705,1.376215,-18.034611,1
3,b'20201201 ...,12,2020,1.452076,1.347959,-17.931165,1
4,b'20201201 ...,12,2020,1.969763,1.319692,-17.827705,1
...,...,...,...,...,...,...,...
33062395,b'20301130 ...,11,2030,0.989718,10.752585,-7.000053,1
33062396,b'20301130 ...,11,2030,0.240123,10.722549,-6.895056,1
33062397,b'20301130 ...,11,2030,0.035135,10.692480,-6.790090,1
33062398,b'20301130 ...,11,2030,0.000422,10.662379,-6.685155,1


## Final Xarray -> Pandas pipeline

In [None]:
# Dataset -> table in one chain: flatten, expose the index levels as columns,
# keep the columns of interest, then drop repeated rows.
# NOTE(review): the repeated rows presumably come from extra variables or
# dimensions of the Dataset being broadcast by to_dataframe() -- confirm.
df = (
    ds.to_dataframe()
    .reset_index()
    .loc[:, [
        'yyyymmdd', 'month_number', 'year',
        'pr', 'grid_latitude', 'grid_longitude', 'ensemble_member',
    ]]
    .drop_duplicates(ignore_index=True)
)

## Generate lat-lon pairs in Xarray Dataset

In [3]:
import cartopy.crs as ccrs

### Add lat lon coords using Iris

In [8]:
# Load the same NetCDF file (`fname`) as a single Iris cube.
cube = iris.load_cube(fname)
# Show the cube summary as the cell output.
cube

Lwe Precipitation Rate (mm/day),ensemble_member,time,projection_y_coordinate,projection_x_coordinate
Shape,1,3600,112,82
Dimension coordinates,,,,
ensemble_member,x,-,-,-
time,-,x,-,-
projection_y_coordinate,-,-,x,-
projection_x_coordinate,-,-,-,x
Auxiliary coordinates,,,,
ensemble_member_id,x,-,-,-
month_number,-,x,-,-
year,-,x,-,-


In [9]:
def add_lat_lon(cube, inplace=True, geodetic=None):
    """Attach 2-D ``latitude`` and ``longitude`` auxiliary coordinates to a cube.

    The cube's X and Y dimension coordinates are expanded to a full grid,
    transformed from the cube's own coordinate system into ``geodetic`` and
    added as two auxiliary coordinates spanning the cube's (Y, X) dimensions.

    Args:
        cube: Iris cube that has a coordinate system and X/Y dimension
            coordinates.
        inplace: If True (default) the passed cube is modified; otherwise a
            copy is modified and the passed cube is left untouched.
        geodetic: Target geographic coordinate system. When None,
            ``iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS)``
            is used.

    Returns:
        The cube carrying the new coordinates (the passed cube itself when
        ``inplace`` is True).

    Raises:
        ValueError: From ``cube.add_aux_coord`` if a coordinate with identical
            metadata already exists on the cube, e.g. when this function is
            run twice on the same cube in place.
    """
    if not inplace:
        cube = cube.copy()
    # Identity check: `== None` would defer to the argument's own __eq__,
    # which may not return a plain, correct bool.
    if geodetic is None:
        geodetic = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS)

    cube_crs = cube.coord_system().as_cartopy_crs()

    x = cube.coord(dim_coords=True, axis='X')
    y = cube.coord(dim_coords=True, axis='Y')

    # Full 2-D grid of projected coordinates, shaped (len(y), len(x)).
    mx, my = np.meshgrid(x.points, y.points)

    # transform_points returns an array whose last axis is indexed below as
    # 0 -> longitude, 1 -> latitude.
    latlons = geodetic.as_cartopy_crs().transform_points(cube_crs, mx, my)
    lons = latlons[:, :, 0]
    lats = latlons[:, :, 1]

    lat_coord = iris.coords.AuxCoord(lats,
                                     standard_name='latitude',
                                     long_name='latitude',
                                     units='degrees',
                                     coord_system=geodetic)
    lon_coord = iris.coords.AuxCoord(lons,
                                     standard_name='longitude',
                                     long_name='longitude',
                                     units='degrees',
                                     coord_system=geodetic)

    # Both 2-D coordinates span the cube's Y and X data dimensions, in that
    # order, matching the meshgrid layout above.
    grid_dims = (y.cube_dims(cube)[0], x.cube_dims(cube)[0])
    cube.add_aux_coord(lat_coord, grid_dims)
    cube.add_aux_coord(lon_coord, grid_dims)

    return cube

In [10]:
%%time
# `inplace` defaults to True, so `cube` itself is modified; the returned cube
# is shown as the cell output.
add_lat_lon(cube)

  globe=globe,


Lwe Precipitation Rate (mm/day),ensemble_member,time,projection_y_coordinate,projection_x_coordinate
Shape,1,3600,112,82
Dimension coordinates,,,,
ensemble_member,x,-,-,-
time,-,x,-,-
projection_y_coordinate,-,-,x,-
projection_x_coordinate,-,-,-,x
Auxiliary coordinates,,,,
ensemble_member_id,x,-,-,-
month_number,-,x,-,-
year,-,x,-,-


### Transfer Cube to DataArray

In [11]:
# Convert the Iris cube into an xarray DataArray.
da = xr.DataArray.from_iris(cube)

In [14]:
# IPython introspection: show the signature and docstring of add_aux_coord.
cube.add_aux_coord?

[0;31mSignature:[0m [0mcube[0m[0;34m.[0m[0madd_aux_coord[0m[0;34m([0m[0mcoord[0m[0;34m,[0m [0mdata_dims[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Adds a CF auxiliary coordinate to the cube.

Args:

* coord
    The :class:`iris.coords.DimCoord` or :class:`iris.coords.AuxCoord`
    instance to add to the cube.

Kwargs:

* data_dims
    Integer or iterable of integers giving the data dimensions spanned
    by the coordinate.

Raises a ValueError if a coordinate with identical metadata already
exists on the cube.

See also :meth:`Cube.remove_coord()<iris.cube.Cube.remove_coord>`.
[0;31mFile:[0m      ~/opt/miniconda3/envs/data/lib/python3.7/site-packages/iris/cube.py
[0;31mType:[0m      method


In [12]:
%%time
# DataArray -> table in one chain: flatten, expose the index levels as
# columns, keep the columns of interest, then drop repeated rows.
df = (
    da.to_dataframe()
    .reset_index()
    .loc[:, [
        'yyyymmdd', 'month_number', 'year',
        'latitude', 'longitude', 'ensemble_member', 'pr',
    ]]
    .drop_duplicates(ignore_index=True)
)

CPU times: user 52.9 s, sys: 44.4 s, total: 1min 37s
Wall time: 2min 3s


In [13]:
# Show the resulting table as the cell output.
df

Unnamed: 0,yyyymmdd,month_number,year,pr,latitude,longitude,ensemble_member
0,20201201,12,2020,0.690706,48.682767,-10.291939,1
1,20201201,12,2020,0.853663,48.694350,-10.130549,1
2,20201201,12,2020,1.088705,48.705708,-9.969056,1
3,20201201,12,2020,1.452076,48.716841,-9.807464,1
4,20201201,12,2020,1.969763,48.727750,-9.645773,1
...,...,...,...,...,...,...,...
33062395,20301130,11,2030,0.989718,60.828859,3.780264,1
33062396,20301130,11,2030,0.240123,60.819205,3.999770,1
33062397,20301130,11,2030,0.035135,60.809193,4.219121,1
33062398,20301130,11,2030,0.000422,60.798825,4.438310,1
