# Clip_geotiffs_to_bounding_box

Uses geopandas to clip image geotiffs to a specified bounding box

This notebook requires a few special packages beyond the standard library: rasterio, shapely, geopandas, fiona and pycrs.

## Load in all the modules needed

In [None]:
%matplotlib inline
# Windows machines need special attention: point PROJ_LIB and GDAL_DATA at the
# "Library/share" folders of the active conda environment.
# This extra step will bypass an error from mpl_toolkits.basemap.
# NOTE: CONDA_PREFIX must be set in the environment (i.e. the notebook is run
# from a conda environment), otherwise the lookups below raise KeyError.
import os
if os.name == 'nt':
    os.environ["PROJ_LIB"] = os.path.join(os.environ["CONDA_PREFIX"], "Library", "share")
    os.environ["GDAL_DATA"] = os.path.join(os.environ["CONDA_PREFIX"], "Library", "share", "gdal")
import matplotlib.pyplot as plt
import glob
from pathlib import Path

import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from shapely.geometry import box
import geopandas as geopd
from fiona.crs import from_epsg
import pycrs
import re

In [None]:
# Define the local machine location of CETB data cubes.
# dataDir is expected to contain subdirectories in the following hierarchy
# that duplicates the hierarchy on the Google Shared Drive NSIDC-SD-CETB/v1/,
# for example:
# dataDir/F13_SSMI/N/nc_cubes/cubes_<regionName>
#
# scriptDir is the directory the notebook changes into after this configuration.
#
# outDir is the location where pkl files with the MOD data frames and geotiffs
# will be stored. We are keeping it separate from the large cubefile data store
# so that individual people can keep track of their own output files separately
#
# Change `user` to select one of the per-machine configurations below.
user = 'MJMac' #Mariah #MJWindows
if ('Joan' == user):
    #dataDir = '/mnt/data3/cetb/nsidc0630_v1/' #jmr machine fringe
    dataDir = Path(Path.home(), 'ceph', 'jmr204group','CETB_cubes')
    scriptDir = Path(Path.home(), 'ipynb_melt_onset', 'scripts')
    # NOTE(review): outDir is not assigned for this user (the candidate below is
    # still commented out), so any later use of outDir raises NameError.
#    outDir = Path(Path.home(), 'cetb/ipynb_melt__onset_plots')  #may need a spot for derived data
elif ('Mariah' == user):
    # Raw string: the backslashes are Windows path separators, not escapes
    dataDir = Path(r'R:\jmr204group\CETB_cubes')  # Mariah's PC
    scriptDir = Path(Path.home(), 'ipynb_melt_onset', 'scripts')
    outDir = Path(Path.home(), 'ipynb_melt_onset_plots') # may want to update this to a geotiff output directory
elif ('MJWindows' == user):
    dataDir = Path('Z:/mj On My Mac/nsidc0630_v1') # Mary Jo's Windows machine
    scriptDir = Path(Path.home(), 'ipynb_melt_onset', 'scripts')
    outDir = Path(Path.home(), 'ipynb_melt_onset_plots') # may want to update this to a geotiff output directory
elif ('MJMac' == user):
    dataDir = Path(Path.home(), 'nsidc0630_v1') # Mary Jo's Mac
    scriptDir = Path(Path.home(), 'ipynb_melt_onset', 'scripts')
    outDir = Path(Path.home(), 'nsidc0630_v1')
else:
    raise ValueError("unknown user= %s\n" % (user) )
    
%cd $scriptDir
outDir, dataDir, user

## Specify inputs

In [None]:
# SPECIFY the area of interest by name. Each area is described by latitude and
# longitude corners in decimal degrees; the corners are chosen so that a
# rectangle of pixels within these coordinates will be loaded.
areaname='fairbanks' #'gsl' #'hunza' #'vatna' etc

# areaname -> (lat_start, lat_end, lon_start, lon_end, Site)
# Site is the site name used for titles of plots
areaBounds = {
    'vatna': (63.75, 64.88, -20, -15, 'Vatnajokull, Iceland'),
    'hunza': (35.9, 37.1, 74, 76, 'Hunza Basin'),
    'gsl': (59.00, 67.00, -119.00, -107.00, 'Great Slave Lake, Canada'),
    'bathurst_range': (60.00, 67.25, -119.00, -107.50, 'Bathurst Caribou Range, NWT'),
    'bathurst_range2': (63.00, 65.500, -117.500, -112.00, 'Bathurst Caribou Range subset, NWT'),
    'barrow': (69.50, 71.50, -158, -152, 'Barrow/Utkiagvik, AK'),
    'fairbanks': (63.0, 66.7, -151.8, -143.4, 'Fairbanks, AK'),
}

# Reject names that have no entry in the table
if areaname not in areaBounds:
    raise ValueError("Unknown area name=%s" % (areaname))

lat_start, lat_end, lon_start, lon_end, Site = areaBounds[areaname]

# Save the MOD by year data frames for SIR and GRD to pickle files (with lat/lon geolocation)

Also saving geolocation and melt onset flag data frames

In [None]:
# Collect, in sorted order, the MOD geotiffs for the selected area that live
# in the MODs subdirectory of outDir.
modsDir = "%s/MODs" % outDir
tifPattern = "%s/EASE2_N3.125km*-%s.*.MOD.*tif" % (modsDir, areaname)
# NOTE(review): the name `list` shadows the Python builtin; it is kept because
# later cells read the file names from this variable.
list = sorted(glob.glob(tifPattern))
list


## Clip the tif area to the lat/lon bounding box

The default behavior for the .tif image is the EASE2-projected box that encloses
the original bounding box.  This procedure will clip the .tif image to just the
lat/lon box (even if it's rotated in EASE2).

The following uses this web page as an example:

https://automating-gis-processes.github.io/CSC18/lessons/L6/clipping-raster.html

In [None]:
def getFeatures(gdf):
    """Extract the first feature's geometry in the format that rasterio wants.

    The GeoDataFrame is serialized with its to_json() method, the JSON text is
    parsed, and the 'geometry' entry of the first item in 'features' is
    returned wrapped in a one-element list.
    """
    import json

    featureCollection = json.loads(gdf.to_json())
    firstFeature = featureCollection['features'][0]
    return [firstFeature['geometry']]

In [None]:
def clip_tif_to_bbox(outFileName, fileName, lat_start, lat_end, lon_start, lon_end):
    """Clip a geotiff to a lat/lon bounding box and write the result to a new geotiff.

    Parameters
    ----------
    outFileName : path of the clipped geotiff to write
    fileName : path of the input geotiff to read
    lat_start, lat_end : latitude bounds of the box, in decimal degrees
    lon_start, lon_end : longitude bounds of the box, in decimal degrees

    The box is built in EPSG:4326 (lat/lon), reprojected into the CRS of the
    input geotiff, and used to mask and crop the raster. Nothing is returned;
    a message naming the output file is printed.
    """
    # Make a shapely bounding box using the input lat/lon bounds
    bbox = box(lon_start, lat_start, lon_end, lat_end)

    # Open the input geotiff in a context manager so it is always closed
    with rasterio.open(fileName) as data:
        # Insert the bounding box into a GeoDataFrame (4326 is lat/lon)
        # and reproject into the projection from the input geotiff (e.g. EASE2_N)
        geo = geopd.GeoDataFrame({'geometry': bbox}, index=[0], crs=from_epsg(4326))
        geo = geo.to_crs(crs=data.crs.data)

        # Get the geometry coordinates in format that rasterio wants
        coords = getFeatures(geo)

        # Clip to the bbox; crop=True shrinks the raster to the extent of the
        # shape, so the returned transform differs from the input's transform
        out_img, out_transform = mask(data, shapes=coords, crop=True)

        out_meta = data.meta.copy()

    # The clipped image has its own size and its own affine transform; without
    # the updated transform the output would be georeferenced at the origin
    # of the unclipped input.
    out_meta.update({"height": out_img.shape[1],
                     "width": out_img.shape[2],
                     "transform": out_transform})

    with rasterio.open(outFileName, "w", **out_meta) as dest:
        dest.write(out_img)

    print("Wrote clipped image to %s\n" % outFileName)
        

In [None]:
# Clip every geotiff in the list, writing each result next to its input with
# the area name in the file name replaced by "<areaname>-clip".
clipStr = "%s-clip" % areaname
for f in list:
    print("Next file = %s" % f)
    # Rename only the file name, with a literal (non-regex) replacement, so
    # that a directory component that happens to contain the area name is
    # never rewritten.
    dirName, baseName = os.path.split(f)
    outFile = os.path.join(dirName, baseName.replace(areaname, clipStr))
    clip_tif_to_bbox(outFile, f, lat_start, lat_end, lon_start, lon_end)

In [None]:
# Open the first (unclipped) geotiff from the list and display its metadata.
# The dataset is left open here; the following cell plots from it.
data = rasterio.open(list[0])
data.meta

In [None]:
# Plot band 1 of the unclipped geotiff
show((data, 1))

In [None]:
# Open the most recently written clipped geotiff. outFile is the output path
# left over from the last iteration of the clipping loop; outFileName only
# exists as a parameter inside clip_tif_to_bbox and is not defined here.
clipped = rasterio.open(outFile)
clipped

In [None]:
# Plot band 1 of the clipped geotiff
show((clipped, 1))