In [2]:
import rasterio as rio
import numpy as np
from PIL import Image

In [3]:
def load_data(path_dict):
    """Read every raster listed in ``path_dict``.

    Parameters
    ----------
    path_dict : dict
        Maps a label to the path of a raster file that ``rio.open`` can read.

    Returns
    -------
    dict
        Maps each label to a ``(data, src)`` tuple, where ``data`` is the
        raster contents with singleton dimensions squeezed out and cast to
        float32, and ``src`` is the dataset object the raster was read from.

    Notes
    -----
    ``src`` has already left its ``with`` block by the time it is stored.
    NOTE(review): callers appear to use it only for metadata such as
    ``src.transform`` -- confirm rasterio keeps that available after close.
    """
    loaded = {}
    for label, raster_path in path_dict.items():
        with rio.open(raster_path) as src:
            raw = src.read()
        # drop length-1 axes (e.g. a single band) before converting
        squeezed = np.squeeze(raw)
        loaded[label] = (squeezed.astype("float32"), src)
    return loaded

In [4]:
def get_lat_long(data, src):
    """Compute a latitude and a longitude value for every pixel of ``data``.

    Parameters
    ----------
    data : array
        Raster values; its first two dimensions are treated as rows and columns.
    src : dataset
        Object exposing a ``transform`` attribute accepted by
        ``rio.transform.xy``.

    Returns
    -------
    (lat, long) : tuple of arrays
        Both have the same shape as ``data``. ``lat`` holds the second (y)
        output of ``rio.transform.xy`` and ``long`` the first (x) output.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]

    # Row-major pixel indices: each row index repeated across its columns,
    # the column indices cycled once per row.
    row_idx = np.repeat(np.arange(0, n_rows), n_cols)
    col_idx = np.tile(np.arange(0, n_cols), n_rows)

    # xy() takes (transform, rows, cols) and hands back x values, then y values
    xs, ys = rio.transform.xy(src.transform, row_idx, col_idx)

    # fold the flat coordinate lists back into the raster's shape
    lat = np.array(ys).reshape(data.shape)
    long = np.array(xs).reshape(data.shape)
    return lat, long

def _edge_index(mask, take_last, description):
    """Return the first (or last) index at which the 1-D boolean ``mask`` is True."""
    hits = np.flatnonzero(mask)
    if hits.size == 0:
        raise ValueError(
            "target window does not overlap the raster: no pixel with " + description
        )
    return int(hits[-1] if take_last else hits[0])


def segment_coords(data, lat, long, target_coords = (0, 0), border = 0, length = 10):
    """Cut a square latitude/longitude window out of ``data``.

    The window spans latitudes ``(target_coords[0] - length, target_coords[0])``
    and longitudes ``(target_coords[1], target_coords[1] + length)``; all
    comparisons are strict.

    Parameters
    ----------
    data : array
        2-D raster to slice (rows first, columns second).
    lat, long : arrays
        Per-pixel coordinates with the same layout as ``data``. Only the first
        column of ``lat`` and the first row of ``long`` are consulted, so the
        grid is assumed to be axis-aligned (latitude varies by row, longitude
        by column).
    target_coords : tuple
        ``(lat_max, long_min)`` of the window.
    border : int
        Added to the end index of both slices. With ``border = 0`` the last
        matching row and column are excluded (plain Python slice semantics);
        ``border = 1`` includes them.
    length : number
        Side length of the window, in the same units as ``lat``/``long``.

    Returns
    -------
    array
        ``data[ymax:ymin + border, xmin:xmax + border]``, where ``ymax``/``xmin``
        are the first row/column inside the window and ``ymin``/``xmax`` the last.

    Raises
    ------
    ValueError
        If no pixel satisfies one of the four window bounds, i.e. the window
        lies entirely off one side of the raster. Previously this either
        failed with a TypeError or silently returned the full width/height.
    """
    # latitude/longitude bounds of the window
    lat_max = target_coords[0]
    lat_min = lat_max - length
    long_min = target_coords[1]
    long_max = long_min + length

    ylen, xlen = data.shape[0], data.shape[1]

    # one longitude per column (first row) and one latitude per row (first column)
    col_long = np.asarray(long)[0, :xlen]
    row_lat = np.asarray(lat)[:ylen, 0]

    # leftmost column east of long_min, rightmost column west of long_max
    xmin = _edge_index(col_long > long_min, False, "longitude > {}".format(long_min))
    xmax = _edge_index(col_long < long_max, True, "longitude < {}".format(long_max))

    # bottom-most row north of lat_min, top-most row south of lat_max
    ymin = _edge_index(row_lat > lat_min, True, "latitude > {}".format(lat_min))
    ymax = _edge_index(row_lat < lat_max, False, "latitude < {}".format(lat_max))

    # index data from top-bottom, left-right
    return data[ymax:ymin + border, xmin:xmax + border]

In [30]:
# Crop-yield rasters: one file per crop
maize_data_path = "../data/raw/yield_maize.tif"
soy_data_path = "../data/raw/yield_soybean.tif"

path_dict = dict()
path_dict["maize"] = maize_data_path
path_dict["soy"] = soy_data_path

# CHIRPS rasters: one file per year, 2001 through 2018 (arange excludes the stop value)
years = np.arange(2001, 2019)
for yr in years:
    chirps_key = "chirps_{yr}".format(yr = yr)
    chirps_data_path = "../data/raw/chirps/chirps-v2.0.{yr}.tif".format(yr = yr)
    path_dict[chirps_key] = chirps_data_path

# Read everything up front; data_dict maps each key to a (data, src) tuple
data_dict = load_data(path_dict)

In [31]:
# Segmenting the yearly CHIRPS rasters for the 10S60W window
# (target_coords is (lat_max, long_min); the window is `length` units on each side)
target_coords = (-10, -60)
for yr in years:
    chirps_data, chirps_src = data_dict["chirps_{yr}".format(yr = yr)]
    lat, long = get_lat_long(chirps_data, chirps_src)
    chirps_seg = segment_coords(chirps_data, lat, long, target_coords, border = 1, length = 10)
    # only the pixel array is handed to PIL; nothing from chirps_src is passed along
    chirps_seg = Image.fromarray(chirps_seg)
    out_path = "../data/raw/segmented/10S60W/chirps_{yr}_{lat}{long}.tif".format(yr = yr, lat = "10S", long = "60W")
    chirps_seg.save(out_path)

#lat, long = get_lat_long(data_dict["soy"][0], data_dict["soy"][1])
#soy_seg = segment_coords(data_dict["soy"][0], lat, long, target_coords, border = 1, length = 10)

saving yr  2001
got lat long  2001
segmented  2001
finally saving  2001
saving yr  2002
got lat long  2002
segmented  2002
finally saving  2002
saving yr  2003
got lat long  2003
segmented  2003
finally saving  2003
saving yr  2004
got lat long  2004
segmented  2004
finally saving  2004
saving yr  2005
got lat long  2005
segmented  2005
finally saving  2005
saving yr  2006
got lat long  2006
segmented  2006
finally saving  2006
saving yr  2007
got lat long  2007
segmented  2007
finally saving  2007
saving yr  2008
got lat long  2008
segmented  2008
finally saving  2008
saving yr  2009
got lat long  2009
segmented  2009
finally saving  2009
saving yr  2010
got lat long  2010
segmented  2010
finally saving  2010
saving yr  2011
got lat long  2011
segmented  2011
finally saving  2011
saving yr  2012
got lat long  2012
segmented  2012
finally saving  2012
saving yr  2013
got lat long  2013
segmented  2013
finally saving  2013
saving yr  2014
got lat long  2014
segmented  2014
finally savin