In [1]:
import os
import pandas as pd
import geopandas as gpd
import xarray as xr
import numpy as np
import itertools
import datetime
from shapely.geometry import GeometryCollection, Point, LineString
from lotek.conversion import shp2mask
from rasterio.enums import MergeAlg

In [2]:
"""
Setup a cluster for parallel processing
"""
from distributed import Client, LocalCluster
cluster = LocalCluster(n_workers=5, threads_per_worker=2)
client = Client(cluster)

In [3]:
"""
Define I/O
"""
# Inputs
# path to main directory
inDIR = 'C:/SPK_local/for_others/Augustine_lotek'
# file name of cleaned gps data (from gps_cleaning.ipynb)
gps_f = 'TRMappended_GoodDays_2017TrackQ3_cleaned.csv'
# path to 1 m vegetation raster as grid template
veg_f = 'G:/neon_v18/neon_class_2017_v18.tif'
# path to 30 m raster as grid template
tif_30m_f = 'C:/SPK_local/data/rasters/Masks/CPER_dist_to_fence_2017.tif'
# path to shapefile with pasture boundaries
cper_f = "C:/SPK_local/data/vectors/Pasture_Boundaries/Shapefiles/cper_pastures_2017_clip.shp"

# Outputs
# path to output directory
outDIR = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/data'
# file name of gridded output
out_gridded_f = '2017_grazing_time_gridded_all.csv'

In [4]:
"""
Read in data and pre-format
"""
# read in GPS data
df_gps = pd.read_csv(os.path.join(inDIR, gps_f), engine='python', parse_dates=['Fix_Date', 'Fix_DateTime'])

# read in CPER pasture shapefile
df_cper = gpd.read_file(cper_f)

# dissolve by pasture to make sure each pasture is a single polygon
df_cper = df_cper.dissolve(by='Past_Name_').reset_index().rename(columns={'Past_Name_': 'Pasture'})

# convert pasture polygons to an xarray (1 m gridded raster) object with pasture name as value
cper_info = df_cper[['Pasture', 'geometry']].reset_index(drop=True).reset_index().rename(columns={'index': 'id'})
past_dict = {row.id+1: row.Pasture for _, row in cper_info.iterrows()}
past_dict[0] = 'UNK'
past_mask_shp = [(row.geometry, row.id+1) for _, row in cper_info.iterrows()]

In [5]:
"""
Create daily and weekly groupings of GPS data
"""
# create unique ID by Pasture-Steer combo
df_gps['Pasture_Steer_ID'] = df_gps['Pasture'] + '_' + df_gps['Steer_ID']

# create daily grouping and count number of unique Pastures, Steers and Pasture-Steer combos on each day
df_gps_daily = df_gps.groupby(
    'Fix_Date').agg({'Pasture': lambda x: x.unique(),
                     'Steer_ID': lambda x: x.unique(),
                     'Pasture_Steer_ID': lambda x: x.unique()}).reset_index()

# create weekly grouping listing all pastures, Steers and Pasture-Steer combos present all days in the week
df_gps_wkly = df_gps_daily.groupby(
    df_gps_daily['Fix_Date'].dt.isocalendar().week).agg(
    {'Pasture': lambda x: [i for i in list(df_gps['Pasture'].unique()) if all(i in y for y in x)],
     'Steer_ID': lambda x: [i for i in list(df_gps['Steer_ID'].unique()) if all(i in y for y in x)],
     'Pasture_Steer_ID': lambda x: [i for i in list(df_gps['Pasture_Steer_ID'].unique()) if all(i in y for y in x)]}).reset_index()

In [6]:
"""
Print out all combos of Pastures available for each week across a set range of weeks
"""
reqd_pastures = []
min_n_pastures = 5
for r in range(min_n_pastures, 10):
    past_combos = list(itertools.combinations(df_gps['Pasture'].unique(), r))
    past_combos_sum = np.array(
        [sum(all(elem in test2 for elem in test1) for test2 in df_gps_wkly['Pasture']) for test1 in past_combos])
    top_combos = np.array(past_combos)[(past_combos_sum == np.max(past_combos_sum)) | (past_combos_sum > 5)]
    if len(reqd_pastures) > 0:
        reqd_mask = [all(p in x for p in reqd_pastures) for x in top_combos]
        top_combos = top_combos[reqd_mask]
    top_combos = [list(x) for x in top_combos]
    top_dates = []
    for combo in top_combos:
        mask = [all(j in i for j in combo) for i in df_gps_wkly['Pasture']]
        top_dates.append(
            [datetime.date.fromisocalendar(2017, x, 1).strftime('%b %d') for x in df_gps_wkly['week'][mask].values])
    print('\n\n' + '-------------------------------------' + '\n' + 
          'No. Pastures: ' + str(r) + '\n'
          'Max no. weeks: ' + str(np.max(past_combos_sum)) + '\n' +
          '-------------------------------------')
    for dates, combo in [(y, x) for y, x in sorted(zip(top_dates, top_combos), key=lambda pair: len(pair[0]), reverse=True)]:
        print('Pastures=' + str(len(combo)) + ': ' + str(combo))
        past_mask = [all(j in i for j in combo) for i in df_gps_wkly['Pasture']]
        #steer_mask = [all([j in y] for y in i for j in combo) for i in df_gps_wkly['Pasture_Steer_ID']]
        steer_ct = [sum([sum([j in y for j in combo]) for y in x]) for x in df_gps_wkly['Pasture_Steer_ID'][past_mask]]
        #steers = [len(x) for x in df_gps_wkly['Pasture_Steer_ID'][steer_mask]]
        #dates = [datetime.date.fromisocalendar(2017, x, 1).strftime('%B %d') for x in df_gps_wkly['week'][mask].values]
        print('Weeks=' + str(len(dates)) + ' (start dates): ' + str(dates))
        print('Steers: ' + str(steer_ct) + '\n')




-------------------------------------
No. Pastures: 5
Max no. weeks: 11
-------------------------------------
Pastures=5: ['7NW', '20SE', '15E', '26E', '31E']
Weeks=11 (start dates): ['May 22', 'May 29', 'Jun 05', 'Jul 10', 'Jul 17', 'Jul 24', 'Jul 31', 'Aug 14', 'Aug 21', 'Sep 11', 'Sep 18']
Steers: [8, 8, 8, 6, 6, 6, 6, 6, 6, 8, 7]

Pastures=5: ['17N', '20SE', '15E', '26E', '31E']
Weeks=10 (start dates): ['May 22', 'May 29', 'Jun 05', 'Jul 10', 'Jul 24', 'Jul 31', 'Aug 07', 'Sep 04', 'Sep 11', 'Sep 18']
Steers: [8, 8, 8, 7, 6, 6, 6, 8, 8, 8]

Pastures=5: ['20SE', '15E', '26E', '25SE', '31E']
Weeks=10 (start dates): ['May 22', 'May 29', 'Jun 05', 'Jul 10', 'Jul 17', 'Aug 07', 'Aug 21', 'Sep 04', 'Sep 11', 'Sep 18']
Steers: [8, 8, 8, 7, 6, 6, 6, 7, 7, 7]

Pastures=5: ['7NW', '17N', '20SE', '15E', '31E']
Weeks=9 (start dates): ['May 22', 'May 29', 'Jun 05', 'Jul 10', 'Jul 24', 'Jul 31', 'Sep 11', 'Sep 18', 'Sep 25']
Steers: [9, 9, 9, 7, 6, 6, 9, 8, 9]

Pastures=5: ['7NW', '17N', '15E'

In [7]:
"""
Set the final combo of training pastures for modelling and create mask for extracting weeks when all are available
"""
# specific final pastures
fnl_pastures = ['7NW', '17N', '20SE', '15E', '26E', '31E']

# create mask of final weeks based on pastures
wk_mask = [all(j in i for j in fnl_pastures) for i in df_gps_wkly['Pasture']]

# check which weeks are in final dataset
df_gps_wkly['week'][wk_mask].values

array([21, 22, 23, 28, 30, 31, 37, 38], dtype=int64)

In [8]:

# read in CPER pasture shapefile
df_cper = gpd.read_file(cper_f)

# dissolve by pasture to make sure each pasture is a single polygon
df_cper = df_cper.dissolve(by='Past_Name_').reset_index().rename(columns={'Past_Name_': 'Pasture'})

# convert pasture polygons to an xarray (1 m gridded raster) object with pasture name as value
cper_info = df_cper[['Pasture', 'geometry']].reset_index(drop=True).reset_index().rename(columns={'index': 'id'})
past_dict = {row.id+1: row.Pasture for _, row in cper_info.iterrows()}
past_dict[0] = 'UNK'
past_mask_shp = [(row.geometry, row.id+1) for _, row in cper_info.iterrows()]

In [9]:
"""
Create 1 m and 30 m raster template for subsequent analysis
"""
# read in 1 m vegetation community raster as template and create pasture mask at 1 m
xr_veg = xr.open_rasterio(veg_f).squeeze('band')
past_mask_1m = shp2mask(shp=past_mask_shp, xr_object=xr_veg)
past_mask_1m.values = np.array([past_dict[i] for i in past_mask_1m.values.flatten()]).reshape(past_mask_1m.shape)

# read in 30 m template and create pasture mask at 30 m
xr_30m = xr.open_rasterio(tif_30m_f).squeeze().reset_coords(drop=True)
past_mask_30m = shp2mask(shp=past_mask_shp, xr_object=xr_30m)
past_mask_30m.values = np.array([past_dict[i] for i in past_mask_30m.values.flatten()]).reshape(past_mask_30m.shape)

In [10]:
"""
Loop through final pastures and weeks to extract final dataset 
of grazing time per cell for a given pasture, steer and time period
"""

# create function for splitting line into segments
def segments(curve):
    return list(map(LineString, zip(curve.coords[:-1], curve.coords[1:])))


# create new column in gps dataset for week-of-year
df_gps['week'] = df_gps['Fix_Date'].dt.isocalendar().week

# create DataFrame template for saving data
df_wkly_grid = pd.DataFrame(columns=['mod_data', 'week', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y', 'grazing_secs'])

# loop through week and pasture and steer to create each dataset
for wk in df_gps_wkly['week'].unique():

#for wk in df_gps_wkly['week'][wk_mask].values:
    print('-------------' +  
          'Week starting: ' + 
          datetime.date.fromisocalendar(2017, wk, 1).strftime('%b %d') + ' (' + str(wk) +
         ') -------------')
    for past in df_gps['Pasture'][df_gps['week'] == wk].unique():
        # check if pasture+week is in training dataset
        if past in fnl_pastures and wk in df_gps_wkly['week'][wk_mask].values:
            mod_data = 'train'
        else:
            mod_data = 'test'
        steer_mask = [past in i for i in df_gps_wkly['Pasture_Steer_ID'][df_gps_wkly['week'] == wk].iloc[0]]
        steer_list = np.array(df_gps_wkly['Steer_ID'][df_gps_wkly['week'] == wk].iloc[0])[steer_mask]
        if len(steer_list) > 0:
            print('Pasture: ' + past + ' (' + mod_data + ')')
        for steer in steer_list:
            print('  ' + steer)
            
            #zip the coordinates into a point object and convert to a GeoData Frame
            geometry = [Point(xy) for xy in zip(df_gps.UTM_X_fnl[(df_gps.week == wk) &
                                                             (df_gps.Pasture == past) &
                                                             (df_gps.Steer_ID == steer) &
                                                             (df_gps.bout_act == 'Grazing')],
                                                df_gps.UTM_Y_fnl[(df_gps.week == wk) &
                                                             (df_gps.Pasture == past) & 
                                                             (df_gps.Steer_ID == steer) &
                                                             (df_gps.bout_act == 'Grazing')])]
            if len(geometry) == 0:
                print("    SKIPPING STEER: All points masked.")
                continue
            else:
                gdf_pts = gpd.GeoDataFrame(df_gps[(df_gps.week == wk) & 
                                                  (df_gps.Pasture == past) &
                                                  (df_gps.Steer_ID == steer) &
                                                  (df_gps.bout_act == 'Grazing')],
                                           geometry=geometry)

                # create lines for each grazing bout, grouped by each steer and date
                gdf_bouts = gdf_pts.groupby(['Fix_Date', 
                                             'grazing_bout'])['geometry'].apply(lambda x: LineString(x.tolist()))
                gdf_bouts = gpd.GeoDataFrame(gdf_bouts, geometry='geometry').reset_index(drop=False)

                # calculate movement rate of each step
                secs_m = gdf_pts.groupby(['Fix_Date',
                                 'grazing_bout'])['moverate'].apply(lambda x: 60/x.shift(-1))

                # drop null values created after last step
                secs_m = secs_m[~np.isnan(secs_m)]

                # create geodata frame of steps
                gdf_steps = gdf_bouts
                gdf_steps['geometry'] = gdf_bouts.apply(lambda x: GeometryCollection(segments(x.geometry)), axis=1)
                gdf_steps = gpd.GeoDataFrame(gdf_steps, geometry='geometry').reset_index(drop=False)
                gdf_steps = gdf_steps.explode()

                # add movement rate to geodata frame
                gdf_steps['secs_m'] = secs_m.values

                xr_temp_1m = past_mask_1m.where(past_mask_1m == past, drop=True)
                xr_temp_1m = xr_temp_1m.assign_attrs({'transform': (1.0, 0.0, xr_temp_1m.x.min().values - 0.5, 
                                                           0.0, -1.0, xr_temp_1m.y.max().values + 0.5)})
                steps_info = gdf_steps[['secs_m', 'geometry']].reset_index(drop=True).reset_index().rename(
                    columns={'index': 'id'})
                steps_shp = [(row.geometry, row.secs_m) for _, row in steps_info.iterrows()]
                xr_steps_1m = shp2mask(shp=steps_shp, xr_object=xr_temp_1m, dtype='float32',
                                    merge_alg=MergeAlg('ADD'), all_touched=True)#.stack(z=['x', 'y'])
                xr_temp_30m = past_mask_30m.where(past_mask_30m == past, drop=True)
                xr_temp_30m = xr_temp_30m.assign_attrs({'transform': (30.0, 0.0, xr_temp_30m.x.min().values - 15.0, 
                                                           0.0, -30.0, xr_temp_30m.y.max().values + 15.0)})
                xr_steps_30m = xr_steps_1m.coarsen(dim=dict(x=30, y=30), boundary='trim').sum().reindex_like(
                    xr_temp_30m, 
                    method='nearest').stack(z=['x', 'y'])
                df_tmp = pd.DataFrame({
                    'mod_data': mod_data,
                    'week': wk,
                    'Pasture': past,
                    'Steer_ID': steer,
                    'UTM_X': [i[0] for i in xr_steps_30m['z'].values[xr_temp_30m.stack(z=['x', 'y']).values == past]],
                    'UTM_Y': [i[1] for i in xr_steps_30m['z'].values[xr_temp_30m.stack(z=['x', 'y']).values == past]],
                    'grazing_secs': xr_steps_30m.values[xr_temp_30m.stack(z=['x', 'y']).values == past]
                })
                df_wkly_grid = df_wkly_grid.append(df_tmp)

-------------Week starting: May 08 (19) -------------
Pasture: 5E (test)
  3772_D1_2017
    SKIPPING STEER: All points masked.
  3773_D1_2017
    SKIPPING STEER: All points masked.
Pasture: 24W (test)
  3770_D1_2017
Pasture: 15E (test)
  3772_D1_2017
  3773_D1_2017
Pasture: 26E (test)
  3775_D1_2017
Pasture: 25SE (test)
  3776_D1_2017
  3778_D1_2017
-------------Week starting: May 15 (20) -------------
Pasture: 5E (test)
  2285_D1_2017
  2286_D1_2017
  3772_D1_2017
    SKIPPING STEER: All points masked.
  3773_D1_2017
    SKIPPING STEER: All points masked.
Pasture: 17N (test)
  2289_D1_2017
Pasture: 19N (test)
  2291_D1_2017
Pasture: 20SE (test)
  2293_D1_2017
  2294_D1_2017
Pasture: 24W (test)
  3770_D1_2017
Pasture: 15E (test)
  3772_D1_2017
  3773_D1_2017
Pasture: 25SE (test)
  3776_D1_2017
  3778_D1_2017
Pasture: 31E (test)
  4598_D1_2017
-------------Week starting: May 22 (21) -------------
Pasture: 5E (test)
  2285_D1_2017
  2286_D1_2017
  3772_D1_2017
    SKIPPING STEER: All poi

In [11]:
"""
Save dataset to csv
"""
df_wkly_grid.to_csv(os.path.join(outDIR, out_gridded_f), index_label='index_id')