In [1]:
import os
import glob
import rasterio as rio
from rasterio.mask import raster_geometry_mask
from rasterio import plot
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import pyproj
from shapely.geometry import shape, MultiPolygon
from shapely.ops import transform

In [8]:
# Functions

# Get panel masks

def get_panel_masks(geojson_path, ref_img_path, src_crs='EPSG:4326'):
    '''Build one raster mask per vine panel, leaving only the pixels inside
    that panel unmasked.

    The panel polygons are read from a geojson file, ordered by
    (row, panel), reprojected from src_crs to the CRS of the reference
    image and then rasterised against that image.

    Inputs:
    geojson_path: path to a geojson file with the panel geometries.
        Every feature needs 'row' and 'panel' properties; they define the
        order of the returned masks.
    ref_img_path: path to the .tif file for the image.
    src_crs: CRS of the geojson coordinates, in any form accepted by
        pyproj.CRS. Defaults to 'EPSG:4326' (WGS84), the value that was
        previously hard-coded.

    Outputs:
    A list of masks (one per panel, in row/panel order), as Boolean arrays.
    True values are masked pixels. False values are unmasked pixels.
    '''
    # Only the parsing needs the file handle, so release it straight away.
    with open(geojson_path, 'r') as f:
        panels = json.load(f)

    # Ground truth order: by vineyard row, then by panel within the row.
    features_ordered = sorted(
        panels['features'],
        key=lambda feature: (feature['properties']['row'],
                             feature['properties']['panel']))
    panel_aois = [feature['geometry'] for feature in features_ordered]

    with rio.open(ref_img_path) as src:
        dst_crs = src.crs

        # Project the panel geometries into the image coordinates.
        # always_xy=True fixes the axis order to (x, y), i.e. (lon, lat)
        # for geographic CRSs, which is the order geojson uses.
        project = pyproj.Transformer.from_crs(
            pyproj.CRS(src_crs), dst_crs, always_xy=True).transform
        proj_panel_aois = [transform(project, shape(p)) for p in panel_aois]

        # raster_geometry_mask returns a tuple whose first item is the
        # Boolean mask; crop=False keeps the mask on the full image grid.
        masks = [raster_geometry_mask(src, [p], crop=False)[0]
                 for p in proj_panel_aois]

    return masks
   

# Get reflectance df

def get_rfl(img_path, masks):
    '''
    Function to extract per-panel reflectance values for 4-band SkySat images.
    Calculates the mean reflectance values for the pixels in each panel.
    The output is formatted as a pandas dataframe.

    Inputs:
    img_path (str) - abs. path to the image. The acquisition date is taken
        from the filename: the eighth '_'-separated token counted from the end.
    masks (list) - a list of masks that correspond to the panel aois. Each mask item is a Boolean array.

    Outputs:
    A pandas dataframe with one row per mask and the columns
    'acquired', 'plot', 'blue', 'green', 'red', 'nir'.
    '''
    band_names = ('blue', 'green', 'red', 'nir')  # bands 1-4, in file order

    # date from image filename (basename copes with any path separator)
    acquired = os.path.basename(img_path).split('_')[-8]

    # Read each band once. The pixel data does not depend on the panel, so
    # re-reading the raster for every mask only repeats the same disk I/O.
    with rio.open(img_path, 'r') as img:
        bands = img.read([1, 2, 3, 4])

    data = []
    for panel_number, mask in enumerate(masks):
        record = {'acquired': acquired,
                  'plot': panel_number + 1}  # add one - panels are numbered starting at 1 in the 'real world'
        for name, band in zip(band_names, bands):
            # Mean over the unmasked (in-panel) pixels only.
            # NOTE(review): 10000 is presumably the reflectance scale factor
            # of the image product - confirm against the product spec.
            record[name] = np.ma.mean(np.ma.array(band, mask=mask)) / 10000
        data.append(record)

    return pd.DataFrame(data)

In [3]:
# Inputs for the extraction: the geojson with the panel geometries and the
# directory of SkySat scenes co-registered by Bruno Aragon.
json_path = '../data/crit_panels_geojson.geojson'
img_dir = '../data/images/2020/SkySat/Coregistered_Scenes/'

# Pathnames of every '*_cor.tif' scene in that directory
# (glob returns them in no particular order).
img_paths = glob.glob(os.path.join(img_dir, '*_cor.tif'))

img_paths


['../data/images/2020/SkySat/Coregistered_Scenes/20200812_153924_ssc3d2_0013_analytic_SR_corg_cor.tif',
 '../data/images/2020/SkySat/Coregistered_Scenes/20200805_154044_ssc13d2_0013_analytic_SR_corg_cor.tif',
 '../data/images/2020/SkySat/Coregistered_Scenes/20200618_160058_ssc3d2_0013_analytic_SR_corg_cor.tif',
 '../data/images/2020/SkySat/Coregistered_Scenes/20200710_155028_ssc3d2_0012_analytic_SR_corg_cor.tif',
 '../data/images/2020/SkySat/Coregistered_Scenes/20200625_154704_ssc12d2_0013_analytic_SR_corg_cor.tif']

In [7]:
# Check the date token that get_rfl pulls out of an image filename:
# drop the directory part, then take the eighth '_'-separated token from the end.
img_paths[0].rsplit('/', 1)[-1].split('_')[-8]

'20200812'

In [4]:
# Sanity check: show the geojson path that is passed to get_panel_masks.
json_path

'../data/crit_panels_geojson.geojson'

In [9]:
# Get dataframes for all images in directory

# Collect the per-image frames and combine them once at the end.
# Concatenating and re-sorting inside the loop copies the growing frame on
# every iteration without changing the final result.
frames = []
for img_path in img_paths:
    masks = get_panel_masks(json_path, img_path)
    df = get_rfl(img_path, masks)
    frames.append(df)

if frames:
    # Index labels are deliberately left as produced by get_rfl
    # (no ignore_index), exactly as before.
    master_df = pd.concat(frames, axis=0).sort_values(by=['acquired', 'plot'])
else:
    master_df = pd.DataFrame()  # no images matched: keep the old empty result

master_df

Unnamed: 0,acquired,plot,blue,green,red,nir
0,20200618,1,0.040044,0.077491,0.071216,0.353575
1,20200618,2,0.044693,0.081807,0.077133,0.336587
2,20200618,3,0.042773,0.079753,0.074740,0.331823
3,20200618,4,0.040121,0.076396,0.070693,0.343482
4,20200618,5,0.034862,0.073497,0.060772,0.358279
...,...,...,...,...,...,...
315,20200812,316,0.031534,0.060766,0.036586,0.376969
316,20200812,317,0.032153,0.061000,0.036253,0.390390
317,20200812,318,0.031903,0.060238,0.033503,0.387841
318,20200812,319,0.031094,0.060109,0.034412,0.406644


In [10]:
# Append corresponding row and panel numbers for each plot

panel_csv = pd.read_csv('../data/crit_panels.csv')
cols = panel_csv.iloc[:,1:] # adjust this according to the .csv
#cols

# master_df repeats the same index labels once per image, so a column-wise
# pd.concat has to align duplicated labels - something pandas can reject
# ("Reindexing only valid with uniquely valued Index objects").
# Look the row/panel numbers up by plot number instead: 'plot' is the
# 1-based position of the panel, so CSV record k (counted from 0)
# describes plot k + 1.
panel_lookup = cols.copy()
panel_lookup.index = cols.index + 1
appended = master_df.join(panel_lookup, on='plot')
appended

Unnamed: 0,acquired,plot,blue,green,red,nir,row,panel
0,20200618,1,0.040044,0.077491,0.071216,0.353575,1,1
1,20200618,2,0.044693,0.081807,0.077133,0.336587,1,2
2,20200618,3,0.042773,0.079753,0.074740,0.331823,1,3
3,20200618,4,0.040121,0.076396,0.070693,0.343482,1,4
4,20200618,5,0.034862,0.073497,0.060772,0.358279,1,5
...,...,...,...,...,...,...,...,...
315,20200812,316,0.031534,0.060766,0.036586,0.376969,20,12
316,20200812,317,0.032153,0.061000,0.036253,0.390390,20,13
317,20200812,318,0.031903,0.060238,0.033503,0.387841,20,14
318,20200812,319,0.031094,0.060109,0.034412,0.406644,20,15


In [11]:
# Distinct acquisition dates present in the image table
appended['acquired'].unique()


array(['20200618', '20200625', '20200710', '20200805', '20200812'],
      dtype=object)

In [12]:
# Load the 2020 scouting table, then list its survey dates and its columns
scout_csv = pd.read_csv('../data/scout/scout_2020_gdf.csv')
print(scout_csv['Date'].unique())
print(scout_csv.columns)

['2020-06-18' '2020-06-25' '2020-07-01' '2020-07-09' '2020-07-15'
 '2020-07-21' '2020-07-30' '2020-08-06' '2020-08-13' '2020-08-20'
 '2020-09-09']
Index(['Date', 'Row', 'Panel', 'Treatment', 'Block', 'PM_severity',
       'DM_severity', 'total_dis', 'geometry', 'centroid'],
      dtype='object')


In [13]:
# Match scout data to image data

# Define filtering and matching parameters

first_row = 1
last_row = 10

# Image acquisition date -> scouting date it is paired with (the same day
# or one day apart). Single source of truth for both the scout-date filter
# and the 'Date' column added to the image dataframe.
acquired_to_scout_date = {
    '20200618': '2020-06-18',
    '20200625': '2020-06-25',
    '20200710': '2020-07-09',
    '20200805': '2020-08-06',
    '20200812': '2020-08-13',
}
match_dates = list(acquired_to_scout_date.values())
match_cols =['row', 'panel', 'Date']


def match_days(row):
    '''Return the scouting date paired with row['acquired'] (None if there is no pairing).'''
    return acquired_to_scout_date.get(row['acquired'])


# Filter and join datasets

# Keep rows first_row..last_row (both inclusive). The .copy() makes the
# filtered frame independent, so adding 'Date' below does not write to a
# slice of `appended`.
img_filtered = appended[appended['row'].between(first_row, last_row)].copy()
scout_filtered = scout_csv[scout_csv['Date'].isin(match_dates)]
scout_filtered = scout_filtered.rename(columns={'Row':'row', 'Panel':'panel'})

# Create a 'Date' column in the image dataframe to match the scout date
img_filtered['Date'] = img_filtered['acquired'].map(acquired_to_scout_date)

# Join and drop NANs + unneeded columns
joined = img_filtered.join(scout_filtered.set_index(match_cols), on=match_cols)
cleaned = joined.dropna(axis=0,subset=['Treatment', 'Block', 'DM_severity', 'PM_severity', 'total_dis', 'geometry', 'centroid'])

In [15]:
# Show the joined table in date order. sort_values returns a new frame
# rather than sorting in place, so the sorted frame has to be the displayed
# expression itself; `cleaned` is left untouched.
# mergesort is stable: rows sharing a Date keep their current relative order.
cleaned.sort_values(by='Date', kind='mergesort')

Unnamed: 0,acquired,plot,blue,green,red,nir,row,panel,Date,Treatment,Block,PM_severity,DM_severity,total_dis,geometry,centroid
0,20200618,1,0.040044,0.077491,0.071216,0.353575,1,1,2020-06-18,5.0,PM,0.0,0.00,0.00,POLYGON ((-77.0152645742787 42.878300949250196...,POINT (-77.0153084903025 42.8783140936145)
1,20200618,2,0.044693,0.081807,0.077133,0.336587,1,2,2020-06-18,9.0,PM,0.0,0.05,5.05,POLYGON ((-77.01535545427274 42.87831851925121...,POINT (-77.0153965503027 42.87833067861654)
3,20200618,4,0.040121,0.076396,0.070693,0.343482,1,4,2020-06-18,18.0,PM,0.0,0.00,0.50,POLYGON ((-77.01552730426143 42.87835065925314...,POINT (-77.01557007030289 42.87836309362055)
4,20200618,5,0.034862,0.073497,0.060772,0.358279,1,5,2020-06-18,6.0,PM,0.0,0.00,15.10,POLYGON ((-77.01561588425558 42.87836680925413...,POINT (-77.01565649030296 42.878378758622546)
5,20200618,6,0.039300,0.075070,0.068893,0.337817,1,6,2020-06-18,2.0,PM,0.0,0.00,5.65,"POLYGON ((-77.01570014425 42.878381989255075, ...",POINT (-77.01574402030295 42.87839504362453)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,20200812,154,0.033227,0.056373,0.036703,0.367113,10,10,2020-08-13,12.0,DM,0.0,0.60,0.60,POLYGON ((-77.01605999902254 42.87867018016665...,POINT (-77.0161013453033 42.878682743632794)
154,20200812,155,0.029468,0.054482,0.033250,0.364829,10,11,2020-08-13,3.0,DM,0.0,0.25,0.25,"POLYGON ((-77.0161457490169 42.8786865901676, ...",POINT (-77.01618532530341 42.878698738634725)
156,20200812,157,0.028348,0.053731,0.030669,0.392379,10,13,2020-08-13,4.0,DM,0.0,0.20,0.20,"POLYGON ((-77.0163147390058 42.87871901016951,...",POINT (-77.01635581030357 42.87873143863867)
157,20200812,158,0.031935,0.059768,0.037213,0.380077,10,14,2020-08-13,16.0,DM,0.0,3.45,3.45,"POLYGON ((-77.0163999390002 42.87873515017044,...",POINT (-77.01644346530357 42.878748438640656)


In [16]:
# Summarise the joined table: number of entries, dtypes and non-null count per column
cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 499 entries, 0 to 158
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   acquired     499 non-null    object 
 1   plot         499 non-null    int64  
 2   blue         499 non-null    float64
 3   green        499 non-null    float64
 4   red          499 non-null    float64
 5   nir          499 non-null    float64
 6   row          499 non-null    int64  
 7   panel        499 non-null    int64  
 8   Date         499 non-null    object 
 9   Treatment    499 non-null    float64
 10  Block        499 non-null    object 
 11  PM_severity  499 non-null    float64
 12  DM_severity  499 non-null    float64
 13  total_dis    499 non-null    float64
 14  geometry     499 non-null    object 
 15  centroid     499 non-null    object 
dtypes: float64(8), int64(3), object(5)
memory usage: 66.3+ KB


In [14]:
# If using a raster with masked pixels

# Keep only the rows that have a value in all four band columns
no_nullpix = cleaned.dropna(subset=['blue', 'green', 'red', 'nir'], how='any', axis=0)
no_nullpix.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 299 entries, 1 to 158
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   acquired     299 non-null    object 
 1   plot         299 non-null    int64  
 2   blue         299 non-null    float64
 3   green        299 non-null    float64
 4   red          299 non-null    float64
 5   nir          299 non-null    float64
 6   row          299 non-null    int64  
 7   panel        299 non-null    int64  
 8   Date         299 non-null    object 
 9   Treatment    299 non-null    float64
 10  Block        299 non-null    object 
 11  PM_severity  299 non-null    float64
 12  DM_severity  299 non-null    float64
 13  total_dis    299 non-null    float64
 14  geometry     299 non-null    object 
 15  centroid     299 non-null    object 
dtypes: float64(8), int64(3), object(5)
memory usage: 39.7+ KB


In [17]:
# Export dataframe as .csv

# Destination folder for the 2020 image + scout table
output_fp = '../data/img_scout_dfs/2020/'

# The export itself is disabled; un-comment the next line to write the file.
#cleaned.to_csv(output_fp+'coreg_skysat_scout_2020.csv', index=False)

In [2]:
# Merge incidence data

# Read both tables relative to the notebook, like the other data files used
# here, instead of from one user's absolute home-directory paths.
# NOTE(review): assumes '../data' is the same folder as the former
# '/Users/kathleenkanaley/Desktop/grapes_from_space/data' - confirm.
inc = pd.read_csv('../data/scout/scout_incidence/scout_inc_2020.csv')

other = pd.read_csv('../data/img_scout_dfs/2020/coreg_skysat_scout_2020.csv')

In [4]:
# Compare the column sets of the two tables before merging them
for frame in (inc, other):
    print(frame.columns)

Index(['Date', 'Row', 'Panel', 'Treatment', 'Block', 'PM_severity',
       'DM_severity', 'total_dis', 'geometry', 'centroid', 'PM_inc', 'DM_inc'],
      dtype='object')
Index(['acquired', 'plot', 'blue', 'green', 'red', 'nir', 'row', 'panel',
       'Date', 'Treatment', 'Block', 'PM_severity', 'DM_severity', 'total_dis',
       'geometry', 'centroid'],
      dtype='object')


In [6]:
# Give the key columns the same lower-case names that `other` uses.
# Rebinding the result (rather than inplace=True) keeps the cell free of
# in-place mutation of an object loaded in an earlier cell.
inc = inc.rename(columns={"Row": "row", "Panel": "panel"})
inc.columns

Index(['Date', 'row', 'panel', 'Treatment', 'Block', 'PM_severity',
       'DM_severity', 'total_dis', 'geometry', 'centroid', 'PM_inc', 'DM_inc'],
      dtype='object')

In [11]:
# Left-merge the incidence table onto the image + scout table, matching on
# the columns listed below; every row of `other` is kept.
shared_cols = ['Date', 'row', 'panel', 'Treatment', 'Block',
               'PM_severity', 'DM_severity', 'total_dis',
               'geometry', 'centroid']
merged_2020 = pd.merge(other, inc, how='left', on=shared_cols)
merged_2020.shape


(499, 18)

In [12]:
# Shape of the table before the merge, for comparison with merged_2020.shape
other.shape

(499, 16)

In [13]:
# Inspect the merged table
merged_2020

Unnamed: 0,acquired,plot,blue,green,red,nir,row,panel,Date,Treatment,Block,PM_severity,DM_severity,total_dis,geometry,centroid,PM_inc,DM_inc
0,20200618,1,0.040044,0.077491,0.071216,0.353575,1,1,2020-06-18,5.0,PM,0.0,0.00,0.00,POLYGON ((-77.0152645742787 42.878300949250196...,POINT (-77.0153084903025 42.8783140936145),0.0,0.0
1,20200618,2,0.044693,0.081807,0.077133,0.336587,1,2,2020-06-18,9.0,PM,0.0,0.05,5.05,POLYGON ((-77.01535545427274 42.87831851925121...,POINT (-77.0153965503027 42.87833067861654),0.0,5.0
2,20200618,4,0.040121,0.076396,0.070693,0.343482,1,4,2020-06-18,18.0,PM,0.0,0.00,0.50,POLYGON ((-77.01552730426143 42.87835065925314...,POINT (-77.01557007030289 42.87836309362055),0.0,0.0
3,20200618,5,0.034862,0.073497,0.060772,0.358279,1,5,2020-06-18,6.0,PM,0.0,0.00,15.10,POLYGON ((-77.01561588425558 42.87836680925413...,POINT (-77.01565649030296 42.878378758622546),0.0,0.0
4,20200618,6,0.039300,0.075070,0.068893,0.337817,1,6,2020-06-18,2.0,PM,0.0,0.00,5.65,"POLYGON ((-77.01570014425 42.878381989255075, ...",POINT (-77.01574402030295 42.87839504362453),0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,20200812,154,0.033227,0.056373,0.036703,0.367113,10,10,2020-08-13,12.0,DM,0.0,0.60,0.60,POLYGON ((-77.01605999902254 42.87867018016665...,POINT (-77.0161013453033 42.878682743632794),0.0,10.0
495,20200812,155,0.029468,0.054482,0.033250,0.364829,10,11,2020-08-13,3.0,DM,0.0,0.25,0.25,"POLYGON ((-77.0161457490169 42.8786865901676, ...",POINT (-77.01618532530341 42.878698738634725),0.0,5.0
496,20200812,157,0.028348,0.053731,0.030669,0.392379,10,13,2020-08-13,4.0,DM,0.0,0.20,0.20,"POLYGON ((-77.0163147390058 42.87871901016951,...",POINT (-77.01635581030357 42.87873143863867),0.0,10.0
497,20200812,158,0.031935,0.059768,0.037213,0.380077,10,14,2020-08-13,16.0,DM,0.0,3.45,3.45,"POLYGON ((-77.0163999390002 42.87873515017044,...",POINT (-77.01644346530357 42.878748438640656),0.0,30.0


In [14]:
# Output folder, given relative to the notebook like the other data paths
# rather than as one user's absolute home-directory path.
# NOTE(review): assumes '../data' is the same folder as the former
# '/Users/kathleenkanaley/Desktop/grapes_from_space/data' - confirm.
output_fp = '../data/img_scout_dfs/'
#merged_2020.to_csv(output_fp+'2020/INC_coreg_skysat_scout_2020.csv', index=False)