In [69]:
import os
import glob

import pandas as pd
import numpy as np
import rasterio as rio

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns

import scipy
from scipy import stats
#import statannotations
from statannot import add_stat_annotation


In [70]:
def img_to_df(img_path, cols):
    with rio.open(img_path) as src:
        as_array = src.read()
        band_arrs = []
        
        for i in np.arange(src.count):
            band_arr = as_array[i].flatten()
            band_arr = band_arr/10000
            
            band_arrs.append(band_arr)
        
        df = pd.DataFrame(band_arrs,cols).T
        df['Date'] = img_path.split('/')[-1].split('_')[0]
        
        # Optional: drops entries with no severity rating. Comment out if unnecessary.
        no_nans = df.dropna(subset=['total_dis_sev', 'dm_sev', 'dm_inc']) 
        
        return no_nans

In [71]:
def add_vis(df):
    df['pri']= (df['green']- df['greenI'])/(df['green']+df['greenI'])
    df['clre'] = (df['nir']/df['rede'])-1
    df['ndre'] = (df['nir'] - df['rede']) / (df['nir'] + df['rede'])
    df['tcari'] = 3 * (( (df['rede'] - df['red']) - 0.2 * (df['rede'] - df['green']) ) * (df['rede'] / df['red']))
    df['ndvi']= (df['nir']-df['red'])/(df['nir']+df['red'])
    df['evi']= 2.5*(df['nir']-df['red'])/(df['nir']+6*df['red']-7.5*df['blue']+1)
    df['savi']= 1.5*(df['nir']-df['red'])/(df['nir']+df['red']+0.5)
    df['msavi']=((2*(df['nir']) + 1 - np.sqrt((2*df['nir'] + 1)*2 - 8*(df['nir'] - df['red'])))/2)
    df['sr']= df['nir']/df['red']
    df['msr']= ((df['nir']/df['red'])-1)/(np.sqrt((df['nir']/df['red'])+1))
    df['gri']=df['green']/df['red']
    df['arvi']=(df['nir']-(2*df['red']-df['blue']))/(df['nir']+(2*df['red']-df['blue']))
    df['cblue_blue'] = (df['c_blue']-df['blue'])/(df['c_blue']+df['blue'])
    df['greenI_cblue'] = (df['greenI']-df['c_blue'])/(df['c_blue']+df['greenI'])
    df['green_cblue'] = (df['green']-df['c_blue'])/(df['c_blue']+df['green'])
    df['yellow_cblue'] = (df['yellow']-df['c_blue'])/(df['c_blue']+df['yellow'])
    df['cblue_red'] = (df['c_blue']-df['red'])/(df['c_blue']+df['red'])
    df['rede_cblue'] = (df['rede']-df['c_blue'])/(df['c_blue']+df['rede'])
    df['nir_cblue'] = (df['nir']-df['c_blue'])/(df['c_blue']+df['nir'])
    df['greenI_blue'] = (df['greenI']-df['blue'])/(df['blue']+df['greenI'])
    df['green_blue'] = (df['green']-df['blue'])/(df['blue']+df['green'])
    df['yellow_blue'] = (df['yellow']-df['blue'])/(df['blue']+df['yellow'])
    df['blue_red'] = (df['blue']-df['red'])/(df['blue']+df['red'])
    df['rede_blue'] = (df['rede']-df['blue'])/(df['blue']+df['rede'])
    df['nir_blue'] = (df['nir']-df['blue'])/(df['nir']+df['blue'])
    df['greenI_yellow'] = (df['greenI']-df['yellow'])/(df['greenI']+df['yellow'])
    df['greenI_red'] = (df['greenI']-df['red'])/(df['greenI']+df['red'])
    df['rede_greenI'] = (df['rede']-df['greenI'])/(df['greenI']+df['rede'])
    df['nir_greenI'] = (df['nir']-df['greenI'])/(df['greenI']+df['nir'])
    df['green_yellow'] = (df['green']-df['yellow'])/(df['green']+df['yellow'])
    df['green_red'] = (df['green']-df['red'])/(df['green']+df['red'])
    df['rede_green'] = (df['rede']-df['green'])/(df['green']+df['rede'])
    df['nir_green'] = (df['nir']-df['green'])/(df['green']+df['nir'])
    df['yellow_red'] = (df['yellow']-df['red'])/(df['yellow']+df['red'])
    df['rede_yellow'] = (df['rede']-df['yellow'])/(df['yellow']+df['rede'])
    df['nir_yellow'] = (df['nir']-df['yellow'])/(df['yellow']+df['nir'])
    df['rede_red']= (df['rede']-df['red'])/(df['rede']+df['red'])


    
    return df

In [72]:
data_dir = ('/Users/kathleenkanaley/Desktop/grapes_from_space/data/')
dis_ras_20 = glob.glob(data_dir+'images/2020/PScope/dis_band_*/*.tif')
dis_ras_21 = glob.glob(data_dir+'images/2021/PScope/dis_band_*/*.tif')
dis_ras_22 = glob.glob(data_dir+'images/2022/PScope/dis_band_*/*.tif')

In [73]:
dis_ras_20

['/Users/kathleenkanaley/Desktop/grapes_from_space/data/images/2020/PScope/dis_band_2020/20200801_151354_03_2212_3B_AnalyticMS_SR_8b_harmonized_clip_clipped.tif_disease.tif',
 '/Users/kathleenkanaley/Desktop/grapes_from_space/data/images/2020/PScope/dis_band_2020/20200713_151457_44_2278_3B_AnalyticMS_SR_8b_harmonized_clip_clipped.tif_disease.tif',
 '/Users/kathleenkanaley/Desktop/grapes_from_space/data/images/2020/PScope/dis_band_2020/20200616_151605_21_2304_3B_AnalyticMS_SR_8b_harmonized_clip_clipped.tif_disease.tif']

In [74]:
cols = ['c_blue','blue','greenI', 'green', 'yellow', 'red', 'rede', 'nir','total_dis_sev','dm_sev', 'dm_inc']

# Get a dataframe for all 2020 images

dfs_2020 = [img_to_df(dis_ras_20[i], cols) for i in np.arange(len(dis_ras_20))]
df_2020 = pd.concat(dfs_2020)

df_2020['total_dis_sev'] = df_2020['total_dis_sev']*10000
df_2020['dm_sev'] = df_2020['dm_sev']*10000
df_2020['dm_inc'] = df_2020['dm_inc']*10000

df_2020['inc_class'] = 'low'
df_2020.loc[df_2020['dm_inc']>25,'inc_class']= 'high'
    
df_2020['severity_class'] = 'low'
df_2020.loc[df_2020['dm_sev']>10,'severity_class']= 'high'

vis_2020 = add_vis(df_2020)

vis_2020.columns

Index(['c_blue', 'blue', 'greenI', 'green', 'yellow', 'red', 'rede', 'nir',
       'total_dis_sev', 'dm_sev', 'dm_inc', 'Date', 'inc_class',
       'severity_class', 'pri', 'clre', 'ndre', 'tcari', 'ndvi', 'evi', 'savi',
       'msavi', 'sr', 'msr', 'gri', 'arvi', 'cblue_blue', 'greenI_cblue',
       'green_cblue', 'yellow_cblue', 'cblue_red', 'rede_cblue', 'nir_cblue',
       'greenI_blue', 'green_blue', 'yellow_blue', 'blue_red', 'rede_blue',
       'nir_blue', 'greenI_yellow', 'greenI_red', 'rede_greenI', 'nir_greenI',
       'green_yellow', 'green_red', 'rede_green', 'nir_green', 'yellow_red',
       'rede_yellow', 'nir_yellow', 'rede_red'],
      dtype='object')

In [75]:
vis_2020.Date.unique()

array(['20200801'], dtype=object)

In [77]:
# Get df for 2021 and 2022

#2021

dfs_2021 = [img_to_df(dis_ras_21[i], cols) for i in np.arange(len(dis_ras_21))]
df_2021 = pd.concat(dfs_2021)

df_2021['total_dis_sev'] = df_2021['total_dis_sev']*10000
df_2021['dm_sev'] = df_2021['dm_sev']*10000
df_2021['dm_inc'] = df_2021['dm_inc']*10000

df_2021['inc_class'] = 'low'
df_2021.loc[df_2021['dm_inc']>25,'inc_class']= 'high'
    
df_2021['severity_class'] = 'low'
df_2021.loc[df_2021['dm_sev']>10,'severity_class']= 'high'

vis_2021 = add_vis(df_2021)
vis_2021 = vis_2021[vis_2021['Date']!='20210804']
#vis_2021.columns



In [78]:
#2022 df

dfs_2022 = [img_to_df(dis_ras_22[i], cols) for i in np.arange(len(dis_ras_22))]
df_2022 = pd.concat(dfs_2022)

df_2022['total_dis_sev'] = df_2022['total_dis_sev']*10000
df_2022['dm_sev'] = df_2022['dm_sev']*10000
df_2022['dm_inc'] = df_2022['dm_inc']*10000

df_2022['inc_class'] = 'low'
df_2022.loc[df_2022['dm_inc']>25,'inc_class']= 'high'
    
df_2022['severity_class'] = 'low'
df_2022.loc[df_2022['dm_sev']>10,'severity_class']= 'high'

vis_2022 = add_vis(df_2022)

print(vis_2022.Date.unique())
print(vis_2021.Date.unique())

['20220726' '20220801' '20220630' '20220704' '20220624']
['20210808' '20210726' '20210715']


In [79]:
def spearman_df(veg_inds, df, var_name):
    corr_coeffs = []
    p_vals = []
    vi_list = []
    
    for vi in veg_inds:
        sp_r, p = scipy.stats.spearmanr(df[var_name], df[vi])
        corr_coeffs.append(sp_r)
        p_vals.append(p)
        vi_list.append(vi)
        dictionary = {'vi':vi_list,'spearmans_r':corr_coeffs, 'p_value':p_vals}
        corr_df = pd.DataFrame(dictionary).sort_values(by='spearmans_r')
        
    return corr_df

In [80]:
vis = ['pri', 'clre', 'ndre', 'tcari', 'ndvi', 'evi', 'savi','msavi',
       'gri', 'arvi', 'cblue_blue', 'greenI_cblue', 'green_cblue',
       'yellow_cblue', 'cblue_red', 'rede_cblue', 'nir_cblue', 'greenI_blue',
       'green_blue', 'yellow_blue', 'blue_red', 'rede_blue', 'nir_blue',
       'greenI_yellow', 'greenI_red', 'rede_greenI', 'nir_greenI',
       'green_yellow', 'green_red', 'rede_green', 'nir_green', 'yellow_red',
       'rede_yellow', 'nir_yellow', 'rede_red']

spear_20 = spearman_df(vis, vis_2020, 'dm_sev')
print("Spearman's rank correlation, GDM Severity 2020\n\n", spear_20)

Spearman's rank correlation, GDM Severity 2020

                vi  spearmans_r       p_value
27   green_yellow    -0.503692  2.962258e-20
32    rede_yellow    -0.500112  6.000367e-20
33     nir_yellow    -0.484095  1.277959e-18
23  greenI_yellow    -0.472495  1.061624e-17
10     cblue_blue    -0.331055  6.357033e-09
14      cblue_red    -0.326211  1.083527e-08
26     nir_greenI    -0.313708  4.111972e-08
0             pri    -0.275252  1.717229e-06
18     green_blue    -0.247175  1.873557e-05
25    rede_greenI    -0.247097  1.885279e-05
7           msavi    -0.222834  1.198293e-04
6            savi    -0.202566  4.852101e-04
22       nir_blue    -0.200643  5.503399e-04
21      rede_blue    -0.190648  1.039703e-03
8             gri    -0.180504  1.921913e-03
28      green_red    -0.180504  1.921913e-03
5             evi    -0.174062  2.793554e-03
3           tcari    -0.160685  5.839375e-03
4            ndvi    -0.150299  9.984706e-03
34       rede_red    -0.140399  1.617743e-02
9     

In [81]:
spear_20inc = spearman_df(vis, vis_2020, 'dm_inc')
print("Spearman's rank correlation, GDM Incidence 2020\n\n", spear_20inc)

Spearman's rank correlation, GDM Incidence 2020

                vi  spearmans_r       p_value
27   green_yellow    -0.613601  1.065020e-31
33     nir_yellow    -0.603138  2.051225e-30
32    rede_yellow    -0.600202  4.612876e-30
23  greenI_yellow    -0.574182  4.277639e-27
26     nir_greenI    -0.402588  7.649366e-13
0             pri    -0.347595  9.583293e-10
28      green_red    -0.340891  2.091288e-09
8             gri    -0.340891  2.091288e-09
5             evi    -0.308234  7.234150e-08
6            savi    -0.305653  9.405077e-08
7           msavi    -0.302689  1.267369e-07
14      cblue_red    -0.296168  2.414306e-07
4            ndvi    -0.292749  3.363781e-07
18     green_blue    -0.282201  9.106873e-07
9            arvi    -0.279272  1.192325e-06
25    rede_greenI    -0.276899  1.479832e-06
3           tcari    -0.275005  1.755828e-06
34       rede_red    -0.269809  2.788160e-06
22       nir_blue    -0.250738  1.404421e-05
10     cblue_blue    -0.245767  2.097022e-05
21   

In [65]:
spear_21sev = spearman_df(vis, vis_2021, 'dm_sev')
print("Spearman's rank correlation, GDM Severity 2021\n\n", spear_21sev)

Spearman's rank correlation, GDM Severity 2021

                vi  spearmans_r       p_value
26     nir_greenI    -0.355209  5.325332e-75
16      nir_cblue    -0.348831  3.203404e-72
34       rede_red    -0.348602  4.020223e-72
13   yellow_cblue    -0.344582  2.098018e-70
25    rede_greenI    -0.341231  5.421637e-69
12    green_cblue    -0.331593  5.015191e-65
15     rede_cblue    -0.330683  1.168327e-64
8             gri    -0.330253  1.739398e-64
28      green_red    -0.330252  1.742504e-64
11   greenI_cblue    -0.322900  1.436525e-61
3           tcari    -0.306772  1.882962e-55
27   green_yellow    -0.300955  2.448335e-53
4            ndvi    -0.297656  3.686241e-52
32    rede_yellow    -0.289971  1.779936e-49
6            savi    -0.288685  4.914734e-49
33     nir_yellow    -0.287165  1.620405e-48
7           msavi    -0.276103  7.691230e-45
5             evi    -0.275123  1.598471e-44
30      nir_green    -0.264711  3.148208e-41
0             pri    -0.261427  3.215262e-40
22    

In [82]:
spear_21inc = spearman_df(vis, vis_2021, 'dm_inc')
print("Spearman's rank correlation, GDM Incidence 2021\n\n", spear_21inc)

Spearman's rank correlation, GDM Incidence 2021

                vi  spearmans_r       p_value
33     nir_yellow    -0.417722  8.010474e-80
27   green_yellow    -0.415896  4.491561e-79
32    rede_yellow    -0.415199  8.652909e-79
26     nir_greenI    -0.413345  4.907176e-78
16      nir_cblue    -0.408234  5.551498e-76
12    green_cblue    -0.406267  3.348592e-75
25    rede_greenI    -0.405804  5.106781e-75
0             pri    -0.405315  7.958952e-75
15     rede_cblue    -0.402330  1.179819e-73
11   greenI_cblue    -0.400639  5.372584e-73
4            ndvi    -0.390413  4.255700e-69
13   yellow_cblue    -0.389079  1.341363e-68
34       rede_red    -0.379211  5.554691e-65
3           tcari    -0.365295  4.333714e-60
6            savi    -0.355672  7.607914e-57
30      nir_green    -0.353790  3.184268e-56
5             evi    -0.348783  1.368081e-54
8             gri    -0.342975  9.847448e-53
28      green_red    -0.342973  9.866586e-53
7           msavi    -0.329129  1.838680e-48
9    

In [67]:
spear_22sev = spearman_df(vis, vis_2022, 'dm_sev')
print("Spearman's rank correlation, GDM Severity 2022\n\n", spear_22sev)

Spearman's rank correlation, GDM Severity 2022

                vi  spearmans_r       p_value
29     rede_green    -0.061533  5.900345e-04
10     cblue_blue    -0.051544  4.007949e-03
25    rede_greenI    -0.045921  1.036885e-02
14      cblue_red    -0.019283  2.819667e-01
20       blue_red     0.001819  9.191758e-01
19    yellow_blue     0.008902  6.194191e-01
0             pri     0.018420  3.040865e-01
3           tcari     0.019405  2.789349e-01
31     yellow_red     0.022200  2.154602e-01
34       rede_red     0.029428  1.005586e-01
32    rede_yellow     0.032559  6.923009e-02
24     greenI_red     0.038046  3.372608e-02
13   yellow_cblue     0.039989  2.562291e-02
21      rede_blue     0.042093  1.880467e-02
23  greenI_yellow     0.062223  5.113291e-04
5             evi     0.067280  1.714286e-04
30      nir_green     0.068262  1.374296e-04
17    greenI_blue     0.068894  1.190193e-04
9            arvi     0.072200  5.500706e-05
15     rede_cblue     0.076564  1.887586e-05
6     

In [68]:
spear_22inc = spearman_df(vis, vis_2022, 'dm_inc')
print("Spearman's rank correlation, GDM Incidence 2022\n\n", spear_22inc)

Spearman's rank correlation, GDM Incidence 2022

                vi  spearmans_r   p_value
19    yellow_blue    -0.059091  0.000968
18     green_blue    -0.032474  0.069953
13   yellow_cblue    -0.025489  0.154958
0             pri    -0.024934  0.164143
31     yellow_red    -0.015675  0.381824
17    greenI_blue    -0.013481  0.451963
12    green_cblue    -0.011829  0.509286
21      rede_blue    -0.004263  0.812007
11   greenI_cblue    -0.002108  0.906386
10     cblue_blue     0.000075  0.996651
15     rede_cblue     0.001758  0.921891
14      cblue_red     0.017345  0.333176
25    rede_greenI     0.021657  0.226903
8             gri     0.023050  0.198398
28      green_red     0.023051  0.198372
27   green_yellow     0.027796  0.120895
29     rede_green     0.030581  0.087914
3           tcari     0.031842  0.075584
24     greenI_red     0.032127  0.073004
34       rede_red     0.037098  0.038416
20       blue_red     0.046830  0.008948
32    rede_yellow     0.052012  0.003688
23  gre