In [1]:
import numpy as np
import pandas as pd
from datetime import date
import time
import scipy
import scipy.signal
import os, os.path
import matplotlib
import matplotlib.pyplot as plt

import sys
sys.path.append('/Users/hn/Documents/00_GitHub/Ag/NASA/Python_codes/')
import NASA_core as nc
# import NASA_plot_core as rcp

# Set up Directories

In [2]:
# Output root for the ML-ready artifacts produced by this notebook.
ML_data_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/"

# Input folder holding the "SG_*" vegetation-index time-series CSVs.
data_dir = "/Users/hn/Documents/01_research_data/NASA/VI_TS/05_SG_TS/"

# Toss small fields (keep only fields with ExctAcr > 10).

In [3]:
# Load the evaluation-set metadata and keep only fields whose ExctAcr exceeds 10.
# The shape is printed before and after so the number of dropped rows is visible.
meta_dir = "/Users/hn/Documents/01_research_data/NASA/parameters/"
meta = pd.read_csv(meta_dir + "evaluation_set.csv")
print (meta.shape)

is_large_field = meta["ExctAcr"] > 10
meta = meta.loc[is_large_field]
print (meta.shape)
meta.head(2)

(6340, 8)
(3539, 8)


Unnamed: 0,ID,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
0,100010_WSDA_SF_2017,alfalfa hay,center pivot,wsda,34,34.310305,2017/09/12,Grant
1,100204_WSDA_SF_2017,alfalfa hay,center pivot,wsda,62,61.826535,2017/08/09,Grant


In [5]:
# Load the non-expert vote table (one row per field ID).
# The path gets its own name: the original passed an undefined variable
# (`nonExpert_2605_votes`) to read_csv, which raises NameError on a fresh kernel.
nonExpert_V_path = meta_dir + "nonExpert_2605_votes.csv"
nonExpert_V = pd.read_csv(nonExpert_V_path)
nonExpert_V.shape

(2605, 12)

# Read the SG time-series (TS) files for EVI

In [6]:
VI_idx = "EVI"

file_names = ["SG_Walla2015_" + VI_idx + "_JFD.csv", "SG_AdamBenton2016_" + VI_idx + "_JFD.csv",
              "SG_Grant2017_" + VI_idx + "_JFD.csv", "SG_FranklinYakima2018_" + VI_idx + "_JFD.csv"]

# Collect the per-file frames in a list and concatenate once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all earlier rows.
yearly_frames = []

for file in file_names:
    curr_file = pd.read_csv(data_dir + file)
    curr_file['human_system_start_time'] = pd.to_datetime(curr_file['human_system_start_time'])

    # These data are for 3 years. The middle one is the correct one.
    # With 2 years present the later one is used; with 1 year, that year.
    all_years = sorted(curr_file.human_system_start_time.dt.year.unique())
    if len(all_years) in (2, 3):
        proper_year = all_years[1]
    elif len(all_years) == 1:
        proper_year = all_years[0]
    else:
        # Without this branch `proper_year` would be undefined for the first
        # file, or silently reuse the year chosen for the previous file.
        raise ValueError("{} spans {} distinct years; expected 1, 2 or 3".format(file, len(all_years)))

    curr_file = curr_file[curr_file.human_system_start_time.dt.year == proper_year]
    yearly_frames.append(curr_file)

data = pd.concat(yearly_frames, ignore_index=True)

# Clamp negative index values to zero.
data.loc[data[VI_idx] < 0, VI_idx] = 0
data.head(2)

Unnamed: 0,ID,human_system_start_time,EVI
0,135073_WSDA_SF_2015,2015-01-10,0.054429
1,135073_WSDA_SF_2015,2015-01-20,0.051731


# Filter the time series down to the non-expert fields

In [7]:
# Peek at the vote table. Note that the `Vote` column holds text labels
# (e.g. "single"), not numeric crop counts.
nonExpert_V.head(2)

Unnamed: 0,ID,Hossein,Supriya,Kirti,Mike,Min,Vote,CropTyp,Irrigtn,DataSrc,county,ExctAcr
0,100041_WSDA_SF_2017,Single Crop,Single Crop,Single Crop,Single Crop,Unsure,single,"grape, wine",drip,wsda,Grant,23.441439
1,100048_WSDA_SF_2017,Single Crop,Single Crop,Single Crop,Unsure,none,single,"bean, green",rill,wsda,Grant,18.03324


In [8]:
# Restrict the time series to fields present in the non-expert vote table,
# printing the number of distinct field IDs before and after the filter.
print (data["ID"].unique().size)

nonExpert_IDs = list(nonExpert_V["ID"].unique())
keep_rows = data["ID"].isin(nonExpert_IDs)
data = data.loc[keep_rows].reset_index(drop=True)

print (data["ID"].unique().size)

77530
2605


In [9]:
# Draw each field's VI time series as a bare (axis-free) line and save it as
# "<label>_<ID>.jpg", where <label> is derived from the field's `Vote` entry.
# nonExpert_V is the same as expert labels! (original author's note)
plot_path = "/Users/hn/Documents/01_research_data/NASA/ML_data/SG_nonExpert_" + VI_idx + "/"
os.makedirs(plot_path, exist_ok=True)  # loop-invariant, so done once up front

for curr_ID in data.ID.unique():
    crr_fld = data[data.ID == curr_ID].copy()
    crr_fld.reset_index(drop=True, inplace=True)

    fig, ax = plt.subplots()
    fig.set_size_inches(10, 2.5)
    ax.grid(False)
    ax.plot(crr_fld['human_system_start_time'], crr_fld[VI_idx],
            c='dodgerblue', linewidth=5)
    ax.axis("off")

    # Clip the x-range to the first/last observation and use a fixed y-range
    # so every saved image has the same vertical scale.
    left = crr_fld['human_system_start_time'].iloc[0]
    right = crr_fld['human_system_start_time'].iloc[-1]
    ax.set_xlim([left, right])
    ax.set_ylim([-0.005, 1])

    # `Vote` stores text such as "single", so the former `crop_count==1` test
    # never matched and every image was saved with the "double" prefix.
    # The numeric form is still accepted in case the column is recoded.
    crop_count = nonExpert_V.loc[nonExpert_V.ID == curr_ID, "Vote"].values[0]
    if crop_count == 1 or str(crop_count).strip().lower() == "single":
        crop_count_letter = "single"
    else:
        # NOTE(review): every vote other than single is labelled "double",
        # as in the original else-branch; confirm no other vote values exist.
        crop_count_letter = "double"

    fig_name = plot_path + crop_count_letter + "_" + curr_ID + '.jpg'
    fig.savefig(fname=fig_name, dpi=200, bbox_inches='tight', facecolor="w")
    plt.close(fig)  # release the figure; thousands are created in this loop
print (plot_path)

/Users/hn/Documents/01_research_data/NASA/ML_data/SG_nonExpert_V_EVI/


# SG NDVI

In [15]:
VI_idx = "NDVI"
file_names = ["SG_Walla2015_" + VI_idx + "_JFD.csv", "SG_AdamBenton2016_" + VI_idx + "_JFD.csv",
              "SG_Grant2017_" + VI_idx + "_JFD.csv", "SG_FranklinYakima2018_" + VI_idx + "_JFD.csv"]

# Collect the per-file frames in a list and concatenate once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all earlier rows.
yearly_frames = []

for file in file_names:
    curr_file = pd.read_csv(data_dir + file)
    curr_file['human_system_start_time'] = pd.to_datetime(curr_file['human_system_start_time'])

    # These data are for 3 years. The middle one is the correct one.
    # With 2 years present the later one is used; with 1 year, that year.
    all_years = sorted(curr_file.human_system_start_time.dt.year.unique())
    if len(all_years) in (2, 3):
        proper_year = all_years[1]
    elif len(all_years) == 1:
        proper_year = all_years[0]
    else:
        # Without this branch `proper_year` would be undefined for the first
        # file, or silently reuse the year chosen for the previous file.
        raise ValueError("{} spans {} distinct years; expected 1, 2 or 3".format(file, len(all_years)))

    curr_file = curr_file[curr_file.human_system_start_time.dt.year == proper_year]
    yearly_frames.append(curr_file)

data = pd.concat(yearly_frames, ignore_index=True)

# Clamp negative index values to zero.
data.loc[data[VI_idx] < 0, VI_idx] = 0

# Keep only the fields that appear in the non-expert vote table.
nonExpert_V_IDs = list(nonExpert_V.ID.unique())
data = data[data.ID.isin(nonExpert_V_IDs)]
data.reset_index(drop=True, inplace=True)
print (len(data.ID.unique()))
data.head(2)

2605


Unnamed: 0,ID,human_system_start_time,NDVI
0,144551_WSDA_SF_2015,2015-01-10,0.192163
1,144551_WSDA_SF_2015,2015-01-20,0.182884


In [16]:
# Draw each field's VI time series as a bare (axis-free) line and save it as
# "<label>_<ID>.jpg", where <label> is derived from the field's `Vote` entry.
# nonExpert_V is the same as expert labels! (original author's note)
plot_path = "/Users/hn/Documents/01_research_data/NASA/ML_data/SG_nonExpert_" + VI_idx + "/"
os.makedirs(plot_path, exist_ok=True)  # loop-invariant, so done once up front

for curr_ID in data.ID.unique():
    crr_fld = data[data.ID == curr_ID].copy()
    crr_fld.reset_index(drop=True, inplace=True)

    fig, ax = plt.subplots()
    fig.set_size_inches(10, 2.5)
    ax.grid(False)
    ax.plot(crr_fld['human_system_start_time'], crr_fld[VI_idx],
            c='dodgerblue', linewidth=5)
    ax.axis("off")

    # Clip the x-range to the first/last observation and use a fixed y-range
    # so every saved image has the same vertical scale.
    left = crr_fld['human_system_start_time'].iloc[0]
    right = crr_fld['human_system_start_time'].iloc[-1]
    ax.set_xlim([left, right])
    ax.set_ylim([-0.005, 1])

    # `Vote` stores text such as "single", so the former `crop_count==1` test
    # never matched and every image was saved with the "double" prefix.
    # The numeric form is still accepted in case the column is recoded.
    crop_count = nonExpert_V.loc[nonExpert_V.ID == curr_ID, "Vote"].values[0]
    if crop_count == 1 or str(crop_count).strip().lower() == "single":
        crop_count_letter = "single"
    else:
        # NOTE(review): every vote other than single is labelled "double",
        # as in the original else-branch; confirm no other vote values exist.
        crop_count_letter = "double"

    fig_name = plot_path + crop_count_letter + "_" + curr_ID + '.jpg'
    fig.savefig(fname=fig_name, dpi=200, bbox_inches='tight', facecolor="w")
    plt.close(fig)  # release the figure; thousands are created in this loop

plot_path

'/Users/hn/Documents/01_research_data/NASA/ML_data/SG_nonExpert_NDVI/'

# Regular EVI

In [18]:
VI_idx = "EVI"

regular_data_dir = "/Users/hn/Documents/01_research_data/NASA/VI_TS/04_regularized_TS/"
file_names = ["regular_Walla2015_" + VI_idx + "_JFD.csv", "regular_AdamBenton2016_" + VI_idx + "_JFD.csv",
              "regular_Grant2017_" + VI_idx + "_JFD.csv", "regular_FranklinYakima2018_" + VI_idx + "_JFD.csv"]

# Collect the per-file frames in a list and concatenate once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all earlier rows.
yearly_frames = []

for file in file_names:
    curr_file = pd.read_csv(regular_data_dir + file)
    curr_file['human_system_start_time'] = pd.to_datetime(curr_file['human_system_start_time'])

    # These data are for 3 years. The middle one is the correct one.
    # With 2 years present the later one is used; with 1 year, that year.
    all_years = sorted(curr_file.human_system_start_time.dt.year.unique())
    if len(all_years) in (2, 3):
        proper_year = all_years[1]
    elif len(all_years) == 1:
        proper_year = all_years[0]
    else:
        # Without this branch `proper_year` would be undefined for the first
        # file, or silently reuse the year chosen for the previous file.
        raise ValueError("{} spans {} distinct years; expected 1, 2 or 3".format(file, len(all_years)))

    curr_file = curr_file[curr_file.human_system_start_time.dt.year == proper_year]
    yearly_frames.append(curr_file)

data = pd.concat(yearly_frames, ignore_index=True)

# Clamp negative index values to zero.
data.loc[data[VI_idx] < 0, VI_idx] = 0

# Keep only the fields that appear in the non-expert vote table.
nonExpert_V_IDs = list(nonExpert_V.ID.unique())
data = data[data.ID.isin(nonExpert_V_IDs)]
data.reset_index(drop=True, inplace=True)

print (len(data.ID.unique()))
data.head(2)

2605


Unnamed: 0,ID,human_system_start_time,EVI
0,144551_WSDA_SF_2015,2015-01-10,0.027474
1,144551_WSDA_SF_2015,2015-01-20,0.040504


In [19]:
# Draw each field's VI time series as a bare (axis-free) line and save it as
# "<label>_<ID>.jpg", where <label> is derived from the field's `Vote` entry.
# nonExpert_V is the same as expert labels! (original author's note)
plot_path = "/Users/hn/Documents/01_research_data/NASA/ML_data/regular_nonExpert_" + VI_idx + "/"
os.makedirs(plot_path, exist_ok=True)  # loop-invariant, so done once up front

for curr_ID in data.ID.unique():
    crr_fld = data[data.ID == curr_ID].copy()
    crr_fld.reset_index(drop=True, inplace=True)

    fig, ax = plt.subplots()
    fig.set_size_inches(10, 2.5)
    ax.grid(False)
    ax.plot(crr_fld['human_system_start_time'], crr_fld[VI_idx],
            c='dodgerblue', linewidth=5)
    ax.axis("off")

    # Clip the x-range to the first/last observation and use a fixed y-range
    # so every saved image has the same vertical scale.
    left = crr_fld['human_system_start_time'].iloc[0]
    right = crr_fld['human_system_start_time'].iloc[-1]
    ax.set_xlim([left, right])
    ax.set_ylim([-0.005, 1])

    # `Vote` stores text such as "single", so the former `crop_count==1` test
    # never matched and every image was saved with the "double" prefix.
    # The numeric form is still accepted in case the column is recoded.
    crop_count = nonExpert_V.loc[nonExpert_V.ID == curr_ID, "Vote"].values[0]
    if crop_count == 1 or str(crop_count).strip().lower() == "single":
        crop_count_letter = "single"
    else:
        # NOTE(review): every vote other than single is labelled "double",
        # as in the original else-branch; confirm no other vote values exist.
        crop_count_letter = "double"

    fig_name = plot_path + crop_count_letter + "_" + curr_ID + '.jpg'
    fig.savefig(fname=fig_name, dpi=200, bbox_inches='tight', facecolor="w")
    plt.close(fig)  # release the figure; thousands are created in this loop

plot_path

'/Users/hn/Documents/01_research_data/NASA/ML_data/regular_nonExpert_EVI/'

# Regular NDVI

In [20]:
VI_idx = "NDVI"

regular_data_dir = "/Users/hn/Documents/01_research_data/NASA/VI_TS/04_regularized_TS/"
file_names = ["regular_Walla2015_" + VI_idx + "_JFD.csv", "regular_AdamBenton2016_" + VI_idx + "_JFD.csv",
              "regular_Grant2017_" + VI_idx + "_JFD.csv", "regular_FranklinYakima2018_" + VI_idx + "_JFD.csv"]

# Collect the per-file frames in a list and concatenate once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all earlier rows.
yearly_frames = []

for file in file_names:
    curr_file = pd.read_csv(regular_data_dir + file)
    curr_file['human_system_start_time'] = pd.to_datetime(curr_file['human_system_start_time'])

    # These data are for 3 years. The middle one is the correct one.
    # With 2 years present the later one is used; with 1 year, that year.
    all_years = sorted(curr_file.human_system_start_time.dt.year.unique())
    if len(all_years) in (2, 3):
        proper_year = all_years[1]
    elif len(all_years) == 1:
        proper_year = all_years[0]
    else:
        # Without this branch `proper_year` would be undefined for the first
        # file, or silently reuse the year chosen for the previous file.
        raise ValueError("{} spans {} distinct years; expected 1, 2 or 3".format(file, len(all_years)))

    curr_file = curr_file[curr_file.human_system_start_time.dt.year == proper_year]
    yearly_frames.append(curr_file)

data = pd.concat(yearly_frames, ignore_index=True)

# Clamp negative index values to zero.
data.loc[data[VI_idx] < 0, VI_idx] = 0

# Keep only the fields that appear in the non-expert vote table.
nonExpert_V_IDs = list(nonExpert_V.ID.unique())
data = data[data.ID.isin(nonExpert_V_IDs)]
data.reset_index(drop=True, inplace=True)

print (len(data.ID.unique()))
data.head(2)

2605


Unnamed: 0,ID,human_system_start_time,NDVI
0,144551_WSDA_SF_2015,2015-01-10,0.3464
1,144551_WSDA_SF_2015,2015-01-20,0.058013


In [21]:
# Draw each field's VI time series as a bare (axis-free) line and save it as
# "<label>_<ID>.jpg", where <label> is derived from the field's `Vote` entry.
# nonExpert_V is the same as expert labels! (original author's note)
plot_path = "/Users/hn/Documents/01_research_data/NASA/ML_data/regular_nonExpert_" + VI_idx + "/"
os.makedirs(plot_path, exist_ok=True)  # loop-invariant, so done once up front

for curr_ID in data.ID.unique():
    crr_fld = data[data.ID == curr_ID].copy()
    crr_fld.reset_index(drop=True, inplace=True)

    fig, ax = plt.subplots()
    fig.set_size_inches(10, 2.5)
    ax.grid(False)
    ax.plot(crr_fld['human_system_start_time'], crr_fld[VI_idx],
            c='dodgerblue', linewidth=5)
    ax.axis("off")

    # Clip the x-range to the first/last observation and use a fixed y-range
    # so every saved image has the same vertical scale.
    left = crr_fld['human_system_start_time'].iloc[0]
    right = crr_fld['human_system_start_time'].iloc[-1]
    ax.set_xlim([left, right])
    ax.set_ylim([-0.005, 1])

    # `Vote` stores text such as "single", so the former `crop_count==1` test
    # never matched and every image was saved with the "double" prefix.
    # The numeric form is still accepted in case the column is recoded.
    crop_count = nonExpert_V.loc[nonExpert_V.ID == curr_ID, "Vote"].values[0]
    if crop_count == 1 or str(crop_count).strip().lower() == "single":
        crop_count_letter = "single"
    else:
        # NOTE(review): every vote other than single is labelled "double",
        # as in the original else-branch; confirm no other vote values exist.
        crop_count_letter = "double"

    fig_name = plot_path + crop_count_letter + "_" + curr_ID + '.jpg'
    fig.savefig(fname=fig_name, dpi=200, bbox_inches='tight', facecolor="w")
    plt.close(fig)  # release the figure; thousands are created in this loop

plot_path

'/Users/hn/Documents/01_research_data/NASA/ML_data/regular_nonExpert_NDVI/'