In [1]:
import numpy as np
import pandas as pd
import scipy, scipy.signal

from datetime import date
import time

import random
from random import seed
from random import random

import os, os.path
import shutil

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

import matplotlib
import matplotlib.pyplot as plt
from pylab import imshow

import h5py
import pickle
import sys


In [2]:
sys.path.append('/Users/hn/Documents/00_GitHub/Ag/NASA/Python_codes/')
import NASA_core as nc
# import NASA_plot_core.py as rcp

In [3]:
from tslearn.metrics import dtw as dtw_metric

# https://dtaidistance.readthedocs.io/en/latest/usage/dtw.html
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis

In [4]:
# Field metadata of the evaluation set, plus the subset of fields strictly above 10 acres.
meta_dir = "/Users/hn/Documents/01_research_data/NASA/parameters/"
meta = pd.read_csv(os.path.join(meta_dir, "evaluation_set.csv"))

is_over_10_acres = meta["ExctAcr"] > 10
meta_moreThan10Acr = meta.loc[is_over_10_acres]

for frame in (meta, meta_moreThan10Acr):
    print(frame.shape)
meta.head(2)

(6340, 8)
(3539, 8)


Unnamed: 0,ID,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
0,100010_WSDA_SF_2017,alfalfa hay,center pivot,wsda,34,34.310305,2017/09/12,Grant
1,100204_WSDA_SF_2017,alfalfa hay,center pivot,wsda,62,61.826535,2017/08/09,Grant


In [5]:
# Read Training Set Labels

In [6]:
# Labels (column `Vote`) of the training fields, keyed by `ID`.
training_set_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/"
ground_truth_labels = pd.read_csv(os.path.join(training_set_dir, "train_labels.csv"))

unique_votes = ground_truth_labels["Vote"].unique()
labelled_ID_count = len(ground_truth_labels["ID"].unique())
print("Unique Votes: ", unique_votes)
print(labelled_ID_count)
ground_truth_labels.head(2)

Unique Votes:  [2 1]
1849


Unnamed: 0,ID,Vote
0,99837_WSDA_SF_2017,2
1,114615_WSDA_SF_2017,1


In [8]:
# Name of the vegetation-index column; it is also spliced into the input file names below.
VI_idx="EVI"

# Read the Data

In [9]:
# Load the SG_* time-series files into SG_data_4_plot (drawn as the line in the
# mistake plots further down), keeping a single calendar year per file.
SG_data_dir = "/Users/hn/Documents/01_research_data/NASA/VI_TS/05_SG_TS/"
file_names = ["SG_Walla2015_" + VI_idx + "_JFD.csv", "SG_AdamBenton2016_" + VI_idx + "_JFD.csv",
              "SG_Grant2017_" + VI_idx + "_JFD.csv", "SG_FranklinYakima2018_" + VI_idx + "_JFD.csv"]

SG_frames = []
for file in file_names:
    curr_file = pd.read_csv(SG_data_dir + file)
    curr_file['human_system_start_time'] = pd.to_datetime(curr_file['human_system_start_time'])

    # A file may span up to 3 calendar years; with 2 or 3 years the second one is kept,
    # with a single year that one is kept.
    all_years = sorted(curr_file.human_system_start_time.dt.year.unique())
    if len(all_years) in (2, 3):
        proper_year = all_years[1]
    elif len(all_years) == 1:
        proper_year = all_years[0]
    else:
        # Previously this case silently reused the year chosen for the preceding file
        # (or raised NameError on the first file).
        raise ValueError("{}: expected 1 to 3 calendar years, found {}".format(file, all_years))

    SG_frames.append(curr_file[curr_file.human_system_start_time.dt.year == proper_year])

# Concatenate once (instead of growing a frame inside the loop) and renumber the rows.
SG_data_4_plot = pd.concat(SG_frames, ignore_index=True)
SG_data_4_plot.head(2)

Unnamed: 0,ID,human_system_start_time,EVI
0,135073_WSDA_SF_2015,2015-01-10,0.054429
1,135073_WSDA_SF_2015,2015-01-20,0.051731


In [13]:
# Read every CSV in landsat_dir and stack the rows that have a value in the VI_idx column.
landsat_dir = "/Users/hn/Documents/01_research_data/NASA/VI_TS/data_for_train_individual_counties/"
# sorted() makes the row order independent of the order in which the OS lists the files.
landsat_fNames = sorted(x for x in os.listdir(landsat_dir) if x.endswith(".csv"))

landsat_frames = []
for fName in landsat_fNames:
    curr = pd.read_csv(landsat_dir + fName)
    landsat_frames.append(curr.dropna(subset=[VI_idx]))

# Single concat instead of growing the frame per file; an empty folder still
# yields an empty DataFrame, as before.
landsat_DF = pd.concat(landsat_frames) if landsat_frames else pd.DataFrame()

In [14]:
# Load the regular_* time-series files into `data`, keeping a single calendar year per file.
data_dir = "/Users/hn/Documents/01_research_data/NASA/VI_TS/04_regularized_TS/"
file_names = ["regular_Walla2015_" + VI_idx + "_JFD.csv",
              "regular_AdamBenton2016_" + VI_idx + "_JFD.csv",
              "regular_Grant2017_" + VI_idx + "_JFD.csv",
              "regular_FranklinYakima2018_" + VI_idx + "_JFD.csv"]

regular_frames = []
for file in file_names:
    curr_file = pd.read_csv(data_dir + file)
    curr_file['human_system_start_time'] = pd.to_datetime(curr_file['human_system_start_time'])

    # A file may span up to 3 calendar years; with 2 or 3 years the second one is kept,
    # with a single year that one is kept.
    all_years = sorted(curr_file.human_system_start_time.dt.year.unique())
    if len(all_years) in (2, 3):
        proper_year = all_years[1]
    elif len(all_years) == 1:
        proper_year = all_years[0]
    else:
        # Previously this case silently reused the year chosen for the preceding file
        # (or raised NameError on the first file).
        raise ValueError("{}: expected 1 to 3 calendar years, found {}".format(file, all_years))

    regular_frames.append(curr_file[curr_file.human_system_start_time.dt.year == proper_year])

# Concatenate once (instead of growing a frame inside the loop) and renumber the rows.
data = pd.concat(regular_frames, ignore_index=True)
data.head(2)

Unnamed: 0,ID,human_system_start_time,EVI
0,135073_WSDA_SF_2015,2015-01-10,0.049624
1,135073_WSDA_SF_2015,2015-01-20,0.026503


In [15]:
# Keep only the time series of the fields that appear in the label table.
labelled_IDs = ground_truth_labels.ID.unique()
ground_truth = data.loc[data.ID.isin(labelled_IDs)].copy()

# Toss small fields

In [16]:
# Attach the field metadata to the labels and keep the fields of at least 10 acres.
ground_truth_labels_extended = pd.merge(ground_truth_labels, meta, on=['ID'], how='left')
ground_truth_labels = ground_truth_labels_extended[ground_truth_labels_extended.ExctAcr >= 10].copy()
ground_truth_labels.reset_index(drop=True, inplace=True)

# The two string pieces are joined by the parser, so the first one must end with a
# space (the previous output read "whosearea").
print("There are [{:.0f}] fields in total whose "
      "area adds up to [{:.2f}].".format(len(ground_truth_labels_extended),
                                         ground_truth_labels_extended.ExctAcr.sum()))

# Wording matches the `>= 10` filter above.
print("There are [{:.0f}] fields of at least 10 acres whose "
      "area adds up to [{:.2f}].".format(len(ground_truth_labels),
                                         ground_truth_labels.ExctAcr.sum()))


There are [1849] fields in total whosearea adds up to [85573.68].
There are [1342] fields larger than 10 acres whosearea adds up to [83430.79].


In [17]:
# Restrict both tables to the IDs listed in meta_moreThan10Acr, then renumber the rows.
large_field_IDs = list(meta_moreThan10Acr.ID)

ground_truth = ground_truth[ground_truth.ID.isin(large_field_IDs)].reset_index(drop=True)
ground_truth_labels = ground_truth_labels[ground_truth_labels.ID.isin(large_field_IDs)].reset_index(drop=True)

# Sort the order of time-series and experts' labels identically

In [18]:
# Sort the time series by (ID, date) and the labels by ID, then renumber the rows.
ground_truth = ground_truth.sort_values(by=["ID", 'human_system_start_time']).reset_index(drop=True)
ground_truth_labels = ground_truth_labels.sort_values(by=["ID"]).reset_index(drop=True)

series_IDs = list(ground_truth.ID.unique())
label_IDs = list(ground_truth_labels.ID.unique())

# Both tables must describe the same number of fields.
assert len(series_IDs) == len(label_IDs)

# Show the first and last ID of each table, then whether the ID sequences match exactly.
for position in (0, -1):
    print(ground_truth.ID.iloc[position])
    print(ground_truth_labels.ID.iloc[position])
    print("____________________________________")
print(series_IDs == label_IDs)

100048_WSDA_SF_2017
100048_WSDA_SF_2017
____________________________________
99909_WSDA_SF_2017
99909_WSDA_SF_2017
____________________________________
True


# Widen Ground Truth Table

In [19]:
# Reshape the long table (one row per ID and date) into one row per ID with
# n_steps value columns named <VI_idx>_1 ... <VI_idx>_<n_steps>.
n_steps = 36
EVI_colnames = [VI_idx + "_" + str(ii) for ii in range(1, n_steps + 1)]
columnNames = ["ID"] + EVI_colnames

unique_IDs = ground_truth.ID.unique()
ground_truth_wide = pd.DataFrame(columns=columnNames, index=range(len(unique_IDs)))
ground_truth_wide["ID"] = unique_IDs

# One pass over the data instead of re-filtering the whole table for every ID.
# The value column is looked up through VI_idx rather than a hard-coded "EVI".
values_by_ID = {an_ID: grp[VI_idx].values[:n_steps]
                for an_ID, grp in ground_truth.groupby("ID", sort=False)}

for row_idx, an_ID in enumerate(unique_IDs):
    curr_values = values_by_ID[an_ID]
    if len(curr_values) != n_steps:
        # A shorter series cannot fill the row; fail with a message that names the field.
        raise ValueError("{} has {} observations, expected {}".format(an_ID, len(curr_values), n_steps))
    # Row row_idx holds unique_IDs[row_idx] because the index is range(len(unique_IDs)).
    ground_truth_wide.loc[row_idx, EVI_colnames] = curr_values

# Split Train and Test Set

#### Make sure rows of ```ground_truth_wide``` and ```ground_truth_labels``` are in the same order

In [20]:
# Put the label rows in exactly the ID order of ground_truth_wide.
ground_truth_labels = (
    ground_truth_labels
    .set_index('ID')
    .reindex(index=ground_truth_wide['ID'])
    .reset_index()
)

In [21]:
# Smallest field area that survived the filtering, followed by a peek at the table.
smallest_field_acres = ground_truth_labels["ExctAcr"].min()
print(smallest_field_acres)
ground_truth_labels.head(2)

10.0708703567


Unnamed: 0,ID,Vote,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
0,100048_WSDA_SF_2017,1,"bean, green",rill,wsda,18,18.03324,2017/05/14,Grant
1,100081_WSDA_SF_2017,1,wheat,rill,wsda,16,15.959744,2017/08/09,Grant


In [22]:
# Drop the metadata columns again; only the ID and its label are needed from here on.
label_columns = ["ID", "Vote"]
ground_truth_labels = ground_truth_labels.loc[:, label_columns]
ground_truth_labels.head(2)

Unnamed: 0,ID,Vote
0,100048_WSDA_SF_2017,1
1,100081_WSDA_SF_2017,1


In [23]:
# 80/20 split, stratified on the label, with a fixed seed so the split is repeatable.
split_options = dict(test_size=0.2,
                     random_state=0,
                     shuffle=True,
                     stratify=ground_truth_labels["Vote"].to_numpy())

x_train_df, x_test_df, y_train_df, y_test_df = train_test_split(ground_truth_wide,
                                                                ground_truth_labels,
                                                                **split_options)

In [24]:
# Keep the raw observations of the test fields only and add the date column via NASA_core.
is_test_field = landsat_DF["ID"].isin(list(y_test_df["ID"]))
landsat_DF = landsat_DF.loc[is_test_field]
landsat_DF = nc.add_human_start_time_by_system_start_time(landsat_DF)
landsat_DF = landsat_DF.reset_index(drop=True)
landsat_DF.head(2)

Unnamed: 0,ID,NDVI,EVI,system_start_time,human_system_start_time
0,57621_WSDA_SF_2016,0.015873,0.031588,1420137000000.0,2015-01-01
1,55397_WSDA_SF_2016,0.019715,0.04485,1420137000000.0,2015-01-01


# Read SVM regular From Disk

In [25]:
model_dir = "/Users/hn/Documents/01_research_data/NASA/ML_Models/"

# NOTE: pickle.load can execute arbitrary code stored in the file; only load model
# files that come from a trusted source.
# The `with` blocks close the file handles, which the bare open() calls never did.
filename = model_dir + 'SVM_classifier_balanced_regularEVI_00.sav'
with open(filename, 'rb') as model_file:
    SVM_classifier_balanced_00 = pickle.load(model_file)

filename = model_dir + 'SVM_classifier_NoneWeight_regularEVI_00.sav'
with open(filename, 'rb') as model_file:
    SVM_classifier_NoneWeight_00 = pickle.load(model_file)

#### Predict SVMs on regular data

In [26]:
# Both SVMs are fed the same feature columns: everything after the leading ID column.
SVM_test_features = x_test_df.iloc[:, 1:]
SVM_classifier_NoneWeight_00_predictions = SVM_classifier_NoneWeight_00.predict(SVM_test_features)
SVM_classifier_balanced_00_predictions = SVM_classifier_balanced_00.predict(SVM_test_features)

#### Form Table of Mistakes of SVM

In [27]:
# Test labels next to the predictions of the balanced-weight SVM.
# (The stray SVM_None_y_test_df assignment was removed: the following cell builds
# that table from scratch.)
SVM_balanced_y_test_df = y_test_df.copy()
SVM_balanced_y_test_df["prediction"] = list(SVM_classifier_balanced_00_predictions)
SVM_balanced_y_test_df.head(2)

Unnamed: 0,ID,Vote,prediction
1221,7667_WSDA_SF_2016,1,1
1334,99748_WSDA_SF_2017,1,1


In [28]:
# Test labels next to the predictions of the un-weighted SVM.
# (The copy statement used to be written twice; once is enough.)
SVM_None_y_test_df = y_test_df.copy()
SVM_None_y_test_df["prediction"] = list(SVM_classifier_NoneWeight_00_predictions)
SVM_None_y_test_df.head(2)

Unnamed: 0,ID,Vote,prediction
1221,7667_WSDA_SF_2016,1,1
1334,99748_WSDA_SF_2017,1,1


#### Write the test results to disk

In [29]:
# Folder that receives the test-set prediction tables and plots; created if it does not exist yet.
test_result_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/test_results/"
os.makedirs(test_result_dir, exist_ok=True)

In [30]:
# Save the balanced-SVM test predictions (row index not written).
out_name = os.path.join(test_result_dir, "regular_SVM_balancedWeight_y_test.csv")
SVM_balanced_y_test_df.to_csv(out_name, index=False)

In [31]:
# Save the un-weighted-SVM test predictions (row index not written).
out_name = os.path.join(test_result_dir, "regular_SVM_NoneWeight_y_test.csv")
SVM_None_y_test_df.to_csv(out_name, index=False)

#### Print the crop types of the misclassified fields and plot them

In [32]:
# Attach the field metadata (crop type, acreage, ...) to both SVM result tables.
# Note: this overwrites its own inputs, so running the cell twice merges meta twice.
SVM_None_y_test_df = SVM_None_y_test_df.merge(meta, on="ID", how="left")
SVM_balanced_y_test_df = SVM_balanced_y_test_df.merge(meta, on="ID", how="left")

In [33]:
# balanced_y_test_df_A2_P1 = balanced_y_test_df[balanced_y_test_df.Vote==2]
# balanced_y_test_df_A2_P1 = balanced_y_test_df_A2_P1[balanced_y_test_df_A2_P1.prediction==1]

# balanced_y_test_df_A1_P2 = balanced_y_test_df[balanced_y_test_df.Vote==1]
# balanced_y_test_df_A1_P2 = balanced_y_test_df_A1_P2[balanced_y_test_df_A1_P2.prediction==2]

In [34]:
# Mistakes of the un-weighted SVM, split by direction (A = actual vote, P = prediction).
SVM_None_is_A2_P1 = (SVM_None_y_test_df.Vote == 2) & (SVM_None_y_test_df.prediction == 1)
SVM_None_is_A1_P2 = (SVM_None_y_test_df.Vote == 1) & (SVM_None_y_test_df.prediction == 2)

SVM_None_y_test_df_A2_P1 = SVM_None_y_test_df[SVM_None_is_A2_P1]
SVM_None_y_test_df_A1_P2 = SVM_None_y_test_df[SVM_None_is_A1_P2]

In [35]:
# Fields voted 2 but predicted 1 by the un-weighted SVM.
SVM_None_y_test_df_A2_P1

Unnamed: 0,ID,Vote,prediction,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
20,60678_WSDA_SF_2016,2,1,"bean, dry",center pivot,wsda,32,32.322881,2016/08/17 00:00:00,Adams
74,2660_WSDA_SF_2016,2,1,"corn, field",center pivot,wsda,127,127.042075,2016/08/31 00:00:00,Benton
199,53718_WSDA_SF_2016,2,1,triticale,center pivot,wsda,124,124.224249,2016/08/17 00:00:00,Adams
206,106509_WSDA_SF_2017,2,1,yellow mustard,center pivot,wsda,135,134.651954,2017/10/17,Grant


In [36]:
# Fields voted 1 but predicted 2 by the un-weighted SVM.
SVM_None_y_test_df_A1_P2

Unnamed: 0,ID,Vote,prediction,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
98,34906_WSDA_SF_2018,1,2,"corn, field",rill,wsda,34,33.537047,2018/10/01 00:00:00,Yakima
113,98404_WSDA_SF_2017,1,2,"bean, dry",center pivot,wsda,58,57.889176,2017/08/28,Grant
183,101372_WSDA_SF_2017,1,2,canola,center pivot,wsda,80,80.255504,2017/10/16,Grant
252,99643_WSDA_SF_2018,1,2,"corn, field",center pivot,wsda,37,36.938837,2018/06/06 00:00:00,Franklin


In [37]:
# Crop types of the vote-1 / predicted-2 mistakes, in alphabetical order.
sorted(SVM_None_y_test_df_A1_P2.CropTyp)

['bean, dry', 'canola', 'corn, field', 'corn, field']

#### Plot SVM mistakes

In [38]:
# Font sizes. Only `size` feeds the rcParams below; the other four names are not
# read by any active code in this part of the notebook.
size = 15
title_FontSize = 8
legend_FontSize = 8
tick_FontSize = 12
label_FontSize = 14

# Text sizes and title padding applied to every figure.
params = {
    'legend.fontsize': 15,
    'axes.labelsize': size,
    'axes.titlesize': size*1.2,
    'xtick.labelsize': size,
    'ytick.labelsize': size,
    'axes.titlepad': 10,
}

plt.rc('font', family = 'Palatino')

# Draw tick marks and tick labels on the bottom and left axes.
plt.rcParams.update({
    'xtick.bottom': True,
    'ytick.left': True,
    'xtick.labelbottom': True,
    'ytick.labelleft': True,
})
plt.rcParams.update(params)

def plot_oneColumn_CropTitle(dt, raw_dt, titlee, _label = "raw", idx="EVI", _color="dodgerblue"):
    """Draw one time series as a line with a second data set overlaid as red dots.

    Parameters
    ----------
    dt : DataFrame
        Must contain 'human_system_start_time' and the column named by `idx`;
        drawn as the line.
    raw_dt : DataFrame
        Same two columns; drawn as a scatter with the fixed legend label "raw".
    titlee : str
        Title of the axes.
    _label : str
        Legend label of the line.
    idx : str
        Name of the value column; also used as the y-axis label.
    _color : str
        Color of the line.

    Returns
    -------
    (fig, ax)
        The figure and axes that were created. The function used to return None
        although callers bind its result; callers that ignore the value are
        unaffected. The figure is left open, so `plt.savefig` / `plt.close`
        in the caller keep working as before.
    """
    fig, ax = plt.subplots(1, 1, figsize=(15, 4), sharex=False, sharey='col',
                           gridspec_kw={'hspace': 0.35, 'wspace': .05})
    ax.grid(True)
    ax.plot(dt['human_system_start_time'], dt[idx], linewidth=4, color=_color, label=_label)
    ax.scatter(raw_dt['human_system_start_time'], raw_dt[idx], s=20, c="r", label="raw")

    ax.set_title(titlee)
    ax.set_ylabel(idx)
    ax.legend(loc="upper right")

    # Set the ticks on this axes object rather than through pyplot's "current axes".
    ax.set_yticks(np.arange(0, 1.05, 0.2))
    ax.set_ylim(-0.1, 1.05)
    return fig, ax

In [39]:
# Re-declares the output folder with the same value as earlier in the notebook and
# makes sure it exists.
test_result_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/test_results/"
os.makedirs(test_result_dir, exist_ok=True)

In [42]:
# Give the prediction column of each mistake table a model- and direction-specific name.
SVM_None_y_test_df_A2_P1 = SVM_None_y_test_df_A2_P1.rename(columns={"prediction": "SVM_None_pred_A2P1"})
SVM_None_y_test_df_A1_P2 = SVM_None_y_test_df_A1_P2.rename(columns={"prediction": "SVM_None_pred_A1P2"})

In [43]:
# Save one PDF per field misclassified by the un-weighted SVM.
# Each entry: (table of mistakes, name of its prediction column, output sub-folder).
SVM_None_mistake_sets = [
    (SVM_None_y_test_df_A1_P2, "SVM_None_pred_A1P2", "regular_SVM_None_plots_A1_P2/"),
    (SVM_None_y_test_df_A2_P1, "SVM_None_pred_A2P1", "regular_SVM_None_plots_A2_P1/"),
]

for mistakes_df, pred_col, plot_folder in SVM_None_mistake_sets:
    plot_path = test_result_dir + plot_folder
    os.makedirs(plot_path, exist_ok=True)  # once per folder rather than once per figure

    # Vote and prediction are read row by row instead of re-filtering the table per ID.
    for anID, curr_vote, curr_pred in zip(mistakes_df.ID, mistakes_df.Vote, mistakes_df[pred_col]):
        curr_dt = SG_data_4_plot[SG_data_4_plot.ID==anID].copy()
        curr_meta = meta[meta.ID==anID].copy()

        # Overlay only the raw observations from the same year as the plotted series.
        curr_year = curr_dt.human_system_start_time.dt.year.unique()[0]
        curr_raw = landsat_DF[landsat_DF.ID==anID].copy()
        curr_raw = curr_raw[curr_raw.human_system_start_time.dt.year==curr_year]

        title_ = list(curr_meta.CropTyp)[0] + " (" + str(list(curr_meta.Acres)[0]) + " acre), "+\
                 "Experts' vote: " + str(curr_vote) + ", prediction: " + str(curr_pred)

        plot_oneColumn_CropTitle(dt=curr_dt, raw_dt=curr_raw,
                                 titlee=title_, _label = "EVI (5-step smoothed)")

        fig_name = plot_path + anID + '.pdf'
        plt.savefig(fname = fig_name, dpi=400, bbox_inches='tight')
        plt.close('all')  # release the figure before drawing the next one

# Random Forest

In [44]:
# NOTE: pickle.load can execute arbitrary code stored in the file; only load trusted models.
# The `with` block closes the file handle, which the bare open() call never did.
filename = model_dir + 'regular_forest_default.sav'
with open(filename, 'rb') as model_file:
    regular_forest_default_model = pickle.load(model_file)

In [45]:
# Random-forest predictions on the test features (all columns after ID), stored next to the labels.
regular_forest_default_preds = regular_forest_default_model.predict(x_test_df.iloc[:, 1:])
regular_forest_default_y_test_df = y_test_df.assign(prediction=list(regular_forest_default_preds))
regular_forest_default_y_test_df.head(2)

Unnamed: 0,ID,Vote,prediction
1221,7667_WSDA_SF_2016,1,1
1334,99748_WSDA_SF_2017,1,1


In [46]:
# Save the random-forest test predictions (row index not written).
out_name = os.path.join(test_result_dir, "regular_RF_default_y_test.csv")
regular_forest_default_y_test_df.to_csv(out_name, index=False)

In [47]:
# Confusion table of the default random forest on the test set.
# A vote of 1 is counted as "single"; every other vote is counted as "double".
# The four cells are counted with boolean masks instead of filtering the whole
# table once per row.
rf_votes = regular_forest_default_y_test_df.Vote
rf_preds = regular_forest_default_y_test_df.prediction
rf_is_single = rf_votes == 1
rf_is_correct = rf_votes == rf_preds

true_single_predicted_single = int((rf_is_correct & rf_is_single).sum())
true_single_predicted_double = int((~rf_is_correct & rf_is_single).sum())

true_double_predicted_single = int((~rf_is_correct & ~rf_is_single).sum())
true_double_predicted_double = int((rf_is_correct & ~rf_is_single).sum())

forest_grid_1_confus_tbl_test = pd.DataFrame({
    'None': ['Actual_Single', 'Actual_Double'],
    'Predict_Single': [true_single_predicted_single, true_double_predicted_single],
    'Predict_Double': [true_single_predicted_double, true_double_predicted_double],
})
forest_grid_1_confus_tbl_test

Unnamed: 0,None,Predict_Single,Predict_Double
0,Actual_Single,218,1
1,Actual_Double,11,39


In [48]:
# Reads "test20_split_expertLabels_2Bconsistent.csv" from the training-set folder.
# NOTE(review): presumably a test split saved by an earlier run, kept for a consistency
# check; the variable is not read again in the visible part of the notebook - confirm.
teset_set_fromBefore = pd.read_csv(training_set_dir+"test20_split_expertLabels_2Bconsistent.csv")

In [49]:
# Peek at the wide test features (ID followed by the value columns).
x_test_df.head(2)

Unnamed: 0,ID,EVI_1,EVI_2,EVI_3,EVI_4,EVI_5,EVI_6,EVI_7,EVI_8,EVI_9,...,EVI_27,EVI_28,EVI_29,EVI_30,EVI_31,EVI_32,EVI_33,EVI_34,EVI_35,EVI_36
1221,7667_WSDA_SF_2016,0.051639,0.080531,0.095567,0.110602,0.125638,0.140673,0.224918,0.275751,0.326585,...,0.272984,0.302074,0.263391,0.224708,0.229474,0.23424,0.210145,0.186049,0.161954,0.137858
1334,99748_WSDA_SF_2017,0.028091,0.032607,0.037122,0.041638,0.04685,0.052062,0.119223,0.103029,0.124796,...,0.279474,0.242019,0.21619,0.176909,0.088454,0.0,0.061158,0.122317,0.128664,0.135011


In [50]:
# Attach the field metadata to the random-forest results.
# Note: this overwrites its own input, so running the cell twice merges meta twice.
regular_forest_default_y_test_df = regular_forest_default_y_test_df.merge(meta, on="ID", how="left")

In [51]:
# Mistakes of the random forest, split by direction (A = actual vote, P = prediction).
forest_is_A1P2 = (regular_forest_default_y_test_df.Vote == 1) & (regular_forest_default_y_test_df.prediction == 2)
forest_is_A2P1 = (regular_forest_default_y_test_df.Vote == 2) & (regular_forest_default_y_test_df.prediction == 1)

forest_default_yTest_A1P2 = regular_forest_default_y_test_df[forest_is_A1P2]
forest_default_yTest_A2P1 = regular_forest_default_y_test_df[forest_is_A2P1]

In [52]:
# Number of vote-2 / predicted-1 random-forest mistakes per crop type.
forest_default_yTest_A2P1.groupby(['CropTyp'])['CropTyp'].count()

CropTyp
barley            1
bean, dry         1
buckwheat         3
corn, field       2
triticale         1
wheat             1
yellow mustard    2
Name: CropTyp, dtype: int64

In [53]:
# Number of vote-1 / predicted-2 random-forest mistakes per crop type.
forest_default_yTest_A1P2.groupby(['CropTyp'])['CropTyp'].count()

CropTyp
market crops    1
Name: CropTyp, dtype: int64

In [55]:
# Give the prediction column of each mistake table a model- and direction-specific name.
forest_default_yTest_A2P1 = forest_default_yTest_A2P1.rename(columns={"prediction": "RF_default_pred_A2P1"})
forest_default_yTest_A1P2 = forest_default_yTest_A1P2.rename(columns={"prediction": "RF_default_pred_A1P2"})

In [56]:
# Save one PDF per field misclassified by the default random forest.
# Each entry: (table of mistakes, name of its prediction column, output sub-folder).
RF_mistake_sets = [
    (forest_default_yTest_A1P2, "RF_default_pred_A1P2", "regular_RF_plots_A1_P2/"),
    (forest_default_yTest_A2P1, "RF_default_pred_A2P1", "regular_RF_plots_A2_P1/"),
]

for mistakes_df, pred_col, plot_folder in RF_mistake_sets:
    plot_path = test_result_dir + plot_folder
    os.makedirs(plot_path, exist_ok=True)  # once per folder rather than once per figure

    # Vote and prediction are read row by row instead of re-filtering the table per ID.
    for anID, curr_vote, curr_pred in zip(mistakes_df.ID, mistakes_df.Vote, mistakes_df[pred_col]):
        curr_dt = SG_data_4_plot[SG_data_4_plot.ID==anID].copy()
        curr_meta = meta[meta.ID==anID].copy()

        # Overlay only the raw observations from the same year as the plotted series.
        curr_year = curr_dt.human_system_start_time.dt.year.unique()[0]
        curr_raw = landsat_DF[landsat_DF.ID==anID].copy()
        curr_raw = curr_raw[curr_raw.human_system_start_time.dt.year==curr_year]

        title_ = list(curr_meta.CropTyp)[0] + " (" + str(list(curr_meta.Acres)[0]) + " acre), "+\
                 "Experts' vote: " + str(curr_vote) + ", prediction: " + str(curr_pred)

        plot_oneColumn_CropTitle(dt=curr_dt, raw_dt=curr_raw, titlee=title_,
                                 _label = "EVI (5-step smoothed)")

        fig_name = plot_path + anID + '.pdf'
        plt.savefig(fname = fig_name, dpi=400, bbox_inches='tight')
        plt.close('all')  # release the figure before drawing the next one

# Neural Nets. Deep Learning. Transfer Learning

### Need to complete this
Choose a probability threshold... etc. 

In [93]:
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Dense
from keras.layers import Flatten
import tensorflow as tf
from keras.models import load_model

from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array
from keras.models import load_model

In [None]:
# model_dir = "/Users/hn/Documents/01_research_data/NASA/ML_Models/"
# model = load_model(model_dir + '01_TL_SingleDoubleEVI_regular_train80.h5')

In [None]:
# training_set_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/"
# ground_truth_labels = pd.read_csv(training_set_dir+"train_labels.csv")

In [None]:
# expert_test_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/regular_train_images_" + VI_idx + "/test20/"

# test_filenames = os.listdir(expert_test_dir)
# expert_test_df = pd.DataFrame({
#                                'filename': test_filenames
#                                     })
# nb_samples = expert_test_df.shape[0]

# expert_test_df["human_predict"] = expert_test_df.filename.str.split("_", expand=True)[0]
# expert_test_df["prob_single"]=-1.0
# print (expert_test_df.shape)
# expert_test_df.head(2)

In [None]:
# def load_image(filename):
#     # load the image
#     img = load_img(filename, target_size=(224, 224))
#     # convert to array
#     img = img_to_array(img)
#     # reshape into a single sample with 3 channels
#     img = img.reshape(1, 224, 224, 3)
#     # center pixel data
#     img = img.astype('float32')
#     img = img - [123.68, 116.779, 103.939]
#     return img

# # load an image and predict the class
# def run_example():
#     # load the image
#     test_dir = experts_test_dir = "/Users/hn/Documents/01_research_data/NASA/ML_data/" + \
#                                   "regular_train_images_" + VI_idx + "/test20/"
    
#     img = load_image(test_dir+'double_1624_WSDA_SF_2016.jpg')
#     # load model
#     model_dir = "/Users/hn/Documents/01_research_data/NASA/ML_Models/"
#     model = load_model(model_dir + '01_TL_SingleDoubleEVI_regular_train80.h5')
#     result = model.predict(img)
#     print("result[0]: ", result[0])

# # entry point, run the example
# run_example()

# KNN

In [57]:
def DTW_prune(ts1, ts2):
    """Return the distance reported by ``dtw.warping_paths`` for two series.

    ``dtw.warping_paths`` is called with ``window=10`` and ``use_pruning=True``;
    of the two values it returns, only the first is passed on and the second
    is discarded.
    """
    distance, _unused_paths = dtw.warping_paths(ts1, ts2, window=10, use_pruning=True)
    return distance

In [58]:
# NOTE: pickle.load can execute arbitrary code stored in the file; only load trusted models.
# The `with` block closes the file handle, which the bare open() call never did.
filename = model_dir + "00_KNN_regular_EVI_DTW_prune_distanceWeight_12NNisBest.sav"
with open(filename, 'rb') as model_file:
    distanceWeight_KNN = pickle.load(model_file)

In [59]:
# NOTE: pickle.load can execute arbitrary code stored in the file; only load trusted models.
# The `with` block closes the file handle, which the bare open() call never did.
filename = model_dir + "00_KNN_regular_EVI_DTW_prune_uniformWeight_11NNisBest.sav"
with open(filename, 'rb') as model_file:
    uniform_KNN = pickle.load(model_file)

In [60]:
%%time
# Both KNN models are fed the same feature columns: everything after the leading ID column.
KNN_test_features = x_test_df.iloc[:, 1:]
KNN_DTW_test_predictions_uniform = uniform_KNN.predict(KNN_test_features)
KNN_DTW_test_predictions_distanceWeight = distanceWeight_KNN.predict(KNN_test_features)

CPU times: user 11min 7s, sys: 2.5 s, total: 11min 10s
Wall time: 11min 10s


In [61]:
# Test labels next to the predictions of both KNN variants.
KNN_y_test = y_test_df.assign(
    KNN_pred_uniform=list(KNN_DTW_test_predictions_uniform),
    KNN_pred_distance=list(KNN_DTW_test_predictions_distanceWeight),
)
KNN_y_test.head(2)

Unnamed: 0,ID,Vote,KNN_pred_uniform,KNN_pred_distance
1221,7667_WSDA_SF_2016,1,1,1
1334,99748_WSDA_SF_2017,1,1,1


In [62]:
# Attach the field metadata to the KNN results.
# Note: this overwrites its own input, so running the cell twice merges meta twice.
KNN_y_test = KNN_y_test.merge(meta, on="ID", how="left")

In [63]:
# Mistakes of the distance-weighted KNN, split by direction (A = actual vote, P = prediction).
KNN_dist_is_A1P2 = (KNN_y_test.Vote == 1) & (KNN_y_test.KNN_pred_distance == 2)
KNN_dist_is_A2P1 = (KNN_y_test.Vote == 2) & (KNN_y_test.KNN_pred_distance == 1)

KNN_y_test_dist_A1P2 = KNN_y_test[KNN_dist_is_A1P2]
KNN_y_test_dist_A2P1 = KNN_y_test[KNN_dist_is_A2P1]

# Misclassified acreage in each direction, then the absolute difference between the two.
KNN_dist_acres_A2P1 = KNN_y_test_dist_A2P1.ExctAcr.sum()
KNN_dist_acres_A1P2 = KNN_y_test_dist_A1P2.ExctAcr.sum()
print(KNN_dist_acres_A2P1)
print(KNN_dist_acres_A1P2)
abs(KNN_dist_acres_A2P1 - KNN_dist_acres_A1P2)

240.22581581805161
482.93655922516064


242.71074340710902

In [64]:
# Mistakes of the uniform-weight KNN, split by direction (A = actual vote, P = prediction).
KNN_uniform_is_A1P2 = (KNN_y_test.Vote == 1) & (KNN_y_test.KNN_pred_uniform == 2)
KNN_uniform_is_A2P1 = (KNN_y_test.Vote == 2) & (KNN_y_test.KNN_pred_uniform == 1)

KNN_y_test_uniform_A1P2 = KNN_y_test[KNN_uniform_is_A1P2]
KNN_y_test_uniform_A2P1 = KNN_y_test[KNN_uniform_is_A2P1]

# Misclassified acreage in each direction, then the absolute difference between the two.
KNN_uniform_acres_A2P1 = KNN_y_test_uniform_A2P1.ExctAcr.sum()
KNN_uniform_acres_A1P2 = KNN_y_test_uniform_A1P2.ExctAcr.sum()
print(KNN_uniform_acres_A2P1)
print(KNN_uniform_acres_A1P2)

abs(KNN_uniform_acres_A2P1 - KNN_uniform_acres_A1P2)

391.6405721820946
469.92146576602454


78.28089358392992

# Confusion Tables

In [65]:
# Peek at the KNN result table after the metadata merge.
KNN_y_test.head(2)

Unnamed: 0,ID,Vote,KNN_pred_uniform,KNN_pred_distance,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
0,7667_WSDA_SF_2016,1,1,1,wheat,center pivot,producer,35,35.232575,2016/09/21 00:00:00,Benton
1,99748_WSDA_SF_2017,1,1,1,"corn, field",center pivot,wsda,50,50.269316,2017/08/09,Grant


In [66]:
####
####   Uniform weights
####
# Confusion table of the uniform-weight KNN on the test set.
# A vote of 1 is counted as "single"; every other vote is counted as "double".
# The four cells are counted with boolean masks instead of filtering the whole
# table once per row.
knn_votes = KNN_y_test.Vote
knn_preds = KNN_y_test.KNN_pred_uniform
knn_is_single = knn_votes == 1
knn_is_correct = knn_votes == knn_preds

true_single_predicted_single = int((knn_is_correct & knn_is_single).sum())
true_single_predicted_double = int((~knn_is_correct & knn_is_single).sum())

true_double_predicted_single = int((~knn_is_correct & ~knn_is_single).sum())
true_double_predicted_double = int((knn_is_correct & ~knn_is_single).sum())

# IDs of the misclassified fields, in table order.
actual_double_predicted_single_IDs = list(KNN_y_test.loc[~knn_is_correct & ~knn_is_single, "ID"])
actual_single_predicted_double_IDs = list(KNN_y_test.loc[~knn_is_correct & knn_is_single, "ID"])

confus_tbl_test = pd.DataFrame({
    'None': ['Actual_Single', 'Actual_Double'],
    'Predict_Single': [true_single_predicted_single, true_double_predicted_single],
    'Predict_Double': [true_single_predicted_double, true_double_predicted_double],
})
confus_tbl_test

Unnamed: 0,None,Predict_Single,Predict_Double
0,Actual_Single,213,6
1,Actual_Double,4,46


In [67]:
####
####   Distance weights
####
# Confusion table of the distance-weighted KNN on the test set (the banner used to
# say "Uniform weights" although the KNN_pred_distance column is evaluated here).
# A vote of 1 is counted as "single"; every other vote is counted as "double".
# The four cells are counted with boolean masks instead of filtering the whole
# table once per row.
knn_votes = KNN_y_test.Vote
knn_preds = KNN_y_test.KNN_pred_distance
knn_is_single = knn_votes == 1
knn_is_correct = knn_votes == knn_preds

true_single_predicted_single = int((knn_is_correct & knn_is_single).sum())
true_single_predicted_double = int((~knn_is_correct & knn_is_single).sum())

true_double_predicted_single = int((~knn_is_correct & ~knn_is_single).sum())
true_double_predicted_double = int((knn_is_correct & ~knn_is_single).sum())

# IDs of the misclassified fields, in table order.
actual_double_predicted_single_IDs = list(KNN_y_test.loc[~knn_is_correct & ~knn_is_single, "ID"])
actual_single_predicted_double_IDs = list(KNN_y_test.loc[~knn_is_correct & knn_is_single, "ID"])

confus_tbl_test = pd.DataFrame({
    'None': ['Actual_Single', 'Actual_Double'],
    'Predict_Single': [true_single_predicted_single, true_double_predicted_single],
    'Predict_Double': [true_single_predicted_double, true_double_predicted_double],
})
confus_tbl_test

Unnamed: 0,None,Predict_Single,Predict_Double
0,Actual_Single,212,7
1,Actual_Double,3,47


In [68]:
# Save the KNN test predictions together with the merged metadata (row index not written).
out_name = os.path.join(test_result_dir, "regular_KNN_y_test.csv")
KNN_y_test.to_csv(out_name, index=False)

In [72]:
# Save one PDF per field misclassified by the uniform-weight KNN.
# Each entry: (table of mistakes, output sub-folder); both tables share the
# prediction column KNN_pred_uniform.
KNN_uniform_mistake_sets = [
    (KNN_y_test_uniform_A1P2, "regular_KNN_plots_A1_P2/"),
    (KNN_y_test_uniform_A2P1, "regular_KNN_plots_A2_P1/"),
]

for mistakes_df, plot_folder in KNN_uniform_mistake_sets:
    plot_path = test_result_dir + plot_folder
    os.makedirs(plot_path, exist_ok=True)  # once per folder rather than once per figure

    # Vote and prediction are read row by row instead of re-filtering the table per ID.
    for anID, curr_vote, curr_pred in zip(mistakes_df.ID, mistakes_df.Vote, mistakes_df.KNN_pred_uniform):
        curr_dt = SG_data_4_plot[SG_data_4_plot.ID==anID].copy()
        curr_meta = meta[meta.ID==anID].copy()

        # Overlay only the raw observations from the same year as the plotted series.
        curr_year = curr_dt.human_system_start_time.dt.year.unique()[0]
        curr_raw = landsat_DF[landsat_DF.ID==anID].copy()
        curr_raw = curr_raw[curr_raw.human_system_start_time.dt.year==curr_year]

        title_ = list(curr_meta.CropTyp)[0] + " (" + str(list(curr_meta.Acres)[0]) + " acre), "+\
                 "Experts' vote: " + str(curr_vote) + ", prediction: " + str(curr_pred)

        plot_oneColumn_CropTitle(dt=curr_dt, raw_dt=curr_raw, titlee=title_,
                                 _label = "EVI (5-step smoothed)")

        fig_name = plot_path + anID + '.pdf'
        plt.savefig(fname = fig_name, dpi=400, bbox_inches='tight')
        plt.close('all')  # release the figure before drawing the next one


In [73]:
# Number of rows per crop type in the A2P1 frame.
KNN_y_test_uniform_A2P1.groupby("CropTyp").CropTyp.count()

CropTyp
barley            1
bean, dry         1
buckwheat         1
yellow mustard    1
Name: CropTyp, dtype: int64

In [74]:
# Number of rows per crop type in the A1P2 frame.
KNN_y_test_uniform_A1P2.groupby("CropTyp").CropTyp.count()

CropTyp
bluegrass seed    1
canola            1
corn, field       1
corn, sweet       1
grass hay         1
wheat             1
Name: CropTyp, dtype: int64

# Are the Mistakes Common Across the Classifiers (SVM, RF, kNN)?

In [75]:
# Work on an independent copy so the merges below leave y_test_df untouched.
common_mistakes = y_test_df.copy(deep=True)

In [76]:
# Give each SVM frame its own prediction column name so both can be merged side by side.
# NOTE: renames in place; the merge cell that follows selects these new names.
SVM_None_y_test_df_A2_P1.rename({"prediction": "SVM_None_pred_A2P1"}, axis="columns", inplace=True)
SVM_None_y_test_df_A1_P2.rename({"prediction": "SVM_None_pred_A1P2"}, axis="columns", inplace=True)

In [77]:
# Left-join the SVM prediction columns by ID; rows absent from a frame get NaN.
common_mistakes = common_mistakes.merge(
    SVM_None_y_test_df_A2_P1[["ID", "SVM_None_pred_A2P1"]], on="ID", how="left"
)
common_mistakes = common_mistakes.merge(
    SVM_None_y_test_df_A1_P2[["ID", "SVM_None_pred_A1P2"]], on="ID", how="left"
)

In [78]:
# Give each random-forest frame its own prediction column name before merging.
# NOTE: renames in place; the merge cell that follows selects these new names.
forest_default_yTest_A2P1.rename({"prediction": "RF_default_pred_A2P1"}, axis="columns", inplace=True)
forest_default_yTest_A1P2.rename({"prediction": "RF_default_pred_A1P2"}, axis="columns", inplace=True)

In [79]:
# Left-join the random-forest prediction columns by ID; rows absent from a frame get NaN.
common_mistakes = common_mistakes.merge(
    forest_default_yTest_A2P1[["ID", "RF_default_pred_A2P1"]], on="ID", how="left"
)
common_mistakes = common_mistakes.merge(
    forest_default_yTest_A1P2[["ID", "RF_default_pred_A1P2"]], on="ID", how="left"
)

In [80]:
# Give each KNN frame its own prediction column name before merging.
# NOTE: renames in place, so after this cell the frames no longer carry a
# KNN_pred_uniform column; cells that read that name must run before this one.
KNN_y_test_uniform_A2P1.rename({"KNN_pred_uniform": "KNN_uniform_pred_A2P1"}, axis="columns", inplace=True)
KNN_y_test_uniform_A1P2.rename({"KNN_pred_uniform": "KNN_uniform_pred_A1P2"}, axis="columns", inplace=True)

In [81]:
# Left-join the KNN prediction columns by ID; rows absent from a frame get NaN.
common_mistakes = common_mistakes.merge(
    KNN_y_test_uniform_A2P1[["ID", "KNN_uniform_pred_A2P1"]], on="ID", how="left"
)

common_mistakes = common_mistakes.merge(
    KNN_y_test_uniform_A1P2[["ID", "KNN_uniform_pred_A1P2"]], on="ID", how="left"
)

In [83]:
# Placeholder only: common_mistakes_clean starts as an empty frame and nothing
# in this cell fills it. The commented-out loop below is an earlier row-by-row
# draft that kept the rows of common_mistakes having fewer than five missing
# cells; the dropna(thresh=4) noted at the end of the next line is the
# vectorised form that the following cell actually runs.
common_mistakes_clean=pd.DataFrame() # = common_mistakes.copy() # common_mistakes.dropna(thresh=4)

# vote_columns =['SVM_None_pred_A2P1', 'SVM_None_pred_A1P2',
#                'RF_default_pred_A2P1', 'RF_default_pred_A1P2', 
#                'KNN_uniform_pred_A2P1', 'KNN_uniform_pred_A1P2']
# for indeks_ in common_mistakes.index:
#     curr_row = common_mistakes[common_mistakes.index==indeks_].copy()
#     if list(curr_row.isnull().sum(axis=1))[0]<5:
#         common_mistakes_clean = pd.concat([common_mistakes_clean, curr_row])

In [91]:
# Keep the rows that have at least four non-missing cells.
# ID and Vote look always populated in the displayed output, so this presumably
# means "at least two prediction columns are filled in" -- TODO confirm that
# Vote can never be missing.
common_mistakes_clean = common_mistakes.dropna(axis="index", thresh=4)
common_mistakes_clean

Unnamed: 0,ID,Vote,SVM_None_pred_A2P1,SVM_None_pred_A1P2,RF_default_pred_A2P1,RF_default_pred_A1P2,KNN_uniform_pred_A2P1,KNN_uniform_pred_A1P2
20,60678_WSDA_SF_2016,2,1.0,,1.0,,1.0,
74,2660_WSDA_SF_2016,2,1.0,,1.0,,,
183,101372_WSDA_SF_2017,1,,2.0,,,,2.0
188,99837_WSDA_SF_2017,2,,,1.0,,1.0,
199,53718_WSDA_SF_2016,2,1.0,,1.0,,,
206,106509_WSDA_SF_2017,2,1.0,,1.0,,1.0,
252,99643_WSDA_SF_2018,1,,2.0,,,,2.0


In [92]:
# Attach the field metadata (crop type, irrigation, acreage, ...) by ID.
# Any meta columns left over from a previous run of this cell are dropped first,
# so re-running the cell does not pile up CropTyp_x / CropTyp_y style duplicates.
common_mistakes_clean = (
    common_mistakes_clean
    .drop(columns=meta.columns.difference(["ID"]), errors="ignore")
    .merge(meta, on="ID", how="left")
)
common_mistakes_clean

Unnamed: 0,ID,Vote,SVM_None_pred_A2P1,SVM_None_pred_A1P2,RF_default_pred_A2P1,RF_default_pred_A1P2,KNN_uniform_pred_A2P1,KNN_uniform_pred_A1P2,CropTyp,Irrigtn,DataSrc,Acres,ExctAcr,LstSrvD,county
0,60678_WSDA_SF_2016,2,1.0,,1.0,,1.0,,"bean, dry",center pivot,wsda,32,32.322881,2016/08/17 00:00:00,Adams
1,2660_WSDA_SF_2016,2,1.0,,1.0,,,,"corn, field",center pivot,wsda,127,127.042075,2016/08/31 00:00:00,Benton
2,101372_WSDA_SF_2017,1,,2.0,,,,2.0,canola,center pivot,wsda,80,80.255504,2017/10/16,Grant
3,99837_WSDA_SF_2017,2,,,1.0,,1.0,,barley,center pivot,wsda,73,73.250981,2017/10/10,Grant
4,53718_WSDA_SF_2016,2,1.0,,1.0,,,,triticale,center pivot,wsda,124,124.224249,2016/08/17 00:00:00,Adams
5,106509_WSDA_SF_2017,2,1.0,,1.0,,1.0,,yellow mustard,center pivot,wsda,135,134.651954,2017/10/17,Grant
6,99643_WSDA_SF_2018,1,,2.0,,,,2.0,"corn, field",center pivot,wsda,37,36.938837,2018/06/06 00:00:00,Franklin


In [87]:
# Show the full record at position 5 of the merged frame.
common_mistakes_clean.iloc[5, :]

ID                       106509_WSDA_SF_2017
Vote                                       2
SVM_None_pred_A2P1                       1.0
SVM_None_pred_A1P2                       NaN
RF_default_pred_A2P1                     1.0
RF_default_pred_A1P2                     NaN
KNN_uniform_pred_A2P1                    1.0
KNN_uniform_pred_A1P2                    NaN
CropTyp                       yellow mustard
Irrigtn                         center pivot
DataSrc                                 wsda
Acres                                    135
ExctAcr                           134.651954
LstSrvD                           2017/10/17
county                                 Grant
Name: 5, dtype: object

In [88]:
# Prediction columns merged into common_mistakes_clean; a non-missing cell marks
# a field that the corresponding classifier got wrong.
vote_columns = [
    'SVM_None_pred_A2P1', 'SVM_None_pred_A1P2',
    'RF_default_pred_A2P1', 'RF_default_pred_A1P2',
    'KNN_uniform_pred_A2P1', 'KNN_uniform_pred_A1P2',
]

# Classifier label for each entry of vote_columns, in the same order.
vote_colMethod = ['SVM', 'SVM', 'RF', 'RF', 'kNN', 'kNN']

# One PDF per field in common_mistakes_clean, filed by the experts' vote.
for anID in common_mistakes_clean.ID.tolist():
    field_series = SG_data_4_plot.loc[SG_data_4_plot.ID == anID].copy()
    field_meta = meta.loc[meta.ID == anID].copy()

    # Keep only the landsat_DF rows from the first year found in the field's series.
    field_year = field_series.human_system_start_time.dt.year.unique()[0]
    field_raw = landsat_DF.loc[landsat_DF.ID == anID].copy()
    field_raw = field_raw.loc[field_raw.human_system_start_time.dt.year == field_year]

    mistake_row = common_mistakes_clean.loc[common_mistakes_clean.ID == anID]
    expert_vote = mistake_row.Vote.tolist()[0]

    # Names of the classifiers whose prediction column is filled in for this field.
    is_flagged = mistake_row[vote_columns].notna().iloc[0]
    wrong_methods = [method for method, flagged in zip(vote_colMethod, is_flagged) if flagged]

    title_ = "".join([
        field_meta.CropTyp.tolist()[0],
        " (",
        str(field_meta.Acres.tolist()[0]),
        " acre), Expert Vote: ",
        str(expert_vote),
        ", mistakes: ",
        str(wrong_methods),
    ])

    curr_plt = plot_oneColumn_CropTitle(dt=field_series, raw_dt=field_raw, titlee=title_,
                                        _label="EVI (5-step smoothed)")

    # Vote 2 goes to the A2P1 folder, every other vote to A1P2.
    sub_folder = "commonMistakes/regular/A2P1/" if expert_vote == 2 else "commonMistakes/regular/A1P2/"
    plot_path = test_result_dir + sub_folder

    os.makedirs(plot_path, exist_ok=True)
    fig_name = plot_path + anID + '.pdf'
    plt.savefig(fname=fig_name, dpi=400, bbox_inches='tight')
    plt.close('all')