## Appendix: Helper Functions

### Download NAIP Images

In [None]:
import boto3
import os

def download_naip_image(state, year, lat, lon, filename, save_directory):
    '''
    Download one NAIP image from the AWS S3 bucket 'naip-source' into a local, per-year directory.

    Parameters:
        state: state prefix used as the first component of the S3 key (e.g. 'mn').
        year: acquisition year; used both in the S3 key and as the local sub-directory name.
        lat, lon: latitude / longitude values that are combined into the tile folder name of the
            S3 key (latitude followed by the longitude zero-padded to three digits, e.g. 46 and
            92 -> '46092').
        filename: full NAIP filename including the trailing version date; also used as the local
            file name.
        save_directory: local root directory; the file is written to <save_directory>/<year>/<filename>.

    Returns:
        None. Progress is reported with print().

    Notes:
        The request is made with RequestPayer='requester', so valid AWS credentials are required.
        They are resolved through boto3's default credential chain (environment variables, shared
        credentials file, ...) rather than being hard coded in this function.
    '''
    # AWS doesn't include the version date in the filename, so splice that off before building the key
    filename_shortened = filename[0:26] + ".tif"

    # Tile folder is the latitude followed by the longitude padded to three digits
    tile_folder = str(lat) + str(lon).zfill(3)

    # S3 keys always use forward slashes, so build the key by hand rather than with os.path
    path_to_download = '/'.join([state, str(year), '100cm', 'rgbir', tile_folder, filename_shortened])

    # Note: We'll make sure images are stored in separate directories by year
    year_directory = os.path.join(save_directory, str(year))
    save_path = os.path.join(year_directory, filename)

    # Nothing to do if the file was already fetched on a previous run
    if os.path.exists(save_path):
        print("Looks like you've already downloaded this file.")
        return

    # download_file does not create missing parent directories, so make sure the target exists
    os.makedirs(year_directory, exist_ok=True)

    # Credentials come from boto3's default lookup instead of strings embedded in the source
    s3_client = boto3.client('s3')

    print("Downloading image: " + path_to_download)
    s3_client.download_file('naip-source', path_to_download, save_path, {'RequestPayer': 'requester'})
    print("Finished downloading image: " + filename_shortened)

In [None]:
# Every entry below is laid out as (state, year, lat, lon, filename), which is
# the positional argument order of download_naip_image (save directory excluded).

# Data for Duluth, MN
mn_files_to_download = [
    ('mn', 2013, 46, 92, 'm_4609215_sw_15_1_20130618_20130930.tif'),
    ('mn', 2013, 46, 92, 'm_4609215_se_15_1_20130618_20130930.tif'),
    ('mn', 2013, 46, 92, 'm_4609216_sw_15_1_20130618_20130930.tif'),
    ('mn', 2013, 46, 92, 'm_4609223_nw_15_1_20130618_20130930.tif'),
    ('mn', 2013, 46, 92, 'm_4609223_ne_15_1_20130618_20130930.tif'),
    ('mn', 2013, 46, 92, 'm_4609224_nw_15_1_20130618_20130930.tif'),
    ('mn', 2015, 46, 92, 'm_4609215_sw_15_1_20150922_20151221.tif'),
    ('mn', 2015, 46, 92, 'm_4609215_se_15_1_20150922_20151221.tif'),
    ('mn', 2015, 46, 92, 'm_4609216_sw_15_1_20150922_20151221.tif'),
    ('mn', 2015, 46, 92, 'm_4609223_nw_15_1_20150922_20151221.tif'),
    ('mn', 2015, 46, 92, 'm_4609223_ne_15_1_20150922_20151221.tif'),
    ('mn', 2015, 46, 92, 'm_4609224_nw_15_1_20150922_20151221.tif'),
]

# Data for Flint, Michigan
mi_to_download = [
    ('mi', 2014, 43, 83, 'm_4308358_ne_17_1_20140803_20141021.tif'),
    ('mi', 2014, 43, 83, 'm_4308359_nw_17_1_20140722_20141021.tif'),
    ('mi', 2014, 43, 83, 'm_4308359_ne_17_1_20140722_20141021.tif'),
    ('mi', 2014, 43, 83, 'm_4308358_se_17_1_20140803_20141021.tif'),
    ('mi', 2014, 43, 83, 'm_4308359_sw_17_1_20140722_20141021.tif'),
    ('mi', 2014, 43, 83, 'm_4308359_se_17_1_20140722_20141021.tif'),
    ('mi', 2014, 42, 83, 'm_4208302_ne_17_1_20140803_20141021.tif'),
    ('mi', 2014, 42, 83, 'm_4208303_nw_17_1_20140722_20141021.tif'),
    ('mi', 2014, 42, 83, 'm_4208303_ne_17_1_20140722_20141021.tif'),
    ('mi', 2012, 43, 83, 'm_4308358_ne_17_1_20120627_20120911.tif'),
    ('mi', 2012, 43, 83, 'm_4308359_nw_17_1_20120629_20120911.tif'),
    ('mi', 2012, 43, 83, 'm_4308359_ne_17_1_20120629_20120911.tif'),
    ('mi', 2012, 43, 83, 'm_4308358_se_17_1_20120627_20120911.tif'),
    ('mi', 2012, 43, 83, 'm_4308359_sw_17_1_20120629_20120911.tif'),
    ('mi', 2012, 43, 83, 'm_4308359_se_17_1_20120629_20120911.tif'),
    ('mi', 2012, 42, 83, 'm_4208302_ne_17_1_20120627_20120911.tif'),
    ('mi', 2012, 42, 83, 'm_4208303_nw_17_1_20120629_20120911.tif'),
    ('mi', 2012, 42, 83, 'm_4208303_ne_17_1_20120629_20120911.tif'),
]

# Only the Minnesota list is fetched by this cell; the Michigan list is defined but not iterated here
for file in mn_files_to_download:
    download_naip_image(*file, "./images/naip")

### Misclassification Table

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

# Integer code -> class label lookups. Each code is the position of its label in
# the list (counting from `start`), noted in the trailing comment on every line.
# Presumably test_train_dict decodes the classifier output and gt_dict decodes
# the ground-truth raster -- confirm against the caller.

# Codes 0-6; codes 4 and 6 both map to 'dirt/soil/sand'
test_train_dict = dict(enumerate([
    'water',              # 0
    'grass/agriculture',  # 1
    'turf/fields',        # 2
    'trees',              # 3
    'dirt/soil/sand',     # 4
    'asphalt/buildings',  # 5
    'dirt/soil/sand',     # 6
]))

# Codes 1-12 (there is no entry for code 0)
gt_dict = dict(enumerate([
    'grass/agriculture',  # 1
    'dirt/soil/sand',     # 2
    'asphalt/buildings',  # 3
    'asphalt/buildings',  # 4
    'water',              # 5
    'trees',              # 6
    'trees',              # 7
    'grass/agriculture',  # 8
    'grass/agriculture',  # 9
    'water',              # 10
    'water',              # 11
    'dirt/soil/sand',     # 12
], start=1))

def misclassification_table(img_sets, test_train_dict, gt_dict):
    '''
    Build a confusion matrix comparing classified rasters against their ground-truth rasters.

    Parameters:
        img_sets: sequence of indexable records; element [1] is the path of the classified
            raster and element [2] is the path of the matching ground-truth raster (element [0]
            is not used by this function).
        test_train_dict: dict translating the integer codes found in band 1 of the classified
            raster into class-label strings.
        gt_dict: dict translating the integer codes found in band 1 of the ground-truth raster
            into class-label strings.

    Returns:
        A 6x6 integer array as produced by sklearn's confusion_matrix (ground truth first,
        prediction second), with rows and columns ordered as: water, grass/agriculture,
        turf/fields, trees, dirt/soil/sand, asphalt/buildings. The counts of every image set in
        img_sets are summed, so a single image set yields exactly that set's matrix. An empty
        img_sets yields an all-zero matrix.

    Raises:
        KeyError: if a raster contains a code that is missing from the corresponding dict.
    '''
    class_labels = [
        'water',
        'grass/agriculture',
        'turf/fields',
        'trees',
        'dirt/soil/sand',
        'asphalt/buildings',
    ]

    # Element-wise dict lookup over a whole array, see
    # https://stackoverflow.com/questions/16992713/translate-every-element-in-numpy-array-according-to-key
    # Built once up front because the dicts do not change between image sets.
    to_predicted_label = np.vectorize(test_train_dict.__getitem__)
    to_gt_label = np.vectorize(gt_dict.__getitem__)

    # Running total over all image sets. Previously the function returned from inside the loop,
    # so every image set after the first was silently ignored.
    pred_accuracy = np.zeros((len(class_labels), len(class_labels)), dtype=np.int64)

    for img_set in img_sets:
        # Context managers make sure both datasets are closed once band 1 has been read
        with rasterio.open(img_set[1]) as classified_img, rasterio.open(img_set[2]) as gt_img:
            classification = classified_img.read(1).astype('int')
            gt_class = gt_img.read(1).astype('int')

        classification_text = to_predicted_label(classification)
        gt_class_text = to_gt_label(gt_class)

        # confusion_matrix expects 1-D label sequences, so flatten the 2-D rasters
        pred_accuracy += confusion_matrix(gt_class_text.ravel(),
                                          classification_text.ravel(),
                                          labels=class_labels)

    return pred_accuracy