## Code to calculate the centroids of the (trimmed) terminus picks

#### Jukes Liu

In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
import matplotlib.image as mpimg
import datetime
import types
import os
import cv2
import scipy.stats

If output images have not yet been converted to png format from pgm, do it using cell magic:

In [30]:
%%bash
# Convert the .pgm images in the Box033 resized folder to .png.
# Abort if the folder cannot be entered, so mogrify never runs against
# whatever directory the notebook happens to be in.
cd /media/jukes/jukes1/LS8aws/Box033/rotated/resized/ || exit 1
mogrify -format png *.pgm

### 1) Set up

- set BoxIDs to calculate centroids for
- set mass or size
- define the centroid function
- read in dates from datetags.csv

In [36]:
#Box IDs to process; each one is used to build the "Box<ID>" folder name under the base path
BoxIDs = ['001', '002', '004', '033', '120', '174', '235', '259', '277', '531']
#Metric name appended to "terminus_highest" to select the terminus-pick folder ("mass" here; see "set mass or size" above)
massorsize = "mass"

#Define the centroid function
def centroid(x, y):
    """Return the centroid (mean x, mean y) of a set of 2-D points.

    Parameters
    ----------
    x, y : sequence of numbers
        x and y coordinates of the points. Both must support ``len()`` and
        have the same, non-zero length.

    Returns
    -------
    tuple
        ``(sum(x) / n, sum(y) / n)`` where ``n`` is the number of points.

    Raises
    ------
    ValueError
        If no points are given, or if ``x`` and ``y`` differ in length
        (dividing ``sum(y)`` by ``len(x)`` would silently give a wrong mean).
    """
    length = len(x)
    if length == 0:
        raise ValueError("centroid() needs at least one point")
    if len(y) != length:
        raise ValueError("x and y must have the same number of points")
    return sum(x) / length, sum(y) / length

In [37]:
#Load the scene-name/date lookup table (datetags.csv) into datetime_df, keeping every value as a string
datetags_path = '/home/jukes/Documents/Sample_glaciers/datetags.csv'
datetime_df = pd.read_csv(
    datetags_path,
    sep=',',
    dtype=str,
    header=0,
    names=['Img_Date', 'datetimes'],
)
print(datetime_df.shape)
datetime_df.head()

(1687, 2)


Unnamed: 0,Img_Date,datetimes
0,LC80360042017077LGN00,2017-03-18
1,LC80360042015248LGN00,2015-09-05
2,LC80360042015184LGN00,2015-07-03
3,LC80360042016107LGN00,2016-04-16
4,LC80360042015232LGN00,2015-08-20


### 2) OPTION A: Calculate centroids for terminus picked using a metric (mass or size)

Grabs the terminus pick line coordinates from the .dat files generated from the 2D WTMM in Xsmurf and calculates their centroid using the centroid function. This calculates the centroids for the original dat file (term_dat) AND the trimmed terminus pick (term_trim_dat). Currently only outputs the trimmed termini centroids.

In [24]:
#Outputs of this cell: one entry per terminus pick, across all boxes
centroid_xs = []
centroid_ys = []
BOIs_final = []
scenenames = []
basepath = '/media/jukes/jukes1/LS8aws/'
metric = "terminus_highest"+massorsize+"/"

#Sort and index the date table once, instead of on every pass through the box loop
datetime_df = datetime_df.sort_values(by='Img_Date')
dates_by_scene = datetime_df.set_index('Img_Date')

for BOI in BoxIDs:
    print("Box"+BOI)
    imagepath = basepath+"Box"+BOI+"/rotated/resized/"

    #parallel lists: one entry per image in this box that has a usable terminus pick
    image_arrays = []
    dats = []
    trimdats = []
    imgnames = []
    avgpix_values = []
    skews = []
    BOIs = []

    for imgfile in os.listdir(imagepath):
        #only look at the png images for this box
        if not imgfile.endswith(BOI+".png"):
            continue

        imgname = imgfile[0:-4]      #file name without the ".png" extension
        scenename = imgname[7:-16]   #strip the leading "crop_R_" and trailing "_B8_PS_Buffer<ID>"
        pathtodat = imagepath+imgname+".pgm_max_gaussian/"+metric
        datfiles = os.listdir(pathtodat)

        #a usable pick has exactly 2 dat files; check this before reading the image
        #so skipped scenes cost nothing
        if len(datfiles) != 2:
            continue

        #the two files must be one trimmed pick and one original; anything else would
        #leave the parallel lists misaligned, so skip it loudly
        trimmed = [d for d in datfiles if "trim" in d]
        originals = [d for d in datfiles if "trim" not in d]
        if len(trimmed) != 1 or len(originals) != 1:
            print("EXPECTED ONE TRIMMED AND ONE ORIGINAL DAT FILE. SKIP:", pathtodat)
            continue

        #NOT FILTERING FOR CLOUDS AGAIN CURRENTLY:
        #the skew and average pixel value are recorded, but the cloud thresholds
        #(avgpix_val < 0.50 and skew > -0.80) are not applied
        image = mpimg.imread(imagepath+imgfile)
        pixelvals = image.reshape(image.shape[0]*image.shape[1])
        skew = scipy.stats.skew(pixelvals, bias=False)
        avgpix_val = np.average(pixelvals)

        image_arrays.append(image)
        imgnames.append(scenename)
        skews.append(skew)
        avgpix_values.append(avgpix_val)
        BOIs.append(BOI)
        trimdats.append(trimmed[0])
        dats.append(originals[0])

    print(len(image_arrays), len(dats), len(trimdats), len(imgnames), len(avgpix_values))
    images_df = pd.DataFrame(list(zip(imgnames, BOIs, image_arrays, dats, trimdats, avgpix_values, skews)),
                  columns=['Scene', 'BoxID','Image array', 'Dat file name', "Trimmed dat file name", 'Avg pix val', 'Skew'])

    #JOIN DATAFRAMES: attach the date to each scene, then order the scenes by date
    new_df = images_df.set_index('Scene').join(dates_by_scene)
    dated_images_df = new_df.sort_values(by='datetimes')

    #CALCULATE ALL CENTROIDS
    for imagename, row in dated_images_df.iterrows():
        BoxID = row['BoxID']
        datpath = basepath+"Box"+BoxID+"/rotated/resized/crop_R_"+imagename+"_B8_PS_Buffer"+BoxID+".pgm_max_gaussian/"+metric

        #Read in the dat files as 2-D arrays (column 0 = x, column 1 = y).
        #ndmin=2 keeps a one-point pick as a (1, ncols) array instead of a flat vector.
        term_trim_dat = np.loadtxt(datpath+row['Trimmed dat file name'], ndmin=2)   #TRIMMED
        term_dat = np.loadtxt(datpath+row['Dat file name'], ndmin=2)                #ORIGINAL

        #an empty pick has no centroid; skip it rather than abort the whole run
        if term_trim_dat.size == 0 or term_dat.size == 0:
            print("EMPTY TERMINUS PICK. SKIP:", imagename)
            continue

        #CALCULATE CENTROIDS AND APPEND TO LISTS
        #the original-pick centroid is calculated but only the trimmed one is stored
        center_x, center_y = centroid(term_dat[:, 0], term_dat[:, 1])
        trim_center_x, trim_center_y = centroid(term_trim_dat[:, 0], term_trim_dat[:, 1])
        centroid_xs.append(trim_center_x)
        centroid_ys.append(trim_center_y)
        BOIs_final.append(BoxID)
        scenenames.append(imagename)

Box001
137 137 137 137 137
Box002
29 29 29 29 29
Box004
20 20 20 20 20
Box033
94 94 94 94 94
Box120
106 106 106 106 106
Box174
47 47 47 47 47
Box235
93 93 93 93 93
Box259
76 76 76 76 76
Box277
62 62 62 62 62
Box531
208 208 208 208 208


### 2) OPTION B: Calculate midpoints for terminus picked using a metric (mass or size)

Grabs the terminus pick line coordinates from the .dat files generated from the 2D WTMM in Xsmurf and identifies the middle one. This finds the midpoint of the trimmed terminus pick (term_trim_dat).

In [38]:
#Outputs of this cell: one entry per terminus pick, across all boxes
midpoints_xs = []
midpoints_ys = []
BOIs_final = []
scenenames = []
basepath = '/media/jukes/jukes1/LS8aws/'
metric = "terminus_highest"+massorsize+"/"

#Sort and index the date table once, instead of on every pass through the box loop
datetime_df = datetime_df.sort_values(by='Img_Date')
dates_by_scene = datetime_df.set_index('Img_Date')

for BOI in BoxIDs:
    print("Box"+BOI)
    imagepath = basepath+"Box"+BOI+"/rotated/resized/"

    #parallel lists: one entry per image in this box that has a usable terminus pick
    image_arrays = []
    dats = []
    trimdats = []
    imgnames = []
    avgpix_values = []
    skews = []
    BOIs = []

    for imgfile in os.listdir(imagepath):
        #only look at the png images for this box
        if not imgfile.endswith(BOI+".png"):
            continue

        imgname = imgfile[0:-4]      #file name without the ".png" extension
        scenename = imgname[7:-16]   #strip the leading "crop_R_" and trailing "_B8_PS_Buffer<ID>"
        pathtodat = imagepath+imgname+".pgm_max_gaussian/"+metric
        datfiles = os.listdir(pathtodat)

        #a usable pick has exactly 2 dat files; check this before reading the image
        #so skipped scenes cost nothing
        if len(datfiles) != 2:
            continue

        #the two files must be one trimmed pick and one original; anything else would
        #leave the parallel lists misaligned, so skip it loudly
        trimmed = [d for d in datfiles if "trim" in d]
        originals = [d for d in datfiles if "trim" not in d]
        if len(trimmed) != 1 or len(originals) != 1:
            print("EXPECTED ONE TRIMMED AND ONE ORIGINAL DAT FILE. SKIP:", pathtodat)
            continue

        #NOT FILTERING FOR CLOUDS AGAIN CURRENTLY:
        #the skew and average pixel value are recorded, but the cloud thresholds
        #(avgpix_val < 0.50 and skew > -0.80) are not applied
        image = mpimg.imread(imagepath+imgfile)
        pixelvals = image.reshape(image.shape[0]*image.shape[1])
        skew = scipy.stats.skew(pixelvals, bias=False)
        avgpix_val = np.average(pixelvals)

        image_arrays.append(image)
        imgnames.append(scenename)
        skews.append(skew)
        avgpix_values.append(avgpix_val)
        BOIs.append(BOI)
        trimdats.append(trimmed[0])
        dats.append(originals[0])

    print(len(image_arrays), len(dats), len(trimdats), len(imgnames), len(avgpix_values))
    images_df = pd.DataFrame(list(zip(imgnames, BOIs, image_arrays, dats, trimdats, avgpix_values, skews)),
                  columns=['Scene', 'BoxID','Image array', 'Dat file name', "Trimmed dat file name", 'Avg pix val', 'Skew'])

    #JOIN DATAFRAMES: attach the date to each scene, then order the scenes by date
    new_df = images_df.set_index('Scene').join(dates_by_scene)
    dated_images_df = new_df.sort_values(by='datetimes')

    #CALCULATE ALL MIDPOINTS
    for imagename, row in dated_images_df.iterrows():
        BoxID = row['BoxID']
        datpath = basepath+"Box"+BoxID+"/rotated/resized/crop_R_"+imagename+"_B8_PS_Buffer"+BoxID+".pgm_max_gaussian/"+metric

        #Read in the trimmed dat file as a 2-D array (column 0 = x, column 1 = y).
        #ndmin=2 keeps a one-point pick as a (1, ncols) array instead of a flat vector.
        term_trim_dat = np.loadtxt(datpath+row['Trimmed dat file name'], ndmin=2)

        #an empty pick has no midpoint; skip it rather than abort the whole run
        if term_trim_dat.size == 0:
            print("EMPTY TRIMMED TERMINUS PICK. SKIP:", imagename)
            continue

        #index of the "midpoint" of the terminus line. Floor division always lands on
        #the middle row; rounding len/2 half-to-even picked the LAST point of a
        #3-point line (1.5 -> 2), and np.round_ no longer exists in NumPy 2.
        mid_index = len(term_trim_dat) // 2
        trim_mid_x = term_trim_dat[mid_index, 0]
        trim_mid_y = term_trim_dat[mid_index, 1]

        #APPEND TO LISTS
        midpoints_xs.append(trim_mid_x)
        midpoints_ys.append(trim_mid_y)
        BOIs_final.append(BoxID)
        scenenames.append(imagename)

Box001
164 164 164 164 164
Box002
29 29 29 29 29
Box004
20 20 20 20 20
Box033
112 112 112 112 112
Box120
103 103 103 103 103
Box174
60 60 60 60 60
Box235
124 124 124 124 124
Box259
115 115 115 115 115
Box277
62 62 62 62 62
Box531
237 237 237 237 237


### 3) Store the midpoints (or centroids) in a DataFrame and export to a csv file


Exports the trimmed terminus midpoints to a csv file called __trim_term_midpoints_&lt;massorsize&gt;.csv__ (e.g. __trim_term_midpoints_mass.csv__).

OR 

Exports the trimmed terminus centroids to a csv file called __trim_centroids_&lt;massorsize&gt;.csv__ (e.g. __trim_centroids_mass.csv__).

In [39]:
#scenenames and BOIs_final are filled by BOTH the centroid cell (OPTION A) and the
#midpoint cell (OPTION B). If the cells were run out of order the lists can come from
#different runs, and zipping them would silently truncate or mispair rows - so check first.
column_lengths = {len(scenenames), len(BOIs_final), len(midpoints_xs), len(midpoints_ys)}
if len(column_lengths) > 1:
    raise ValueError("Scene, BoxID and midpoint lists differ in length; re-run the OPTION B cell before exporting.")

midpoints_df = pd.DataFrame(
    {'Scene': scenenames, 'BoxID': BOIs_final, 'Mid_X': midpoints_xs, 'Mid_Y': midpoints_ys},
    columns=['Scene','BoxID', 'Mid_X','Mid_Y'])

#save as
midpoints_df.to_csv(path_or_buf = '/home/jukes/Documents/Sample_glaciers/trim_term_midpoints_'+massorsize+'.csv', sep=',')
midpoints_df

Unnamed: 0,Scene,BoxID,Mid_X,Mid_Y
0,LC80330052013125LGN01,001,146.0,160.0
1,LC80320052013134LGN03,001,169.0,149.0
2,LC80330052013141LGN01,001,184.0,153.0
3,LC80360042013146LGN00,001,145.0,160.0
4,LC80340052013148LGN00,001,145.0,160.0
5,LC80310052013239LGN00,001,140.0,158.0
6,LC80340052013244LGN00,001,143.0,162.0
7,LC80350052013251LGN00,001,139.0,160.0
8,LC80360042013258LGN00,001,158.0,161.0
9,LC80340052013260LGN00,001,138.0,159.0


In [33]:
# Alternative export for OPTION A: uncomment the lines below to save the trimmed
# terminus centroids (centroid_xs, centroid_ys) instead of the midpoints.
# centroids_df = pd.DataFrame(list(zip(scenenames, BOIs_final, centroid_xs, centroid_ys)),
#               columns=['Scene','BoxID', 'Centroid_X','Centroid_Y'])

# #save as
# centroids_df.to_csv(path_or_buf = '/home/jukes/Documents/Sample_glaciers/trim_centroids_'+massorsize+'.csv', sep=',')
# centroids_df