# Threshold optimization 

Jukes Liu

## Import packages and set paths

In [1]:
import pandas as pd
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import cv2
import matplotlib.image as mpimg
import matplotlib.pylab as pl
import numpy.ma as ma
import datetime
import math
import scipy.optimize
import random

In [2]:
# Machine-specific root folders used throughout the notebook.
# sg_path: folder containing imgdates.csv (dates of the automated images)
sg_path = '/home/jukes/Documents/Sample_glaciers/'
# basepath: drive containing the Manual/ folder (manual picks, train/test lists)
basepath = '/media/jukes/jukes1/'

## Read in analysis dates for manual and automated delineations, convert to datetime objects

In [3]:
#read in manual image dates (every column is kept as a string)
manual_df = pd.read_csv(basepath+'Manual/manual_tpos.csv', sep=',', dtype=str, header=0)
manual_df = manual_df.dropna()
# drop_duplicates returns a new DataFrame; without the assignment the call has no effect
manual_df = manual_df.drop_duplicates(subset=['BoxID','datetimes'])
print(manual_df.shape)
manual_df.head()

(484, 10)


Unnamed: 0.1,Unnamed: 0,BoxID,datetimes,Line_x,Line_y,intersect_x,intersect_y,tpos50,tpos25,tpos75
0,186,1,2013-05-05,"[135, 134, 135, 133, 133, 132, 134, 134, 135, ...","[141, 142, 142, 143, 144, 145, 146, 147, 147, ...",142,159,390.0,270.0,465.0
1,269,1,2013-05-14,"[131, 132, 133, 133, 134, 135, 136, 136, 137, ...","[144, 145, 146, 147, 148, 149, 149, 150, 150, ...",142,159,390.0,240.0,450.0
2,184,1,2013-05-29,"[132, 132, 132, 134, 134, 136, 136, 137, 139, ...","[143, 144, 145, 146, 147, 147, 148, 149, 150, ...",144,159,420.0,285.0,480.0
3,254,1,2013-08-23,"[133, 132, 133, 131, 132, 131, 130, 131, 131, ...","[142, 143, 143, 144, 144, 145, 146, 147, 148, ...",140,159,360.0,210.0,465.0
4,266,1,2013-08-27,"[130, 129, 130, 129, 130, 131, 132, 133, 133, ...","[144, 145, 145, 148, 149, 149, 149, 150, 151, ...",140,159,360.0,180.0,435.0


In [4]:
# Load the automated image dates csv as automated_df (Scene and datetimes columns, all strings)
imgdates_csv = sg_path+'imgdates.csv'
automated_df = pd.read_csv(
    imgdates_csv,
    sep=',',
    dtype=str,
    header=0,
    names=['Scene', 'datetimes'],
)
print(automated_df.shape)
automated_df.head()

(864, 2)


Unnamed: 0,Scene,datetimes
510,LC80090132013101LGN01,2013-04-11
1612,LC80090142013101LGN01,2013-04-11
577,LC82330172013102LGN01,2013-04-12
940,LC82330152013102LGN01,2013-04-12
445,LC80080142013110LGN01,2013-04-20


## Find overlaps and select 90% for training, 10% for testing

In [8]:
# Keep the manual rows whose date also appears in the automated image list,
# drop the delineation coordinate columns, and keep one row per (BoxID, datetimes),
# ordered by box and then by date.
key_cols = ['BoxID','datetimes']
overlap_df = (
    manual_df
    .merge(automated_df, how='inner', on=['datetimes'])
    .drop(['Line_x', 'Line_y'], axis=1)
    .drop_duplicates(key_cols)
    .sort_values(by=key_cols, ascending=True)
)
overlap_df.shape

(432, 9)

In [9]:
# Encode every overlapping delineation as one "BoxID,date,Scene" string
dates = [
    str(box)+','+str(day+','+str(scene))
    for box, day, scene in zip(overlap_df['BoxID'], overlap_df['datetimes'], overlap_df['Scene'])
]
# print(dates)

### Select 90% for training

In [10]:
N = len(dates); print(N)

# Use a dedicated, seeded generator so the train/test split is the same on every
# Restart & Run All (an unseeded sample makes the exported lists change per run).
SPLIT_SEED = 0
split_rng = random.Random(SPLIT_SEED)

#pick a random sample of dates for training (90% of the overlapping delineations)
train_dates = split_rng.sample(dates, int(N*0.9))
print(len(train_dates))
# print(train_dates)

#grab remaining for testing; a set makes each membership check constant-time
train_lookup = set(train_dates)
test_dates = [date for date in dates if date not in train_lookup]
print(len(test_dates))
# print(test_dates)

#Check that they don't overlap, should return empty
print(len(train_dates)+len(test_dates)); print(set(train_dates).intersection(test_dates))

432
388
44
432
set()


In [13]:
# Split every "BoxID,date,Scene" training string back into its three fields.
# The three-name unpacking raises if an entry does not have exactly three parts.
parsed = []
for entry in train_dates:
    BoxID, imgdate, scene = entry.split(',')
    parsed.append((BoxID, imgdate, scene))

boxes = [fields[0] for fields in parsed]
imgdates = [fields[1] for fields in parsed]
scenes = [fields[2] for fields in parsed]

train_df = pd.DataFrame(parsed, columns=['BoxID', 'datetime', 'Scene'])
train_df.head()

Unnamed: 0,BoxID,datetime,Scene
0,2,2016-03-19,LC80320052016079LGN00
1,120,2016-09-11,LC82330152016255LGN00
2,2,2014-03-28,LC80340052014087LGN00
3,2,2013-08-27,LC80310052013239LGN00
4,120,2016-12-25,LC82320182016360LGN00


In [14]:
#export to csv and text
train_df.to_csv(basepath+'/Manual/train.csv', sep=',', index=False, header=False)
train_df.to_csv(basepath+'/Manual/train.txt', sep=' ', index=False, header=False)

### Grab test dates

In [15]:
# Split every "BoxID,date,Scene" test string back into its three fields.
# The three-name unpacking raises if an entry does not have exactly three parts.
parsed = []
for entry in test_dates:
    BoxID, imgdate, scene = entry.split(',')
    parsed.append((BoxID, imgdate, scene))

boxes = [fields[0] for fields in parsed]
imgdates = [fields[1] for fields in parsed]
scenes = [fields[2] for fields in parsed]

test_df = pd.DataFrame(parsed, columns=['BoxID', 'datetime', 'Scene'])
# test_df.head()
#export the test list twice: comma-separated csv and space-separated text (no index, no header)
for out_path, delimiter in ((basepath+'/Manual/test.csv', ','), (basepath+'/Manual/test.txt', ' ')):
    test_df.to_csv(out_path, sep=delimiter, index=False, header=False)

## Define objective function

I'm using a modified version of the L1-norm, imported from the automated_terminus_functions.py script. The objective function is $\theta = \frac{1}{N}\sum_{i=1}^{3} |X_a - X_m|_i$, where $X_a$ and $X_m$ are the automated and manual terminus positions, $i$ indexes the three centerlines (50, 25, 75), and $N$ is the number of delineations generated (the more the better). The goal is then to minimize the objective function.

In [5]:
# NOTE: disabled draft of calc_theta, kept for reference only. Nothing in this cell runs;
# the notebook later imports calc_theta and objective_func from automated_terminus_functions.
# The draft is incomplete: the auto_tpos and man_tpos assignments have no right-hand side,
# so it cannot simply be uncommented.

# DOA = '2020_01_20'

# def calc_theta(size_thresh, mod_thresh):
#     #Calculate automated tpos
#     #run terminus_pick.tcl using each of the thresholds
#     terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
#     print(terminus_pick)
#     subprocess.call(terminus_pick, shell=True)
    
#     #pull automated terminus position from the output
#     #grab each output file
#     differences = []
    
#     for file in os.listdir(sg_path):
#         if DOA in file and file.endswith('csv'):
#             if len(file)>28:
#                 print(file)

#                 #read the output file in and calculate terminus position for each image
#                 #pull automated terminus delineations
#                 auto_tpos = 

#                 #pull in manual tpos 
#                 man_tpos = 

#                 diff = abs(auto_tpos - man_tpos)
#                 differences.append(diff)
    
# #     #return objective function = distance between the two
# #     return np.average(differences)

In [6]:
# NOTE: disabled draft of a scipy.optimize.fmin wrapper, kept for reference only.
# NOTE(review): it minimizes `center_dist`, which is not defined anywhere in the visible
# notebook, and passes the two guesses both as x0 and as extra args — confirm the intended
# objective and signature before re-enabling. Only the first optimized parameter
# (minimum[0][0]) is returned even though x0 has two entries.
# def minimize(size_guess, mod_guess):
#     minimum = scipy.optimize.fmin(center_dist, [size_guess, mod_guess], args=(size_guess, mod_guess),full_output=True)
#     xopt = minimum[0][0]
#     funcval = minimum[1]
#     return xopt, funcval

## Run the optimization using scipy.optimize.fmin()

    scipy.optimize.fmin(func, x0, args=(), xtol=0.0001, ftol=0.0001, maxiter=None, maxfun=None, full_output=0, disp=1, retall=0, callback=None, initial_simplex=None)

Minimize a function using the downhill simplex algorithm.
This algorithm only uses function values, not derivatives or second derivatives.

Parameters
   - func : callable func(x, *args)
The objective function to be minimized.

   - x0 : ndarray
Initial guess.

Returns
   - xopt : ndarray
Parameter that minimizes function.

   - fopt : float
Value of function at minimum: fopt = func(xopt).

   - iter : int
Number of iterations performed.

   - funcalls : int
Number of function calls made.

   - warnflag : int
1 : Maximum number of function evaluations made. 2 : Maximum number of iterations reached.

   - allvecs : list
Solution at each iteration.

In [5]:
# Switch the working directory to the project repository, presumably so the local
# automated_terminus_functions module below can be found on import.
# NOTE(review): the working directory stays changed for every later cell — confirm
# that nothing afterwards relies on relative paths from the original location.
os.chdir('/home/jukes/automated-glacier-terminus')
# Objective function used by the threshold search below, plus calc_theta.
from automated_terminus_functions import objective_func, calc_theta

In [6]:
# Grid-search settings: each threshold is scanned from (base - thresh_range) to
# (base + thresh_range) in increments of `step`. `interval` is not used in this cell.
base_size_thresh = 0.8; base_mod_thresh = 0.8; thresh_range = 0.15; step=0.005; interval=1000

# np.arange with a fractional step has an unreliable length: whether the upper bound is
# included depends on floating-point rounding of the stop value. Build the grids with an
# explicit point count instead, so both endpoints are always part of the scan.
n_guesses = int(round(2*thresh_range/step)) + 1
size_guesses = np.linspace(base_size_thresh-thresh_range, base_size_thresh+thresh_range, n_guesses)
mod_guesses = np.linspace(base_mod_thresh-thresh_range, base_mod_thresh+thresh_range, n_guesses)

In [7]:
# Show the candidate size thresholds, followed by how many there are
print(size_guesses)
print(len(size_guesses))

[0.65  0.655 0.66  0.665 0.67  0.675 0.68  0.685 0.69  0.695 0.7   0.705
 0.71  0.715 0.72  0.725 0.73  0.735 0.74  0.745 0.75  0.755 0.76  0.765
 0.77  0.775 0.78  0.785 0.79  0.795 0.8   0.805 0.81  0.815 0.82  0.825
 0.83  0.835 0.84  0.845 0.85  0.855 0.86  0.865 0.87  0.875 0.88  0.885
 0.89  0.895 0.9   0.905 0.91  0.915 0.92  0.925 0.93  0.935 0.94  0.945
 0.95 ]
61


### Hard-coded:

In [9]:
import subprocess
import time
import pandas as pd; import numpy as np

# Exhaustive grid search: evaluate the objective function for every combination of
# size threshold and modulus threshold, recording thresholds, objective value and run time.
BOXIDS = ['001', '002', '120', '174', '259']; 
IDs = " ".join(BOXIDS) # objective_func receives the box IDs as one space-separated string

start_time = time.time() #start recording time

slist = []; mlist = []; thetalist = []; runtimes = []; #store run results
for s in size_guesses:
    s = float("{0:.3f}".format(s)) #make sure precision and format is correct (same for every m)
    for m in mod_guesses:
        t0 = time.time() # start recording time for each run
        
        m = float("{0:.3f}".format(m)) #make sure precision and format is correct
        slist.append(s); mlist.append(m); #append the thresholds to a list
        print(s, m)
        
        #run objective function calculation
        # NOTE(review): the saved output of this cell ends in
        # "NameError: name 'subprocess' is not defined" even though subprocess is imported
        # above, so the missing import is presumably inside automated_terminus_functions — confirm.
        theta = objective_func(IDs, s, m); print(theta)
        thetalist.append(theta) # keep the result; previously theta was printed but never stored
        
        runtime = time.time() - t0; runtimes.append(runtime) # calculate run time and store
        print("Iteration run time: %s seconds " % runtime) #print run time for each iteration
        
print("Total time elapsed: --- %s seconds ---" % (time.time() - start_time)) #Print total time elapsed

0.65 0.65


NameError: name 'subprocess' is not defined

## Cross-validation