# Parameter optimization for size and mod thresholds

## Import packages and set paths

In [3]:
import pandas as pd
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import cv2
import matplotlib.image as mpimg
import matplotlib.pylab as pl
import numpy.ma as ma
import datetime
import math
import scipy.optimize
import random

In [4]:
basepath = '/media/jukes/jukes1/'
sg_path = '/home/jukes/Documents/Sample_glaciers/'

## Read in analysis dates for manual and automated delinations, convert to datetime objs

In [49]:
#read in manual image dates
manual_df= pd.read_csv(basepath+'Manual/manual_tpos.csv', sep=',', dtype=str, header=0)
manual_df = manual_df.dropna()
manual_df.drop_duplicates(subset=['BoxID','datetimes'])
print(manual_df.shape)
manual_df.head()

(511, 8)


Unnamed: 0.1,Unnamed: 0,BoxID,datetimes,Line_x,Line_y,intersect_x,intersect_y,term_position
0,186,1,2013-05-05,"[135, 134, 135, 133, 133, 132, 134, 134, 135, ...","[140, 141, 141, 142, 143, 144, 145, 146, 146, ...",143.0,160.0,393.5177886703472
1,269,1,2013-05-14,"[131, 132, 133, 133, 134, 135, 136, 136, 137, ...","[143, 144, 145, 146, 147, 148, 148, 149, 149, ...",143.0,160.0,393.5177886703472
2,184,1,2013-05-29,"[132, 132, 132, 134, 134, 136, 136, 137, 139, ...","[142, 143, 144, 145, 146, 146, 147, 148, 149, ...",144.0,161.0,406.7324058886875
3,254,1,2013-08-23,"[133, 132, 133, 131, 132, 131, 130, 131, 131, ...","[141, 142, 142, 143, 143, 144, 145, 146, 147, ...",141.0,161.0,361.9478553604096
4,266,1,2013-08-27,"[130, 129, 130, 129, 130, 131, 132, 133, 133, ...","[143, 144, 144, 147, 148, 148, 148, 149, 150, ...",140.0,160.0,348.9717037239553


In [20]:
#Read in datetags csv as datetime_df
automated_df = pd.read_csv(sg_path+'imgdates.csv', sep=',', dtype=str, header=0, names=['Scene', 'datetimes'])
print(automated_df.shape)
automated_df.head()

(864, 2)


Unnamed: 0,Scene,datetimes
510,LC80090132013101LGN01,2013-04-11
1612,LC80090142013101LGN01,2013-04-11
577,LC82330172013102LGN01,2013-04-12
940,LC82330152013102LGN01,2013-04-12
445,LC80080142013110LGN01,2013-04-20


## Find overlaps and select 90% for training, 10% for testing

In [66]:
overlap_df = manual_df.merge(automated_df, how='inner', on=['datetimes'])
overlap_df = overlap_df.drop(['Line_x', 'Line_y'], axis=1)
overlap_df = overlap_df.drop_duplicates(['BoxID','datetimes'])
overlap_df = overlap_df.sort_values(by=['BoxID','datetimes'], ascending=True)
overlap_df.shape

(456, 7)

In [69]:
dates = []
for idx, row in overlap_df.iterrows():
    dateID = str(row['BoxID'])+','+str(row['datetimes']+','+str(row['Scene']))
    dates.append(dateID)
# print(date_IDs)

### Select 90% for training

In [70]:
N = len(dates)
print(N)

#pick a random sample of dates for training
train_dates = random.sample(dates, int(N*0.9))
print(len(train_dates))
# print(train_dates)

#grab remaining for testing
test_dates = []
for date in dates:
    if date not in train_dates:
        test_dates.append(date)
print(len(test_dates))
# print(test_dates)

#Check that they don't overlap, should return empty
print(len(train_dates)+len(test_dates))
print(set(train_dates).intersection(test_dates))

456
410
46
456
set()


In [71]:
boxes = []
imgdates = []
scenes = []

for td in train_dates:
    BoxID, imgdate, scene = td.split(',')
    boxes.append(BoxID)
    imgdates.append(imgdate)
    scenes.append(scene)

train_df = pd.DataFrame(list(zip(boxes, imgdates, scenes)), columns=['BoxID', 'datetime', 'Scene'])
train_df.head()

Unnamed: 0,BoxID,datetime,Scene
0,259,2015-07-16,LC82320152015197LGN00
1,1,2017-03-18,LC80360042017077LGN00
2,1,2014-07-20,LC80160012014201LGN00
3,2,2014-08-30,LC80310052014242LGN00
4,120,2014-11-02,LC82320172014306LGN00


In [72]:
#export to csv and text
train_df.to_csv(basepath+'/Manual/train.csv', sep=',', index=False, header=False)
train_df.to_csv(basepath+'/Manual/train.txt', sep=' ', index=False, header=False)

### Grab test dates

In [73]:
boxes = []
imgdates = []
scenes = []

for td in test_dates:
    BoxID, imgdate, scene = td.split(',')
    boxes.append(BoxID)
    imgdates.append(imgdate)
    scenes.append(scene)

test_df = pd.DataFrame(list(zip(boxes, imgdates, scenes)), columns=['BoxID', 'datetime', 'Scene'])
test_df.head()
#export to csv and text
test_df.to_csv(basepath+'/Manual/test.csv', sep=',', index=False, header=False)
test_df.to_csv(basepath+'/Manual/test.txt', sep=' ', index=False, header=False)

## Define objective function

I'm using a modified version of the L1-norm. The objective funciton will be 1/N * (|Xa-Xm|i) where i=3 (for each centerline 50, 25, 75) and N equals the number of delineations generated (the more the better). The goal then is to minimize the objective function.

Read in centerlines_df:

In [74]:
# pathrows_df = pd.read_csv(basepath+'LS_pathrows_multiple.csv', sep=',', usecols =[0,1,2], dtype=str)
centerline_df = pd.read_csv('/home/jukes/Documents/Sample_glaciers/Boxes_coords_pathrows.csv', sep=',',dtype=str)
centerline_df = centerline_df.set_index('BoxID')
centerline_df

Unnamed: 0_level_0,lmid50_x,lmid50_y,rmid50_x,m50,b50,lmid25_x,lmid25_y,rmid25_x,m25,b25,lmid75_x,lmid75_y,rmid75_x,m75,b75
BoxID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
259,234.5,497.0,561.0,0.0704441041347626,480.48085758039815,236.25,475.5,562.5,0.0704980842911877,458.8575803981623,232.75,518.5,559.5,0.0703902065799541,502.104134762634
120,111.0,148.5,182.5,0.0559440559440559,142.2902097902098,112.0,133.25,183.25,0.0491228070175438,126.98426573426572,110.0,163.75,181.75,0.0627177700348432,157.59615384615384
277,151.5,208.0,243.5,-0.0543478260869565,216.2336956521739,150.75,194.5,242.75,-0.0597826086956521,202.6929347826087,152.25,221.5,244.25,-0.0489130434782608,229.77445652173915
531,166.0,250.5,287.5,-0.1563786008230452,276.4588477366255,162.5,227.25,283.75,-0.156701030927835,252.66152263374485,169.5,273.75,291.25,-0.1560574948665297,300.2561728395062
235,178.5,207.0,287.0,0.2211981566820276,167.51612903225805,181.75,193.0,290.0,0.2217090069284064,152.79723502304148,175.25,221.0,284.0,0.2206896551724138,182.23502304147465
174,262.5,346.0,393.0,-0.3333333333333333,433.5,253.75,319.5,384.0,-0.3397312859884837,404.0833333333333,271.25,372.5,402.0,-0.3269598470363288,462.9166666666666
1,117.0,164.5,189.0,-0.1111111111111111,177.5,116.0,154.75,188.0,-0.1111111111111111,167.63888888888889,118.0,174.25,190.0,-0.1111111111111111,187.36111111111111
33,369.0,480.0,612.0,-0.0061728395061728,482.27777777777777,369.0,453.5,612.0,-0.0092592592592592,455.77777777777777,369.0,506.5,612.0,-0.0030864197530864,508.77777777777777
4,499.5,653.0,788.0,-0.2980935875216637,801.897746967071,484.75,603.5,773.0,-0.2983521248915872,748.0008665511265,514.25,702.5,803.0,-0.2978354978354978,855.7946273830156
2,467.5,575.5,752.0,0.0333919156414762,559.8892794376098,468.75,537.75,753.5,0.0324846356453029,522.097539543058,466.25,613.25,750.5,0.0343007915567282,597.6810193321617


#### Our objective function

In [281]:
DOA = '2020_01_20'

def calc_theta(size_thresh, mod_thresh):
    #Calculate automated tpos
    #run terminus_pick.tcl using each of the thresholds
    terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
    print(terminus_pick)
    subprocess.call(terminus_pick, shell=True)
    
    #pull automated terminus position from the output
    #grab each output file
    differences = []
    
    for file in os.listdir(sg_path):
        if DOA in file and file.endswith('csv'):
            if len(file)>28:
                print(file)

                #read the output file in and calculate terminus position for each image
                #pull automated terminus delineations
                auto_tpos = 

                #pull in manual tpos 
                man_tpos = 

                diff = abs(auto_tpos - man_tpos)
                differences.append(diff)
    
#     #return objective function = distance between the two
#     return np.average(differences)

IndentationError: expected an indented block (<ipython-input-281-38d79a1c7f96>, line 13)

In [2]:
os.listdir(sg_path)

NameError: name 'os' is not defined

#### Define the minimization function

In [242]:
def minimize(size_guess, mod_guess):
    minimum = scipy.optimize.fmin(center_dist, [size_guess, mod_guess], args=(size_guess, mod_guess),full_output=True)
    xopt = minimum[0][0]
    funcval = minimum[1]
    return xopt, funcval

#### Run the optimization

In [273]:
base_size_thresh = 0.8
base_mod_thresh = 0.8
thresh_range = 0.15
interval = 1000

size_guesses = np.linspace(base_size_thresh-thresh_range, base_size_thresh+thresh_range, interval)
mod_guesses = np.linspace(base_mod_thresh-thresh_range, base_mod_thresh+thresh_range, interval)

In [247]:
t_list = []
m_list = []

iterations = 5

for i in range(0, iterations-1):
    size_guess = random.choice(size_guesses)
    mod_guess = random.choice(mod_guesses)
    t, m = minimize(size_guess, mod_guess)
    t_list.append(t)
    m_list.append(m)

results_df = pd.DataFrame(list(zip(t_list, m_list)), columns=['min_th', 'min_f(th)'])

TypeError: center_dist() takes 1 positional argument but 3 were given

In [221]:
# size_thresh = 0.8
# mod_thresh = 0.7
# terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
# print(terminus_pick)
# subprocess.call(terminus_pick, shell=True)

## Cross-validation