# Parameter optimization for size and mod thresholds

## Import packages and set paths

In [3]:
import pandas as pd
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import cv2
import matplotlib.image as mpimg
import matplotlib.pylab as pl
import numpy.ma as ma
import datetime
import math
import scipy.optimize
import random

In [4]:
basepath = '/media/jukes/jukes1/'
sg_path = '/home/jukes/Documents/Sample_glaciers/'

## Read in analysis dates for manual and automated delinations, convert to datetime objs

In [49]:
#read in manual image dates
manual_df= pd.read_csv(basepath+'Manual/manual_tpos.csv', sep=',', dtype=str, header=0)
manual_df = manual_df.dropna()
manual_df.drop_duplicates(subset=['BoxID','datetimes'])
print(manual_df.shape)
manual_df.head()

(511, 8)


Unnamed: 0.1,Unnamed: 0,BoxID,datetimes,Line_x,Line_y,intersect_x,intersect_y,term_position
0,186,1,2013-05-05,"[135, 134, 135, 133, 133, 132, 134, 134, 135, ...","[140, 141, 141, 142, 143, 144, 145, 146, 146, ...",143.0,160.0,393.5177886703472
1,269,1,2013-05-14,"[131, 132, 133, 133, 134, 135, 136, 136, 137, ...","[143, 144, 145, 146, 147, 148, 148, 149, 149, ...",143.0,160.0,393.5177886703472
2,184,1,2013-05-29,"[132, 132, 132, 134, 134, 136, 136, 137, 139, ...","[142, 143, 144, 145, 146, 146, 147, 148, 149, ...",144.0,161.0,406.7324058886875
3,254,1,2013-08-23,"[133, 132, 133, 131, 132, 131, 130, 131, 131, ...","[141, 142, 142, 143, 143, 144, 145, 146, 147, ...",141.0,161.0,361.9478553604096
4,266,1,2013-08-27,"[130, 129, 130, 129, 130, 131, 132, 133, 133, ...","[143, 144, 144, 147, 148, 148, 148, 149, 150, ...",140.0,160.0,348.9717037239553


In [20]:
#Read in datetags csv as datetime_df
automated_df = pd.read_csv(sg_path+'imgdates.csv', sep=',', dtype=str, header=0, names=['Scene', 'datetimes'])
print(automated_df.shape)
automated_df.head()

(864, 2)


Unnamed: 0,Scene,datetimes
510,LC80090132013101LGN01,2013-04-11
1612,LC80090142013101LGN01,2013-04-11
577,LC82330172013102LGN01,2013-04-12
940,LC82330152013102LGN01,2013-04-12
445,LC80080142013110LGN01,2013-04-20


## Find overlaps and select 90% for training, 10% for testing

In [66]:
overlap_df = manual_df.merge(automated_df, how='inner', on=['datetimes'])
overlap_df = overlap_df.drop(['Line_x', 'Line_y'], axis=1)
overlap_df = overlap_df.drop_duplicates(['BoxID','datetimes'])
overlap_df = overlap_df.sort_values(by=['BoxID','datetimes'], ascending=True)
overlap_df.shape

(456, 7)

In [69]:
dates = []
for idx, row in overlap_df.iterrows():
    dateID = str(row['BoxID'])+','+str(row['datetimes']+','+str(row['Scene']))
    dates.append(dateID)
# print(date_IDs)

### Select 90% for training

In [70]:
N = len(dates)
print(N)

#pick a random sample of dates for training
train_dates = random.sample(dates, int(N*0.9))
print(len(train_dates))
# print(train_dates)

#grab remaining for testing
test_dates = []
for date in dates:
    if date not in train_dates:
        test_dates.append(date)
print(len(test_dates))
# print(test_dates)

#Check that they don't overlap, should return empty
print(len(train_dates)+len(test_dates))
print(set(train_dates).intersection(test_dates))

456
410
46
456
set()


In [71]:
boxes = []
imgdates = []
scenes = []

for td in train_dates:
    BoxID, imgdate, scene = td.split(',')
    boxes.append(BoxID)
    imgdates.append(imgdate)
    scenes.append(scene)

train_df = pd.DataFrame(list(zip(boxes, imgdates, scenes)), columns=['BoxID', 'datetime', 'Scene'])
train_df.head()

Unnamed: 0,BoxID,datetime,Scene
0,259,2015-07-16,LC82320152015197LGN00
1,1,2017-03-18,LC80360042017077LGN00
2,1,2014-07-20,LC80160012014201LGN00
3,2,2014-08-30,LC80310052014242LGN00
4,120,2014-11-02,LC82320172014306LGN00


In [72]:
#export to csv and text
train_df.to_csv(basepath+'/Manual/train.csv', sep=',', index=False, header=False)
train_df.to_csv(basepath+'/Manual/train.txt', sep=' ', index=False, header=False)

### Grab test dates

In [73]:
boxes = []
imgdates = []
scenes = []

for td in test_dates:
    BoxID, imgdate, scene = td.split(',')
    boxes.append(BoxID)
    imgdates.append(imgdate)
    scenes.append(scene)

test_df = pd.DataFrame(list(zip(boxes, imgdates, scenes)), columns=['BoxID', 'datetime', 'Scene'])
test_df.head()
#export to csv and text
test_df.to_csv(basepath+'/Manual/test.csv', sep=',', index=False, header=False)
test_df.to_csv(basepath+'/Manual/test.txt', sep=' ', index=False, header=False)

## Define objective function

Example:

In [280]:
# def f(x):
#     return x**2
# minimum = scipy.optimize.fmin(f, 1)
# minimum[0]

#### Our objective function

In [281]:
DOA = '2019_12_16'

def center_dist(size_thresh, mod_thresh):
    #Calculate automated tpos
    #run terminus_pick.tcl using each of the thresholds
    terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
    print(terminus_pick)
    subprocess.call(terminus_pick, shell=True)
    
    #pull automated terminus position from the output
    #grab each output file
    differences = []
    
    for file in os.listdir(sg_path):
        if DOA in file and file.endswith('csv'):
            if len(file)>28:
                print(file)

                #read the output file in and calculate terminus position for each image
                #somehow gotta run Results script and pull the automated terminus positions
                auto_tpos = 

                #pull in manual tpos 
                man_tpos = 

                diff = abs(auto_tpos - man_tpos)
                differences.append(diff)
    
    #return objective function = distance between the two
    return np.average(differences)

IndentationError: expected an indented block (<ipython-input-281-38d79a1c7f96>, line 13)

In [2]:
os.listdir(sg_path)

NameError: name 'os' is not defined

#### Define the minimization function

In [242]:
def minimize(size_guess, mod_guess):
    minimum = scipy.optimize.fmin(center_dist, [size_guess, mod_guess], args=(size_guess, mod_guess),full_output=True)
    xopt = minimum[0][0]
    funcval = minimum[1]
    return xopt, funcval

#### Run the optimization

In [273]:
base_size_thresh = 0.8
base_mod_thresh = 0.8
thresh_range = 0.15
interval = 1000

size_guesses = np.linspace(base_size_thresh-thresh_range, base_size_thresh+thresh_range, interval)
mod_guesses = np.linspace(base_mod_thresh-thresh_range, base_mod_thresh+thresh_range, interval)

In [247]:
t_list = []
m_list = []

iterations = 5

for i in range(0, iterations-1):
    size_guess = random.choice(size_guesses)
    mod_guess = random.choice(mod_guesses)
    t, m = minimize(size_guess, mod_guess)
    t_list.append(t)
    m_list.append(m)

results_df = pd.DataFrame(list(zip(t_list, m_list)), columns=['min_th', 'min_f(th)'])

TypeError: center_dist() takes 1 positional argument but 3 were given

In [221]:
# size_thresh = 0.8
# mod_thresh = 0.7
# terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
# print(terminus_pick)
# subprocess.call(terminus_pick, shell=True)

## Cross-validation