## Code to calculate the auto and manual difference using an objective function

Weighted least squares solution

### Import packages, functions, manual and automated data

In [40]:
import pandas as pd
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import numpy.ma as ma
import datetime
import math

manual_path = '/media/jukes/jukes1/Manual/'; manual_filename = 'manual_tpos.csv'
auto_path = '/home/jukes/Documents/Sample_glaciers/'

In [41]:
os.chdir('/home/jukes/automated-glacier-terminus') #import necessary functions:
from automated_terminus_functions import distance

In [59]:
# MANUAL info
condition_df = pd.read_csv(manual_path+'LS8_manual_delineation_info.csv', dtype=str)

# TEST images
test_df = pd.read_csv(manual_path+'test.csv', dtype=str, header=None)
test_df = test_df.rename(columns={0: 'BoxID', 1: 'datetimes', 2: 'Scene'})
test_df.head()

Unnamed: 0,BoxID,datetimes,Scene
0,1,2013-09-03,LC80320052013246LGN00
1,1,2014-06-09,LC80010152014160LGN00
2,1,2015-06-26,LC80350052015177LGN00
3,1,2015-08-01,LC82320182015213LGN00
4,1,2015-08-24,LC80320052015236LGN00


In [66]:
#MANUAL TERMINUS POSITIONS
manual_df = pd.read_csv(manual_path+manual_filename, dtype=str,sep=',')

#SPLIT INTO 3 DATAFRAMES FOR 3 FLOWLINES:
manual50 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                      'tpos50']].copy().reset_index(drop=True).rename(columns={"tpos50": "tpos"})
manual25 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                      'tpos25']].copy().reset_index(drop=True).rename(columns={"tpos25": "tpos"})
manual75 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y',
                                      'tpos75']].copy().reset_index(drop=True).rename(columns={"tpos75": "tpos"})
# manual_df.head()

In [61]:
newIDs = []
for item in np.array(condition_df['BoxID']):
    if type(item) != float:
        newIDs.append(item.rjust(3, '0'))
    else:
        newIDs.append('NaN')
condition_df['BoxID'] = newIDs 
condition_df.head()

Unnamed: 0.1,Unnamed: 0,BoxID,datetimes,Path,Row,Scene,Condition,Not_exact_date,Jukes
0,21,1,2013-05-03,35,5,LC80350052013123LGN01,,,Jackie
1,24,1,2013-05-05,33,5,LC80330052013125LGN01,Sea ice,,Delineation rate (2 ppl):
2,27,1,2013-05-14,32,5,LC80320052013134LGN03,Sea ice,,1 - 2.5 hrs / 160 lines = ~1.9 min/line
3,37,1,2013-05-28,34,5,LC80340052013148LGN00,Sea ice,2013-05-29,2 - 3.25 hrs / 168 lines = ~2.3 min/line
4,61,1,2013-08-23,35,5,LC80350052013235LGN00,Clear,,120 -


In [85]:
# TEST image conditions by condition - manual
merge1 = manual_df.merge(condition_df, how='inner', on=['datetimes', 'BoxID']).drop(['Unnamed: 0_x', 
                                                                 'Unnamed: 0_y',
                                                                 'Line_x', 'Line_y', 
                                                                 'Jukes', 'Not_exact_date'], axis=1)
merge2 = merge1.merge(test_df, how='inner', on=['datetimes', 'BoxID', 'Scene'])
merge2

Unnamed: 0,BoxID,datetimes,intersect_x,intersect_y,tpos50,tpos25,tpos75,Path,Row,Scene,Condition
0,1,2013-09-03,141,158,375.0,270.0,465.0,32,5,LC80320052013246LGN00,Thin clouds
1,1,2015-06-26,142,159,390.0,300.0,435.0,35,5,LC80350052015177LGN00,Dim
2,1,2015-08-24,141,158,375.0,120.0,405.0,32,5,LC80320052015236LGN00,Clear
3,1,2016-05-04,141,159,375.0,120.0,435.0,34,5,LC80340052016125LGN00,Sea ice
4,1,2016-05-29,142,159,390.0,120.0,435.0,33,5,LC80330052016150LGN00,Sea ice
5,1,2017-03-13,135,158,285.0,120.0,345.0,33,5,LC80330052017072LGN00,Sea ice
6,1,2017-03-18,136,158,300.0,105.0,360.0,36,4,LC80360042017077LGN00,Sea ice
7,1,2017-04-05,136,158,300.0,120.0,360.0,34,5,LC80340052017095LGN00,Sea ice
8,2,2014-06-25,664,575,2932.5,2561.25,3093.75,33,5,LC80330052014176LGN00,Sea ice
9,2,2014-07-04,655,576,2797.5,2516.25,3093.75,32,5,LC80320052014185LGN00,Sea ice


In [247]:
auto75

Unnamed: 0,BoxID,datetimes,Scene,tpos
0,2,2013-05-28,LC80340052013148LGN00,3738.75
1,2,2014-04-17,LC80300052014107LGN00,3123.75
2,2,2014-06-27,LC80310052014178LGN00,3228.75
3,2,2014-08-12,LC80330052014224LGN00,3123.75
4,2,2014-09-20,LC80340052014263LGN00,2733.75
5,2,2014-09-22,LC80320052014265LGN00,2733.75
6,2,2014-09-24,LC80300052014267LGN00,2718.75
7,2,2014-09-29,LC80330052014272LGN00,2733.75
8,2,2014-10-01,LC80310052014274LGN00,2733.75
9,2,2015-03-06,LC80350052015065LGN00,2418.75


In [248]:
np.average([abs(2002.5-2932.5), abs(2921.25-2531.25), abs(3037.5-3082.5), abs(2501.25-2606.25), abs(3123.75-3153.75)])

300.0

In [234]:
# BoxIDs = ['001', '002', '120', '174', '259']
BoxIDs = ['002']
dfs = []

for BoxID in BoxIDs:
    auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
    auto50 = auto50[['BoxID','datetimes', 'Scene', 'tpos']].copy()
    auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
    auto25 = auto25[['BoxID','datetimes', 'Scene', 'tpos']].copy()
    auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
    auto75 = auto75[['BoxID','datetimes', 'Scene', 'tpos']].copy()
#     autodfs = [auto50, auto25, auto75]

#     manual = merge2[merge2.BoxID == BoxID].copy()
#     manual50 = manual[['BoxID','datetimes', 'Scene', 'tpos50', 'Condition']].copy().rename(columns={"tpos50": "tpos"})
#     manual25 = manual[['BoxID','datetimes', 'Scene', 'tpos25', 'Condition']].copy().rename(columns={"tpos25": "tpos"})
#     manual75 = manual[['BoxID','datetimes', 'Scene', 'tpos75', 'Condition']].copy().rename(columns={"tpos75": "tpos"})
#     manualdfs = [manual50, manual25, manual75]

#     cdfs = []
#     for i in range(0, len(manualdfs)):
#         adf = autodfs[i]; mdf = manualdfs[i]
#         cdf = mdf.merge(adf, how='inner', on='datetimes')
#         cdf = cdf.astype({'tpos_x': 'float', 'tpos_y': 'float'})
#         cdf['diff'] = abs(np.array(cdf.tpos_x) - np.array(cdf.tpos_y))
#         cdfs.append(cdf)
#     dfs.append(pd.concat(cdfs))

In [218]:
compare_cdf = pd.concat(dfs)
set(compare_cdf.Condition)

{'Clear', 'Dim', 'Sea ice', 'Sea ice ', 'Thin clouds'}

In [266]:
cond_df = compare_cdf[compare_cdf['Condition'] == 'Clear']
print(len(cond_df))
np.average(list(cond_df['diff']))

7


240.0

In [258]:
(7*55.71 + 14*182.14)/21

139.99666666666667

In [268]:
75/2

37.5

## Theta calculation

In [82]:
#SIGMAS (DATA ERRORS) ALONG EACH FLOWLINE (FROM INTERANALYST DIFFERENCES)
sigmas = [35.02, 27.65, 30.45]
sigma_avg = np.average(sigmas); print(sigma_avg)

31.040000000000003


In [101]:
theta1s = []; theta2s = []; compare_dfs = []
#FOR EACH GLACIER BOXID:
BoxIDs = list(set(manual_df.BoxID))
for BoxID in BoxIDs:
    print("Box"+BoxID)
    #grab automated tpos
    auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
    auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
    auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
    autodfs = [auto50, auto25, auto75]
    #grab manual tpos that corresponds to just boxID
    manual50_df = manual50[manual50.BoxID == BoxID].copy()
    manual25_df = manual25[manual25.BoxID == BoxID].copy()
    manual75_df = manual75[manual75.BoxID == BoxID].copy()
    manualdfs = [manual50_df, manual25_df, manual75_df]
    #calculate difference in terminus positions along the three flowlines
    lists3 = []; lists3_norm = []
    for i in range(0, len(manualdfs)):
        man = manualdfs[i]; auto = autodfs[i]; sigma = sigmas[i]
        compare_df = man.merge(auto, how='inner', on=['datetimes'])
        #cast terminus positions into float values
        compare_df = compare_df.astype({'tpos_x': 'float', 'tpos_y': 'float'})
        #subtract the absolute value of the difference and put into df as a column named "diff"
        compare_df['diff'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))  
        compare_df['diff/sigma'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))/sigma
        lists3.append(list(compare_df['diff']))  
        lists3_norm.append(list(compare_df['diff/sigma']))
    diff_all = lists3[0]+lists3[1]+lists3[2] #list of all the differences between manual and auto
    normalizeddiff_all = lists3_norm[0]+lists3_norm[1]+lists3_norm[2] #list of all the normalized differences
    N = len(diff_all) #number of total intersections
    
    #CALCULATE THETA:
    theta1 = (1.0/N)*np.sum(normalizeddiff_all) #sum of normalized differences along flowlines
    theta2 = (1.0/N)*(np.sum(diff_all)/sigma_avg) #sum of differences normalized by average sigma
    theta1s.append(theta1); theta2s.append(theta2)
    print("Theta values:",theta1, theta2)
    
    compare_dfs.append(compare_df)

Box174
Theta values: 16.2436864750265 14.980670103092782
Box120
Theta values: 5.468603853588577 5.315721649484535
Box001
Theta values: 4.611837997139841 4.4642857142857135
Box259
Theta values: 1.8672395877956394 1.7949189985272458
Box002


ZeroDivisionError: float division by zero

In [102]:
manual_dfs

NameError: name 'manual_dfs' is not defined

In [60]:
list(zip(columns, theta1_for_df, theta2_for_df))

[('Theta_avg', nan, nan),
 ('174', 15.250678739646302, 15.153724923376984),
 ('002', 43.19720044648473, 43.18432633252906),
 ('001', 32.47436749614078, 32.26716320559405),
 ('120', 13.919454254169144, 13.744797471440512),
 ('259', 26.457638700697967, 26.416362722840447)]

In [67]:
#CALCULATE OVERALL THETA and write results to csv
theta1_all = np.average(theta1s)
theta2_all = np.average(theta2s)

#organize data
columns = ['Theta_avg']+BoxIDs
theta1_for_df = [theta1_all]+theta1s
theta2_for_df = [theta2_all]+theta2s
#write to csv
pd.DataFrame(list(zip(columns, theta1_for_df, theta2_for_df)), 
             columns=['ID', 'theta1', 'theta2']).to_csv(manual_path+'thetas.csv', sep=',') 

#ADJUST FILENAME TO INCLUDE PARAMETERS OR SOMETHING

In [80]:
def objective_func(manual_df):
    #SPLIT INTO 3 DATAFRAMES FOR 3 FLOWLINES:
    manual50 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                          'tpos50']].copy().reset_index(drop=True).rename(columns={"tpos50": "tpos"})
    manual25 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                          'tpos25']].copy().reset_index(drop=True).rename(columns={"tpos25": "tpos"})
    manual75 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y',
                                          'tpos75']].copy().reset_index(drop=True).rename(columns={"tpos75": "tpos"})
    #SIGMAS (DATA ERRORS) ALONG EACH FLOWLINE (FROM INTERANALYST DIFFERENCES)
    sigmas = [35.02, 27.65, 30.45]; sigma_avg = np.average(sigmas);
    
    theta1s = []; theta2s = []
    #FOR EACH GLACIER BOXID:
    BoxIDs = list(set(manual_df.BoxID))
    for BoxID in BoxIDs:
        print("Box"+BoxID)
        #grab automated tpos
        auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
        auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
        auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
        autodfs = [auto50, auto25, auto75]
        #grab manual tpos that corresponds to just boxID
        manual50_df = manual50[manual50.BoxID == BoxID].copy()
        manual25_df = manual25[manual25.BoxID == BoxID].copy()
        manual75_df = manual75[manual75.BoxID == BoxID].copy()
        manualdfs = [manual50, manual25, manual75]
        #calculate difference in terminus positions along the three flowlines
        lists3 = []; lists3_norm = []
        for i in range(0, len(manualdfs)):
            man = manualdfs[i]; auto = autodfs[i]; sigma = sigmas[i]
            compare_df = man.merge(auto, how='inner', on=['datetimes'])
            #cast terminus positions into float values
            compare_df = compare_df.astype({'tpos_x': 'float', 'tpos_y': 'float'})
            #subtract the absolute value of the difference and put into df as a column named "diff"
            compare_df['diff'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))  
            compare_df['diff/sigma'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))/sigma
            lists3.append(list(compare_df['diff']))  
            lists3_norm.append(list(compare_df['diff/sigma']))
        diff_all = lists3[0]+lists3[1]+lists3[2] #list of all the differences between manual and auto
        normalizeddiff_all = lists3_norm[0]+lists3_norm[1]+lists3_norm[2] #list of all the normalized differences
        N = len(diff_all) #number of total intersections

        #CALCULATE THETA:
        theta1 = (1.0/N)*np.sum(normalizeddiff_all) #sum of normalized differences along flowlines
        theta2 = (1.0/N)*(np.sum(diff_all)/sigma_avg) #sum of differences normalized by average sigma
        theta1s.append(theta1); theta2s.append(theta2)
        #print("Theta values:",theta1, theta2)   
        
    #CALCULATE OVERALL THETA
    theta1_all = np.average(theta1s); theta2_all = np.average(theta2s)
    #organize data in dataframe
    column_titles = ['Theta_avg']+BoxIDs
    theta1_for_df = [theta1_all]+theta1s; theta2_for_df = [theta2_all]+theta2s
    #write to csv
    theta_df = pd.DataFrame(list(zip(column_titles, theta1_for_df, theta2_for_df)), 
                 columns=['ID', 'theta1', 'theta2'])
    return theta_df 

In [78]:
# objective_func(manual_df)