## Code to calculate the auto and manual difference using an objective function

Weighted least squares solution

### Import packages, functions, manual and automated data

In [34]:
import pandas as pd
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import numpy.ma as ma
import datetime
import math

manual_path = '/media/jukes/jukes1/Manual/'; manual_filename = 'manual_tpos_c1.csv'
auto_path = '/home/jukes/Documents/Sample_glaciers/'

In [35]:
os.chdir('/home/jukes/automated-glacier-terminus') #import necessary functions:
from automated_terminus_functions import distance

In [50]:
# MANUAL info
condition_df = pd.read_csv(manual_path+'LS8_manual_delineation_info.csv', dtype=str)

# TEST images
test_df = pd.read_csv(manual_path+'test.csv', dtype=str, header=None)
test_df = test_df.rename(columns={0: 'BoxID', 1: 'datetimes', 2: 'Scene'})
test_df.head()

Unnamed: 0,BoxID,datetimes,Scene
0,1,2013-09-03,LC80320052013246LGN00
1,1,2014-06-09,LC80010152014160LGN00
2,1,2015-06-26,LC80350052015177LGN00
3,1,2015-08-01,LC82320182015213LGN00
4,1,2015-08-24,LC80320052015236LGN00


In [83]:
# examine_df = condition_df.merge(test_df, how='inner', on=['datetimes', 'Scene', 'BoxID'])
# examine_df 

In [51]:
#MANUAL TERMINUS POSITIONS
manual_df = pd.read_csv(manual_path+manual_filename, dtype=str,sep=',')

#SPLIT INTO 3 DATAFRAMES FOR 3 FLOWLINES:
manual50 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                      'tpos50']].copy().reset_index(drop=True).rename(columns={"tpos50": "tpos"})
manual25 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                      'tpos25']].copy().reset_index(drop=True).rename(columns={"tpos25": "tpos"})
manual75 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y',
                                      'tpos75']].copy().reset_index(drop=True).rename(columns={"tpos75": "tpos"})
# manual_df.head()

In [52]:
# manual_df

In [53]:
newIDs = []
for item in np.array(condition_df['BoxID']):
    if type(item) != float:
        newIDs.append(item.rjust(3, '0'))
    else:
        newIDs.append('NaN')
condition_df['BoxID'] = newIDs 
condition_df

Unnamed: 0.1,Unnamed: 0,BoxID,datetimes,Path,Row,Scene,Condition,Not_exact_date,Jukes
0,21,001,2013-05-03,35,5,LC80350052013123LGN01,,,Jackie
1,24,001,2013-05-05,33,5,LC80330052013125LGN01,Sea ice,,Delineation rate (2 ppl):
2,27,001,2013-05-14,32,5,LC80320052013134LGN03,Sea ice,,1 - 2.5 hrs / 160 lines = ~1.9 min/line
3,37,001,2013-05-28,34,5,LC80340052013148LGN00,Sea ice,2013-05-29,2 - 3.25 hrs / 168 lines = ~2.3 min/line
4,61,001,2013-08-23,35,5,LC80350052013235LGN00,Clear,,120 -
...,...,...,...,...,...,...,...,...,...
4032,1897,,,,,,,,
4033,1908,,,,,,,,
4034,1909,,,,,,,,
4035,1910,,,,,,,,


In [54]:
# TEST image conditions by condition - manual
merge1 = manual_df.merge(condition_df, how='inner', on=['datetimes', 'BoxID']).drop(['Unnamed: 0_x', 
                                                                 'Unnamed: 0_y',
                                                                 'Line_x', 'Line_y', 
                                                                 'Jukes', 'Not_exact_date'], axis=1)
merge2 = merge1.merge(test_df, how='inner', on=['datetimes', 'BoxID', 'Scene'])
merge2

Unnamed: 0,BoxID,datetimes,intersect_x,intersect_y,tpos50,tpos25,tpos75,Path,Row,Scene,Condition
0,1,2013-09-03,125.0,143.0,360.312364483929,345.08151210982004,406.732405888688,32,5,LC80320052013246LGN00,Thin clouds
1,1,2015-06-26,128.0,144.0,406.109591120427,375.0749925015,406.732405888688,35,5,LC80350052015177LGN00,Dim
2,1,2015-08-24,126.0,143.0,375.299880095904,330.085216269981,361.94785536041,32,5,LC80320052015236LGN00,Clear
3,1,2016-05-04,127.0,144.0,391.15214431215895,360.078116524734,391.798736598269,34,5,LC80340052016125LGN00,Sea ice
4,1,2016-05-29,127.0,144.0,391.15214431215895,345.08151210982004,391.798736598269,33,5,LC80330052016150LGN00,Sea ice
5,1,2017-03-13,121.0,144.0,301.49626863362704,255.110270275424,330.766156067999,33,5,LC80330052017072LGN00,Sea ice
6,1,2017-03-18,122.0,143.0,315.35694062443,255.110270275424,332.123847382268,36,4,LC80360042017077LGN00,Sea ice
7,1,2017-04-05,123.0,143.0,330.340733183179,285.098667131223,330.766156067999,34,5,LC80340052017095LGN00,Sea ice
8,2,2014-06-25,613.0,506.0,3109.90554518944,2525.31248066452,3152.6997359406105,33,5,LC80330052014176LGN00,Sea ice
9,2,2014-07-04,599.0,507.0,2899.40726701166,2465.53303060413,3167.6336791049603,32,5,LC80320052014185LGN00,Sea ice


In [87]:
# # look at 002 specifically
# BoxID = '002'
# auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
# auto50 = auto50[['BoxID','datetimes', 'Scene', 'tpos']].copy()
# auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
# auto25 = auto25[['BoxID','datetimes', 'Scene', 'tpos']].copy()
# auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
# auto75 = auto75[['BoxID','datetimes', 'Scene', 'tpos']].copy()

In [88]:
# auto75[auto75['datetimes']=='2016-07-07']

In [55]:
BoxIDs = ['001', '002', '120', '174', '259']
# BoxIDs = ['002']
dfs = []

for BoxID in BoxIDs:
    auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
    auto50 = auto50[['BoxID','datetimes', 'Scene', 'tpos']].copy()
    auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
    auto25 = auto25[['BoxID','datetimes', 'Scene', 'tpos']].copy()
    auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
    auto75 = auto75[['BoxID','datetimes', 'Scene', 'tpos']].copy()
    autodfs = [auto50, auto25, auto75]

    manual = merge2[merge2.BoxID == BoxID].copy() # USE MERGE 2 FOR X-VALIDATION
#     manual = merge1[merge1.BoxID == BoxID].copy() # USE MERGE 1 FOR FULL SET
    manual50 = manual[['BoxID','datetimes', 'Scene', 'tpos50', 'Condition']].copy().rename(columns={"tpos50": "tpos"})
    manual25 = manual[['BoxID','datetimes', 'Scene', 'tpos25', 'Condition']].copy().rename(columns={"tpos25": "tpos"})
    manual75 = manual[['BoxID','datetimes', 'Scene', 'tpos75', 'Condition']].copy().rename(columns={"tpos75": "tpos"})
    manualdfs = [manual50, manual25, manual75]

    cdfs = []
    for i in range(0, len(manualdfs)):
        adf = autodfs[i]; mdf = manualdfs[i]
        cdf = mdf.merge(adf, how='inner', on='datetimes')
        cdf = cdf.astype({'tpos_x': 'float', 'tpos_y': 'float'})
        cdf['diff'] = abs(np.array(cdf.tpos_x) - np.array(cdf.tpos_y))
        cdfs.append(cdf)
    dfs.append(pd.concat(cdfs))

In [90]:
# compare_cdf

In [56]:
compare_cdf = pd.concat(dfs)
dates = set(compare_cdf.datetimes)
set(compare_cdf.Condition)

{'Clear',
 'Dim',
 'Sea ice',
 'Sea ice ',
 'Shadow',
 'Shadow, sea ice',
 'Thin clouds'}

In [78]:
#test
dimbright_df = compare_cdf[compare_cdf['Condition'] == 'Dim']
seaice_df = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Sea ice '], compare_cdf[compare_cdf['Condition'] == 'Sea ice']])
clear_df = compare_cdf[compare_cdf['Condition'] == 'Clear']
thinclouds_df = compare_cdf[compare_cdf['Condition'] == 'Thin clouds']
shadow_df = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Shadow'], compare_cdf[compare_cdf['Condition'] == 'Shadow, sea ice']])
good_df = pd.concat([dimbright_df, clear_df, thinclouds_df])

In [64]:
# #all
# DB = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Dim'], compare_cdf[compare_cdf['Condition'] == 'Dim, sea ice ']])
# CL = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Clear'], compare_cdf[compare_cdf['Condition'] == 'Clear ']])
# SI = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Sea ice'], compare_cdf[compare_cdf['Condition'] == 'Sea ice  ']])
# TC = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Thin clouds'], compare_cdf[compare_cdf['Condition'] == 'Thin clouds '], compare_cdf[compare_cdf['Condition'] == 'Thin clouds, sea ice']])
# SH = pd.concat([compare_cdf[compare_cdf['Condition'] == 'Shadow'], compare_cdf[compare_cdf['Condition'] == 'Shadow, clear'], compare_cdf[compare_cdf['Condition'] == 'Shadow, sea ice'], compare_cdf[compare_cdf['Condition'] == 'Shadow, sea ice ']])

In [81]:
df = good_df
df = df.reset_index(drop=True)
print(len(df))
df.head()

29


Unnamed: 0,BoxID_x,datetimes,Scene_x,tpos_x,Condition,BoxID_y,Scene_y,tpos_y,diff
0,1,2015-06-26,LC80350052015177LGN00,406.109591,Dim,1,LC08_L1TP_035005_20150626_20170226_01_T1,405.0,1.109591
1,1,2015-06-26,LC80350052015177LGN00,375.074993,Dim,1,LC08_L1TP_035005_20150626_20170226_01_T1,375.0,0.074993
2,120,2014-12-11,LC82330172014345LGN00,572.216087,Dim,120,LC08_L1TP_233017_20141211_20170416_01_T1,547.5,24.716087
3,120,2014-12-11,LC82330172014345LGN00,524.370158,Dim,120,LC08_L1TP_233017_20141211_20170416_01_T1,513.75,10.620158
4,120,2014-12-11,LC82330172014345LGN00,502.346059,Dim,120,LC08_L1TP_233017_20141211_20170416_01_T1,491.25,11.096059


In [74]:
# # drop repeated rows
# df = df.drop([16])
# df

In [82]:
misfit = np.array(df['diff'])
# # misfit = [1080.0, 390.0, 45.0, 105.0]
print("n =",len(misfit))
# print(misfit)
# print(np.average(misfit))
print(np.nanmedian(misfit))
# print(np.std(misfit))
print("MAD = ", np.nanmedian(abs(misfit-np.nanmedian(misfit))))
# print(np.std(misfit)/np.average(misfit))

n = 29
10.620157903746986
MAD =  6.372369147127927


In [76]:
# misfit-np.median(misfit)

## Theta calculation

In [4]:
# #SIGMAS (DATA ERRORS) ALONG EACH FLOWLINE (FROM INTERANALYST DIFFERENCES)
# sigmas = [35.02, 27.65, 30.45]
# sigma_avg = np.average(sigmas); print(sigma_avg)

In [47]:
cdfs

[   BoxID_x   datetimes                Scene_x      tpos_x         Condition  \
 0      259  2013-08-18  LC82330152013230LGN00  892.783568           Sea ice   
 1      259  2013-09-26  LC80010152013269LGN00  809.235751           Sea ice   
 2      259  2013-09-28  LC82320152013271LGN00  871.872984           Sea ice   
 3      259  2013-10-05  LC82330152013278LGN00  809.235751           Sea ice   
 4      259  2014-05-10  LC82320152014130LGN00         NaN           Sea ice   
 5      259  2014-05-26  LC82320152014146LGN00         NaN           Sea ice   
 6      259  2014-07-04  LC82330152014185LGN00         NaN           Sea ice   
 7      259  2014-08-12  LC80010152014224LGN00  838.726714           Sea ice   
 8      259  2014-08-21  LC82330152014233LGN00  289.503886           Sea ice   
 9      259  2014-09-06  LC82330152014249LGN00         NaN   Cloudy, sea ice   
 10     259  2014-10-15  LC80010152014288LGN00  331.530542           Sea ice   
 11     259  2014-11-02  LC8232015201430

In [69]:
theta1s = []; theta2s = []; compare_dfs = []
#FOR EACH GLACIER BOXID:
BoxIDs = list(set(manual_df.BoxID))
for BoxID in BoxIDs:
    print("Box"+BoxID)
    #grab automated tpos
    auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
    auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
    auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
    autodfs = [auto50, auto25, auto75]
    #grab manual tpos that corresponds to just boxID
    manual50_df = manual50[manual50.BoxID == BoxID].copy()
    manual25_df = manual25[manual25.BoxID == BoxID].copy()
    manual75_df = manual75[manual75.BoxID == BoxID].copy()
    manualdfs = [manual50_df, manual25_df, manual75_df]
    #calculate difference in terminus positions along the three flowlines
    lists3 = []; lists3_norm = []
    for i in range(0, len(manualdfs)):
        man = manualdfs[i]; auto = autodfs[i]; # sigma = sigmas[i]
        compare_df = man.merge(auto, how='inner', on=['datetimes'])
        #cast terminus positions into float values
        compare_df = compare_df.astype({'tpos_x': 'float', 'tpos_y': 'float'})
        #subtract the absolute value of the difference and put into df as a column named "diff"
        compare_df['diff'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))
#         compare_df['diff/sigma'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))/sigma
        lists3.append(list(compare_df['diff']))  
#         lists3_norm.append(list(compare_df['diff/sigma']))
    diff_all = lists3[0]+lists3[1]+lists3[2] #list of all the differences between manual and auto
#     normalizeddiff_all = lists3_norm[0]+lists3_norm[1]+lists3_norm[2] #list of all the normalized differences
    
    N = len(diff_all) #number of total intersections
    
    #CALCULATE THETA:
#     theta1 = (1.0/N)*np.sum(normalizeddiff_all) #sum of normalized differences along flowlines
    theta2 = (1.0/N)*(np.nansum(diff_all)) #sum of differences normalized by average sigma
#     theta1s.append(theta1); 
    theta2s.append(theta2)
    print("Theta values:",theta2)
    
    compare_dfs.append(compare_df)

Box120
Theta values: 108.03952155560408
Box174
Theta values: 253.09239737745898
Box002
Theta values: 344.2793399204759
Box259
Theta values: 354.46125446329745
Box001
Theta values: 244.018222814385


In [67]:
#CALCULATE OVERALL THETA and write results to csv
theta1_all = np.average(theta1s)
theta2_all = np.average(theta2s)

#organize data
columns = ['Theta_avg']+BoxIDs
theta1_for_df = [theta1_all]+theta1s
theta2_for_df = [theta2_all]+theta2s
#write to csv
pd.DataFrame(list(zip(columns, theta1_for_df, theta2_for_df)), 
             columns=['ID', 'theta1', 'theta2']).to_csv(manual_path+'thetas.csv', sep=',') 

#ADJUST FILENAME TO INCLUDE PARAMETERS OR SOMETHING

In [None]:
#SPLIT INTO 3 DATAFRAMES FOR 3 FLOWLINES:
manual50 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                      'tpos50']].copy().reset_index(drop=True).rename(columns={"tpos50": "tpos"})
manual25 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                      'tpos25']].copy().reset_index(drop=True).rename(columns={"tpos25": "tpos"})
manual75 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y',
                                      'tpos75']].copy().reset_index(drop=True).rename(columns={"tpos75": "tpos"})

In [74]:
def calc_theta(manual_df):
    #SPLIT INTO 3 DATAFRAMES FOR 3 FLOWLINES:
    manual50 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                          'tpos50']].copy().reset_index(drop=True).rename(columns={"tpos50": "tpos"})
    manual25 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y', 
                                          'tpos25']].copy().reset_index(drop=True).rename(columns={"tpos25": "tpos"})
    manual75 = manual_df[['BoxID','datetimes', 'intersect_x', 'intersect_y',
                                          'tpos75']].copy().reset_index(drop=True).rename(columns={"tpos75": "tpos"})
    thetas = []
    #FOR EACH GLACIER BOXID:
    BoxIDs = list(set(manual_df.BoxID))
    for BoxID in BoxIDs:
        print("Box"+BoxID)
        #grab automated tpos
        auto50 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline50_filtered.csv', dtype=str,sep=',')
        auto25 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline25_filtered.csv', dtype=str,sep=',')
        auto75 = pd.read_csv(auto_path+'Tpos_Box'+BoxID+'_flowline75_filtered.csv', dtype=str,sep=',')
        autodfs = [auto50, auto25, auto75]
        #grab manual tpos that corresponds to just boxID
        manual50_df = manual50[manual50.BoxID == BoxID].copy()
        manual25_df = manual25[manual25.BoxID == BoxID].copy()
        manual75_df = manual75[manual75.BoxID == BoxID].copy()
        manualdfs = [manual50_df, manual25_df, manual75_df]
        #calculate difference in terminus positions along the three flowlines
        lists3 = []; lists3_norm = []
        for i in range(0, len(manualdfs)):
            man = manualdfs[i]; auto = autodfs[i]; # sigma = sigmas[i]
            compare_df = man.merge(auto, how='inner', on=['datetimes'])
            #cast terminus positions into float values
            compare_df = compare_df.astype({'tpos_x': 'float', 'tpos_y': 'float'})
            #subtract the absolute value of the difference and put into df as a column named "diff"
            compare_df['diff'] = abs(np.array(compare_df.tpos_x) - np.array(compare_df.tpos_y))  
            lists3.append(list(compare_df['diff']))  
        diff_all = lists3[0]+lists3[1]+lists3[2] #list of all the differences between manual and auto
    #     normalizeddiff_all = lists3_norm[0]+lists3_norm[1]+lists3_norm[2] #list of all the normalized differences

        N = len(diff_all) #number of total intersections

        #CALCULATE THETA:
        theta = (1.0/N)*(np.nansum(diff_all)) #sum of differences normalized by average sigma
        thetas.append(theta)
        print("Theta values:",theta)
        
                
    #CALCULATE OVERALL THETA
    theta_all = np.nanmean(thetas)
    #organize data in dataframe
    column_titles = ['Theta_avg']+BoxIDs
    theta_for_df = [theta_all]+thetas
    #write to csv
    theta_df = pd.DataFrame(list(zip(column_titles, theta_for_df)), 
                 columns=['ID', 'theta'])
    
    return theta_df

In [75]:
calc_theta(manual_df)

Box120
Theta values: 108.03952155560408
Box174
Theta values: 253.09239737745898
Box002
Theta values: 344.2793399204759
Box259
Theta values: 354.46125446329745
Box001
Theta values: 244.018222814385


Unnamed: 0,ID,theta
0,Theta_avg,260.778147
1,120,108.039522
2,174,253.092397
3,002,344.27934
4,259,354.461254
5,001,244.018223
