In [1]:
# Import libraries
import numpy as np
import xarray as xr
import pandas as pd

In [2]:
# Specify filepaths and filenames.
filepath = 'shared-public/MLinGlaciology/BYOP/data/point_mb_glaciers_norway/'
filename = '2022-10-12_stake_mb_Hydra2_corrected.csv'

In [3]:
# Load data.
data = pd.read_csv(filepath + filename, sep=';')

# Rename columns.
#data = data.rename(columns={"utm_east3": "utm_east_approx", 
#                            "utm_north4": "utm_north_approx", 
#                            "altitude5": "altitude_approx"})

data

FileNotFoundError: [Errno 2] No such file or directory: 'shared-public/MLinGlaciology/BYOP/data/point_mb_glaciers_norway/2022-10-12_stake_mb_Hydra2_corrected.csv'

In [9]:
# New columns indicating if location/altitude is approximate. Fill new column with "N" for
# location/altitude is not approximate.
data['approx_loc'] = 'N'
data['approx_altitude'] = 'N'

# If "utm_east" values are missing, fill column "approx_loc" with "Y" indicating
# that the location is approximate.
data.loc[data['utm_east'].isna(), 'approx_loc'] = 'Y'

# If "altitude" values are missing, fill column "approx_altitude" with "Y"
# indicating that the altitude is approximate.
data.loc[data['altitude'].isna(), 'approx_altitude'] = 'Y'

# Where there is no exact location, fill inn approximate location based on
# "utm_east_approx" and "utm_north_approx" in columns "utm_east" and "utm_north".
# Location is now filled for every observation, with column "approx_loc" indicating
# wether location is approximate (Y) or exact (N).
approx_locs_east = data.loc[data['approx_loc'] == 'Y', 'utm_east_approx']
data.loc[data['approx_loc'] == 'Y', 'utm_east'] = approx_locs_east
approx_locs_north = data.loc[data['approx_loc'] == 'Y', 'utm_north_approx']
data.loc[data['approx_loc'] == 'Y', 'utm_north'] = approx_locs_north

# Same operation with missing altitude, fill in values from "altitude_approx"
# in column "altitude". 
approx_alt = data.loc[data['approx_altitude'] == 'Y', 'altitude_approx']
data.loc[data['approx_altitude'] == 'Y', 'altitude'] = approx_alt 

# Calculate difference between approximate and exact positions and altitude as
# a measure of precision/quality of approximate locations.
data['diff_north'] = data['utm_north'] - data['utm_north_approx']
data['diff_east'] = data['utm_east'] - data['utm_east_approx']
data['diff_altitude'] = data['altitude'] - data['altitude_approx']

In [10]:
# View data
data

Unnamed: 0,glacier_id,utm_zone,utm_east_approx,utm_north_approx,altitude_approx,location_description,location_id,stake_no,altitude,utm_east,...,dt_prev_year_min_date,dt_curr_year_max_date,dt_curr_year_min_date,stake_remark,flag_correction,approx_loc,approx_altitude,diff_north,diff_east,diff_altitude
0,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1012,471.0,529480.0,...,25.09.2012 00:00,14.05.2013 00:00,07.11.2013 00:00,,netto,N,N,50.0,80.0,-29.0
1,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,439.0,529453.0,...,07.11.2013 00:00,09.05.2014 00:00,24.09.2014 00:00,,,N,N,38.0,53.0,-61.0
2,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,444.0,529438.0,...,24.09.2014 00:00,08.05.2015 00:00,23.09.2015 00:00,,,N,N,32.0,38.0,-56.0
3,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,444.0,529438.0,...,23.09.2015 00:00,23.05.2016 00:00,22.09.2016 00:00,Snødyp ut fra stakemålingene (0.65 m) virker i...,netto,N,N,32.0,38.0,-56.0
4,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1016,468.0,529348.0,...,22.09.2016 00:00,25.04.2017 00:00,29.09.2017 00:00,Sondert snødyp.,,N,N,9.0,-52.0,-32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4196,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1509,1078.0,351790.0,...,01.10.2012 00:00,03.05.2012 00:00,12.10.2012 00:00,Kombinasjon med stake 15-12,,N,N,-20.0,-10.0,-2.0
4197,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1509,1078.0,351787.0,...,12.10.2012 00:00,29.05.2013 00:00,24.09.2013 00:00,Endring av stakelengden fra 7. august til 24. ...,,N,N,-35.0,-13.0,-2.0
4198,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1514,1079.0,351773.0,...,14.10.2014 00:00,22.04.2015 00:00,14.10.2015 00:00,"Stakemålt snødyp var 3.0 m, mens to sonderinge...",,N,N,-6.0,-27.0,-1.0
4199,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1514,1078.0,351785.0,...,14.10.2015 00:00,28.04.2016 00:00,06.10.2016 00:00,Basert på sondert snødyp.,,N,N,-20.0,-15.0,-2.0


In [11]:
# 4194 of 4201 points. A total of 7 rows are missing both altitude and altitude_approx.
data_crop_alt = data[data['diff_altitude'].notna()]

# 4053 of 4201 points. A total of 148 rows are missing both exact loc and approx loc.
data_crop_loc = data[data['diff_east'].notna()]

# Cleaned dataset with 4046 instances. A total of 155 points are either missing 
# both exact and approximate coordinates or altitude.
data_crop = data_crop_alt[data_crop_alt['diff_east'].notna()]

# Check balances:
data_crop['diff_netto'] = data_crop['balance_netto'] - (data_crop['balance_winter'] + data_crop['balance_summer'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_crop['diff_netto'] = data_crop['balance_netto'] - (data_crop['balance_winter'] + data_crop['balance_summer'])


In [12]:
data_crop

Unnamed: 0,glacier_id,utm_zone,utm_east_approx,utm_north_approx,altitude_approx,location_description,location_id,stake_no,altitude,utm_east,...,dt_curr_year_max_date,dt_curr_year_min_date,stake_remark,flag_correction,approx_loc,approx_altitude,diff_north,diff_east,diff_altitude,diff_netto
0,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1012,471.0,529480.0,...,14.05.2013 00:00,07.11.2013 00:00,,netto,N,N,50.0,80.0,-29.0,0.000000e+00
1,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,439.0,529453.0,...,09.05.2014 00:00,24.09.2014 00:00,,,N,N,38.0,53.0,-61.0,0.000000e+00
2,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,444.0,529438.0,...,08.05.2015 00:00,23.09.2015 00:00,,,N,N,32.0,38.0,-56.0,0.000000e+00
3,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,444.0,529438.0,...,23.05.2016 00:00,22.09.2016 00:00,Snødyp ut fra stakemålingene (0.65 m) virker i...,netto,N,N,32.0,38.0,-56.0,0.000000e+00
4,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1016,468.0,529348.0,...,25.04.2017 00:00,29.09.2017 00:00,Sondert snødyp.,,N,N,9.0,-52.0,-32.0,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4196,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1509,1078.0,351790.0,...,03.05.2012 00:00,12.10.2012 00:00,Kombinasjon med stake 15-12,,N,N,-20.0,-10.0,-2.0,0.000000e+00
4197,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1509,1078.0,351787.0,...,29.05.2013 00:00,24.09.2013 00:00,Endring av stakelengden fra 7. august til 24. ...,,N,N,-35.0,-13.0,-2.0,4.440892e-16
4198,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1514,1079.0,351773.0,...,22.04.2015 00:00,14.10.2015 00:00,"Stakemålt snødyp var 3.0 m, mens to sonderinge...",,N,N,-6.0,-27.0,-1.0,0.000000e+00
4199,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1514,1078.0,351785.0,...,28.04.2016 00:00,06.10.2016 00:00,Basert på sondert snødyp.,,N,N,-20.0,-15.0,-2.0,0.000000e+00


In [17]:
# Get stats

#%% Stats

# Stats for quality of approximate locations and altitudes:
mean_loc_diff_east = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_east'].mean()
min_loc_diff_east = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_east'].min()
max_loc_diff_east = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_east'].max()
sd_loc_diff_east = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_east'].std()

mean_loc_diff_north = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_north'].mean()
min_loc_diff_north = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_north'].min()
max_loc_diff_north = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_north'].max()
sd_loc_diff_north = data_crop.loc[data_crop['approx_loc'] != 'Y', 'diff_north'].std()

mean_altitude_diff = data_crop.loc[data_crop['approx_altitude'] != 'Y', 'diff_altitude'].mean()
min_altitude_diff = data_crop.loc[data_crop['approx_altitude'] != 'Y', 'diff_altitude'].min()
max_altitude_diff = data_crop.loc[data_crop['approx_altitude'] != 'Y', 'diff_altitude'].max()
sd_altitude_diff = data_crop.loc[data_crop['approx_altitude'] != 'Y', 'diff_altitude'].std()

# Number of instances of winter balance: 3680
no_winter = data_crop['balance_winter'].notna().sum()

# Number of instances of summer balance: 3805
no_summer = data_crop['balance_summer'].notna().sum()

# Number of instances of annual balance: 3839
no_annual = data_crop['balance_netto'].notna().sum()

# Number of points with exact coordinates: 3717 of 4046
no_exact_locs = (data_crop['approx_loc'].values == 'N').sum()

# Number of points with approximate coordinates: 329 of 4046
no_approx_locs = (data_crop['approx_loc'].values == 'Y').sum()
    
# Number of points with exact altitude: 4009
no_exact_alt = (data_crop['approx_altitude'].values == 'N').sum()

# Number of points with exact altitude: 37
no_approx_alt = (data_crop['approx_altitude'].values == 'Y').sum()

# Number of points w/o exact coordinates or altitude: 32
no_approx_both = ((data_crop['approx_altitude'].values == 'Y') & (data_crop['approx_loc'].values == 'Y')).sum()
   
# Number of unique glacier IDs: 32
no_unique_id = data_crop['glacier_id'].nunique()    

# List of unique glacier IDs:
list_unique_id = list(data_crop['glacier_id'].unique())

# Unique glacier IDs with number of entries per ID.
# Index is glacier ID and column is number of entries per glacier ID.
len_rec_per_id = data_crop['glacier_id'].value_counts().to_frame()    


In [45]:
# Summary
print('Total number of points: ' + str(data_crop.shape[0]))
print('Number of unique ids: ' + str(no_unique_id))
print('Number of annual mass balance: ' + str(no_annual))
print('Number of winter mass balance: ' + str(no_winter))
print('Number of summer mass balance: ' + str(no_summer))

Total number of points: 4046
Number of unique ids: 32
Number of annual mass balance: 3839
Number of winter mass balance: 3680
Number of summer mass balance: 3805


In [44]:
# Count number of instances of winter, summer and annual mass balance for each glacier
data_pergla = data_crop.groupby('glacier_id').count()
data_pergla[['balance_winter', 'balance_summer','balance_netto']]

#TO-DO: Plot as circles on map


Unnamed: 0_level_0,balance_winter,balance_summer,balance_netto
glacier_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
54,174,172,189
596,122,124,122
675,174,194,178
703,30,31,30
941,71,70,70
1092,19,18,18
1094,222,231,222
1135,29,29,56
1144,70,77,70
1280,57,72,71


In [65]:
#data_crop[data_crop['glacier_id']==2297].groupby("location_id").count()
data_nig_N1000 = data_crop[(data_crop['glacier_id']==2297) & (data_crop['location_id']=='N1000') & (data_crop['balance_netto'].notna())]
#data_nig_N1000['dt_curr_year_min_date']

In [70]:
data_nig_N1000.columns

Index(['glacier_id', 'utm_zone', 'utm_east_approx', 'utm_north_approx',
       'altitude_approx', 'location_description', 'location_id', 'stake_no',
       'altitude', 'utm_east', 'utm_north', 'balance_winter', 'balance_summer',
       'balance_netto', 'dt_prev_year_min_date', 'dt_curr_year_max_date',
       'dt_curr_year_min_date', 'stake_remark', 'flag_correction',
       'approx_loc', 'approx_altitude', 'diff_north', 'diff_east',
       'diff_altitude', 'diff_netto'],
      dtype='object')