# 0.2 Get RGIIDs based on BREID

**Get RGI-IDs for glaciers based on Norwegian glacier id (breid)**

Use shape files from RGI v6 and 1999/2006 glacier inventory of Norway. Map ids via Glims IDs. 

Input dataset is named: 'yyyy-mm-dd_stake_mb_norway_cleaned.csv' 

Output dataset with RGI-IDs is named: 'yyyy-mm-dd_stake_mb_norway_cleaned_ids.csv'

In [1]:
# Libraries
import geopandas as gpd
import pandas as pd

In [2]:
# Filepaths and filenames.
filepath_mb_data = 'Data/'
filename_data = '2023-08-28_stake_mb_norway_cleaned.csv'
filename_data_ids = '2023-08-28_stake_mb_norway_cleaned_ids.csv'
filepath_shapefiles = 'Data/shape_files/'

# Read list of glacier IDs (BREID) as dataframe and get list of BREID.
mb_data = pd.read_csv(filepath_mb_data + filename_data)
breid_list = mb_data['glacier_id'].unique().tolist()
print(breid_list)

[54, 596, 675, 703, 941, 1092, 1094, 1135, 1144, 1280, 2078, 2085, 2148, 2297, 2320, 2474, 2478, 2514, 2597, 2743, 2768, 2769, 2772, 2968, 3126, 3127, 3128, 3129, 3133, 3137, 3138, 3141]


In [4]:
# Get RGI and GLIMS IDs based on list of NVE BREID

def getrgiid(filepath_shp, id_list, save=False, **kwargs):

    # File directories and file names.
    shp_file_breid = 'cryoclim_GAO_NO_1999_2006_UTM_33N.shp'
    shp_file_rgi = '08_rgi60_Scandinavia.shp'

    # Read shape file containing BREID and corresponding GLIMSID as dataframe.
    # Crop dataframe with values of BREID in list of BREID. Drop all columns 
    # except BREID, BRENAVN (glacier name), HOVEDBREAK (main glacier complex
    # acronym) and GLIMSID. 
    df_breid = gpd.read_file(filepath_shapefiles + shp_file_breid)
    df_breid_cropped = df_breid[df_breid['BREID'].isin(id_list)]
    df_breid_glimsid = df_breid_cropped[['BREID','BRENAVN','HOVEDBREAK','GLIMSID']]

    # Sort the dataframe with BREID and corresponding GLIMSID in the order
    # of id_list. 
    df_out = df_breid_glimsid.set_index('BREID').loc[id_list].reset_index(inplace=False)

    # Get list of GLIMSID. 
    glims_list = df_out['GLIMSID'].values.tolist()

    # Read shape file containing GLIMSID and corresponding RGIID as dataframe.
    df_rgi = gpd.read_file(filepath_shp + shp_file_rgi)

    # Get list of RGI ID corresponding to list of GLIMS ID from df_rgi. Dataframe
    # df_rgi contains more values than df_breid, so selection by boolean mask
    # cannot be used. Instead, df_rgi is cropped to contain only rows with GLIMSId
    # from glims_list. Then values are sorted so that the GLIMSId column is in the
    # same order as glims_list. RGIIds are then selected (and are now in the
    # correct order with respect to id_list and glims_list).
    df_rgi_cropped = df_rgi[df_rgi['GLIMSId'].isin(glims_list)]
    df_rgi_cropped.GLIMSId = df_rgi_cropped.GLIMSId.astype("category")
    df_rgi_cropped.GLIMSId.cat.set_categories(glims_list, inplace = True)
    df_rgi_cropped = df_rgi_cropped.sort_values(["GLIMSId"])
    rgi_list = df_rgi_cropped['RGIId'].values.tolist()

    # Add list of RGI ID to dataframe
    # Dataframe now contains breid, Glims ID and RGI ID
    df_out['RGIID'] = rgi_list
    
    # Dictionaries of Glims ID and RGI id with breid as keys.
    breid_rgi = pd.Series(df_out.RGIID.values,index=df_out.BREID).to_dict()
    breid_glims = pd.Series(df_out.GLIMSID.values, index=df_out.BREID).to_dict()
    
    # Map Glims ID and RGI ID to glacier_id in mb_data.
    mb_data['RGIID'] = mb_data["glacier_id"].map(breid_rgi)
    mb_data['GLIMSID'] = mb_data["glacier_id"].map(breid_glims)

    # Rename Norwegian Id column to BREID
    mb_data.rename(columns={'glacier_id':'BREID'}, inplace=True)
    
    # Move ids to front of dataframe
    glims = mb_data.pop('GLIMSID')
    mb_data.insert(0, 'GLIMSID', glims)
    rgi = mb_data.pop('RGIID')
    mb_data.insert(0, 'RGIID', rgi)   

    # Drop old index column
    mb_data.drop(mb_data.columns[2], axis=1, inplace=True)

    #if save==True:

        # Get name of new file to store ids.
        #new_gl_id_file = kwargs.get('filename_new', None)

        # Save new dataframe with all IDs.
        #mb_data.to_csv('C:/Users/kasj/ML_MB_Norway/Data/' + new_gl_id_file, sep=';', index=None)

    # Return dataframe with all ids included.
    return(mb_data)


In [5]:
mb_data_processed = getrgiid(filepath_shapefiles, breid_list)#, filename_new='new_gl_id.txt')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
  df_rgi_cropped.GLIMSId.cat.set_categories(glims_list, inplace = True)


In [9]:
mb_data_processed

Unnamed: 0,RGIID,GLIMSID,BREID,utm_zone,utm_east_approx,utm_north_approx,altitude_approx,location_description,location_id,stake_no,...,dt_curr_year_max_date,dt_curr_year_min_date,stake_remark,flag_correction,approx_loc,approx_altitude,diff_north,diff_east,diff_altitude,diff_netto
0,RGI60-08.01258,G021737E70130N,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1012,...,14.05.2013 00:00,07.11.2013 00:00,,netto,N,N,50.0,80.0,-29.0,0.000000e+00
1,RGI60-08.01258,G021737E70130N,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,...,09.05.2014 00:00,24.09.2014 00:00,,,N,N,38.0,53.0,-61.0,0.000000e+00
2,RGI60-08.01258,G021737E70130N,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,...,08.05.2015 00:00,23.09.2015 00:00,,,N,N,32.0,38.0,-56.0,0.000000e+00
3,RGI60-08.01258,G021737E70130N,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1013,...,23.05.2016 00:00,22.09.2016 00:00,Snødyp ut fra stakemålingene (0.65 m) virker i...,netto,N,N,32.0,38.0,-56.0,0.000000e+00
4,RGI60-08.01258,G021737E70130N,54,34,529400.0,7780050.0,500.0,Nederst på bretunga,L10,L1016,...,25.04.2017 00:00,29.09.2017 00:00,Sondert snødyp.,,N,N,9.0,-52.0,-32.0,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4166,RGI60-08.02969,G006362E59989N,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1509,...,03.05.2012 00:00,12.10.2012 00:00,Kombinasjon med stake 15-12,,N,N,-20.0,-10.0,-2.0,0.000000e+00
4167,RGI60-08.02969,G006362E59989N,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1509,...,29.05.2013 00:00,24.09.2013 00:00,Endring av stakelengden fra 7. august til 24. ...,,N,N,-35.0,-13.0,-2.0,4.440892e-16
4168,RGI60-08.02969,G006362E59989N,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1514,...,22.04.2015 00:00,14.10.2015 00:00,"Stakemålt snødyp var 3.0 m, mens to sonderinge...",,N,N,-6.0,-27.0,-1.0,0.000000e+00
4169,RGI60-08.02969,G006362E59989N,3141,32,351800.0,6647500.0,1080.0,Nederst på bretunga,B15,B1514,...,28.04.2016 00:00,06.10.2016 00:00,Basert på sondert snødyp.,,N,N,-20.0,-15.0,-2.0,0.000000e+00


In [10]:
# Save file with RGI and Glims IDs added

#mb_data.to_csv(filepath_mb_data + filename_data_ids, index=None)
