In [1]:
import os
import requests
import zipfile
import pandas as pd
import glacierml as gl
from PIL import Image
import numpy as np
from tqdm import tqdm
import glob
import path_manager as pm
[
        home_path, data_path, RGI_path, glathida_path, 
        ref_path, coregistration_testing_path, 
        arch_test_path, LOO_path
] = pm.set_paths()

print(np.__version__)

1.23.5


In [2]:
RGI_zip_path = os.path.join(RGI_path,'RGI.zip')
RGI_download_link = 'https://www.glims.org/RGI/rgi60_files/00_rgi60_attribs.zip'

In [3]:
glathida_zip_path = os.path.join(glathida_path,'glathida.zip')
glathida_download_link = 'https://www.gtn-g.ch/database/glathida-3.1.0.zip'

In [4]:
ref_zip_path = os.path.join(ref_path,'')
ref_download_link = 'https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/315707/composite_thickness_RGI60-all_regions.zip?sequence=2&isAllowed=y'

In [5]:
def download_data(save_path, download_link, data):
    if not os.path.exists(save_path):
        os.makedirs(save_path)
        print('data directory created')
    else:
        print('data directory exists')

    files = os.listdir(save_path)

    if not files:
        response = requests.get(download_link, stream=True)
        
        if response.status_code == 200:
            total_size = int(response.headers.get('content-length', 0))  # Total size in bytes
            
            print('downloading...')
            # Download with progress bar
            with open(os.path.join(save_path, data + '.zip'), 'wb') as file:
                with tqdm(total=total_size, unit='B', unit_scale=True, desc=data) as progress_bar:
                    for chunk in response.iter_content(1024):  # Download in 1 KB chunks
                        if chunk:  # Filter out keep-alive chunks
                            file.write(chunk)
                            progress_bar.update(len(chunk))  # Update progress bar
            print('download complete')

    else:
        zip_files = [file for file in files if file.endswith('.zip')]
        if zip_files:
            print('directory contains .zip files:', zip_files)


In [6]:
download_data(
    glathida_path,
    glathida_download_link,
    data = 'glathida'
)

data directory exists
directory contains .zip files: ['glathida.zip']


In [7]:
download_data(
    RGI_path,
    RGI_download_link,
    data = 'RGI'
)

data directory exists
directory contains .zip files: ['RGI.zip']


In [8]:
download_data(
    ref_path,
    ref_download_link,
    data = 'refs'
)

data directory exists
directory contains .zip files: ['refs.zip']


In [9]:
glathida_file = 'glathida-3.1.0/data/T.csv'
zip_path = os.path.join(glathida_path, 'glathida.zip')
destination_file = os.path.join(glathida_path, 'T.csv')

with zipfile.ZipFile(zip_path, 'r') as zipped_file:
    if os.path.exists(destination_file):
        print(f'data already unzipped to {glathida_path}')
    else:
        if glathida_file in zipped_file.namelist():
            with zipped_file.open(glathida_file) as source, open(destination_file, 'wb') as dest:
                # Get the total size of the file to unzip
                file_size = zipped_file.getinfo(glathida_file).file_size
                
                # Initialize the tqdm progress bar
                with tqdm(total=file_size, unit='B', unit_scale=True, desc='Unzipping') as progress_bar:
                    # Read in chunks and update the progress bar
                    for chunk in iter(lambda: source.read(1024), b''):  # Read in 1 KB chunks
                        dest.write(chunk)
                        progress_bar.update(len(chunk))  # Update the progress bar
            print(f'data unzipped to {glathida_path}')


data already unzipped to /home/simonhans/glacierml/data/glathida


In [10]:
zip_path = os.path.join(RGI_path, 'RGI.zip')
csv_files = glob.glob(os.path.join(RGI_path, '*.csv'))

with zipfile.ZipFile(zip_path, 'r') as zipped_file:
    if csv_files:
        print(f'data already unzipped to {RGI_path}')
    else:
        # Get the list of all files in the ZIP
        file_list = zipped_file.namelist()
        total_files = len(file_list)
        
        # Initialize progress bar for file extraction
        with tqdm(total=total_files, unit='file', desc='Unzipping files') as progress_bar:
            # Extract each file and update the progress bar
            for file in file_list:
                zipped_file.extract(file, RGI_path)
                progress_bar.update(1)  # Update the progress bar after extracting each file
        
        print(f'data unzipped to {RGI_path}')


data already unzipped to /home/simonhans/glacierml/data/RGI


In [11]:
zip_path = os.path.join(ref_path, 'refs.zip')

if os.path.exists(os.path.join(ref_path, 'refs.pkl')):
    print(f'data already unzipped to {ref_path}')
else:
    with zipfile.ZipFile(zip_path, 'r') as zipped_file:
        # Get the list of all files in the ZIP
        file_list = zipped_file.namelist()
        total_files = len(file_list)
        
        # Initialize progress bar for file extraction
        with tqdm(total=total_files, unit='file', desc='Unzipping files') as progress_bar:
            # Extract each file and update the progress bar
            for file in file_list:
                zipped_file.extract(file, ref_path)
                progress_bar.update(1)  # Update the progress bar after extracting each file
        
        print(f'data unzipped to {ref_path}')


Unzipping files: 100%|██████████████| 215563/215563 [00:49<00:00, 4345.04file/s]

data unzipped to /home/simonhans/glacierml/data/ref





In [12]:
RGIIds = []
means = []

if not os.path.exists(os.path.join(ref_path, 'refs.pkl')):
    print('Finding mean of Farinotti et al. 2019 estimates')

    # Get the list of RGI folders to process
    rgi_folders = [folder for folder in os.listdir(ref_path) if folder.startswith('RGI')]

    # Initialize a progress bar for the RGI folders
    for folder in tqdm(rgi_folders, desc="Processing RGI folders"):
        region_path = os.path.join(ref_path, folder)

        # Get the list of files in each RGI folder
        glac_files = os.listdir(region_path)

        # Initialize a progress bar for the files within the folder
        for file in tqdm(glac_files, desc=f"Processing files in {folder}", leave=False):
            glac_path = os.path.join(region_path, file)

            # Open and process each image
            im = Image.open(glac_path)
            arr = np.array(im)

            # Check if the array contains non-zero values
            if np.count_nonzero(arr) > 0:
                arr[arr == 0] = np.nan
                mean_thickness = np.nanmean(arr)
                means.append(mean_thickness)
                RGIIds.append(file[:-14])
            else:
                print(f"Array in file {file} is empty. Skipping...")

    # Create DataFrame and save it as a pickle file
    df = pd.concat(
        [
            pd.Series(RGIIds, name='RGIId'),
            pd.Series(means, name='FMT'),
        ], axis=1
    )
    df.to_pickle(os.path.join(ref_path, 'refs.pkl'))
    print('all done')

else:
    print('already done')


Finding mean of Farinotti et al. 2019 estimates


Processing RGI folders:   0%|                            | 0/19 [00:00<?, ?it/s]
Processing files in RGI60-08:   0%|                    | 0/3417 [00:00<?, ?it/s][A
Processing files in RGI60-08:   1%|          | 25/3417 [00:00<00:13, 247.75it/s][A
Processing files in RGI60-08:   4%|▎        | 126/3417 [00:00<00:04, 692.09it/s][A
Processing files in RGI60-08:   7%|▌        | 236/3417 [00:00<00:03, 875.42it/s][A
Processing files in RGI60-08:  10%|▉        | 341/3417 [00:00<00:03, 943.99it/s][A
Processing files in RGI60-08:  13%|█▏       | 446/3417 [00:00<00:03, 977.54it/s][A
Processing files in RGI60-08:  16%|█▍       | 549/3417 [00:00<00:02, 994.90it/s][A
Processing files in RGI60-08:  19%|█▋       | 649/3417 [00:00<00:02, 986.54it/s][A
Processing files in RGI60-08:  22%|█▊      | 756/3417 [00:00<00:02, 1012.34it/s][A
Processing files in RGI60-08:  25%|██      | 860/3417 [00:00<00:02, 1016.90it/s][A
Processing files in RGI60-08:  28%|██▎     | 971/3417 [00:01<00:02, 1043.74it/s

Array in file RGI60-06.00560_thickness.tif is empty. Skipping...



Processing files in RGI60-06:  69%|██████▉   | 391/568 [00:00<00:00, 480.75it/s][A
Processing files in RGI60-06:  77%|███████▋  | 440/568 [00:00<00:00, 479.81it/s][A
Processing files in RGI60-06:  86%|████████▋ | 490/568 [00:01<00:00, 485.57it/s][A
Processing files in RGI60-06:  97%|█████████▋| 550/568 [00:01<00:00, 510.19it/s][A
Processing RGI folders:  37%|███████▎            | 7/19 [01:40<01:53,  9.43s/it][A
Processing files in RGI60-12:   0%|                    | 0/1887 [00:00<?, ?it/s][A
Processing files in RGI60-12:   4%|▍         | 80/1887 [00:00<00:02, 799.01it/s][A
Processing files in RGI60-12:   9%|▊        | 166/1887 [00:00<00:02, 832.07it/s][A
Processing files in RGI60-12:  13%|█▏       | 250/1887 [00:00<00:01, 821.62it/s][A
Processing files in RGI60-12:  18%|█▌       | 335/1887 [00:00<00:01, 830.05it/s][A
Processing files in RGI60-12:  22%|█▉       | 419/1887 [00:00<00:01, 806.76it/s][A
Processing files in RGI60-12:  27%|██▍      | 509/1887 [00:00<00:01, 835.31

Array in file RGI60-19.01323_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  12%|█        | 321/2751 [00:00<00:03, 639.31it/s][A
Processing files in RGI60-19:  14%|█▎       | 385/2751 [00:00<00:03, 601.80it/s][A
Processing files in RGI60-19:  17%|█▍       | 456/2751 [00:00<00:03, 634.97it/s][A
Processing files in RGI60-19:  19%|█▋       | 533/2751 [00:00<00:03, 674.59it/s][A

Array in file RGI60-19.01432_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  22%|█▉       | 601/2751 [00:00<00:03, 651.23it/s][A
Processing files in RGI60-19:  24%|██▏      | 673/2751 [00:01<00:03, 667.93it/s][A
Processing files in RGI60-19:  27%|██▍      | 744/2751 [00:01<00:02, 679.21it/s][A
Processing files in RGI60-19:  30%|██▋      | 813/2751 [00:01<00:02, 661.55it/s][A

Array in file RGI60-19.01313_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  32%|██▉      | 880/2751 [00:01<00:02, 663.56it/s][A
Processing files in RGI60-19:  34%|███      | 947/2751 [00:01<00:02, 664.14it/s][A
Processing files in RGI60-19:  37%|██▉     | 1014/2751 [00:01<00:02, 659.50it/s][A
Processing files in RGI60-19:  39%|███▏    | 1081/2751 [00:01<00:02, 640.64it/s][A
Processing files in RGI60-19:  42%|███▎    | 1146/2751 [00:01<00:02, 618.37it/s][A
Processing files in RGI60-19:  44%|███▌    | 1209/2751 [00:01<00:02, 588.90it/s][A

Array in file RGI60-19.01429_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  46%|███▋    | 1272/2751 [00:02<00:02, 597.22it/s][A
Processing files in RGI60-19:  48%|███▉    | 1333/2751 [00:02<00:02, 579.76it/s][A
Processing files in RGI60-19:  51%|████    | 1392/2751 [00:02<00:02, 563.08it/s][A
Processing files in RGI60-19:  53%|████▏   | 1453/2751 [00:02<00:02, 575.44it/s][A
Processing files in RGI60-19:  55%|████▍   | 1511/2751 [00:02<00:02, 564.65it/s][A

Array in file RGI60-19.00511_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  57%|████▌   | 1575/2751 [00:02<00:02, 584.19it/s][A
Processing files in RGI60-19:  60%|████▊   | 1646/2751 [00:02<00:01, 618.87it/s][A
Processing files in RGI60-19:  62%|████▉   | 1711/2751 [00:02<00:01, 620.92it/s][A
Processing files in RGI60-19:  64%|█████▏  | 1774/2751 [00:02<00:01, 616.99it/s][A
Processing files in RGI60-19:  67%|█████▎  | 1840/2751 [00:02<00:01, 627.11it/s][A
Processing files in RGI60-19:  69%|█████▌  | 1905/2751 [00:03<00:01, 632.86it/s][A
Processing files in RGI60-19:  72%|█████▋  | 1969/2751 [00:03<00:01, 619.69it/s][A
Processing files in RGI60-19:  74%|█████▉  | 2041/2751 [00:03<00:01, 648.71it/s][A

Array in file RGI60-19.00922_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  77%|██████▏ | 2107/2751 [00:03<00:01, 634.66it/s][A
Processing files in RGI60-19:  79%|██████▎ | 2171/2751 [00:03<00:00, 617.28it/s][A
Processing files in RGI60-19:  81%|██████▍ | 2233/2751 [00:03<00:00, 616.10it/s][A

Array in file RGI60-19.01425_thickness.tif is empty. Skipping...



Processing files in RGI60-19:  83%|██████▋ | 2297/2751 [00:03<00:00, 622.01it/s][A
Processing files in RGI60-19:  86%|██████▊ | 2360/2751 [00:03<00:00, 614.13it/s][A
Processing files in RGI60-19:  88%|███████ | 2430/2751 [00:03<00:00, 636.10it/s][A
Processing files in RGI60-19:  91%|███████▎| 2496/2751 [00:03<00:00, 641.39it/s][A
Processing files in RGI60-19:  93%|███████▍| 2561/2751 [00:04<00:00, 610.47it/s][A
Processing files in RGI60-19:  96%|███████▋| 2630/2751 [00:04<00:00, 632.95it/s][A
Processing files in RGI60-19:  98%|███████▊| 2704/2751 [00:04<00:00, 662.70it/s][A
Processing RGI folders:  53%|██████████         | 10/19 [01:51<00:50,  5.56s/it][A

Array in file RGI60-19.01428_thickness.tif is empty. Skipping...



Processing files in RGI60-14:   0%|                   | 0/27986 [00:00<?, ?it/s][A
Processing files in RGI60-14:   0%|         | 89/27986 [00:00<00:31, 887.90it/s][A
Processing files in RGI60-14:   1%|        | 178/27986 [00:00<00:31, 880.53it/s][A
Processing files in RGI60-14:   1%|        | 272/27986 [00:00<00:30, 906.19it/s][A
Processing files in RGI60-14:   1%|        | 367/27986 [00:00<00:29, 923.21it/s][A
Processing files in RGI60-14:   2%|▏       | 460/27986 [00:00<00:29, 921.67it/s][A
Processing files in RGI60-14:   2%|▏       | 553/27986 [00:00<00:30, 914.24it/s][A
Processing files in RGI60-14:   2%|▏       | 648/27986 [00:00<00:29, 925.70it/s][A
Processing files in RGI60-14:   3%|▏       | 741/27986 [00:00<00:29, 926.15it/s][A
Processing files in RGI60-14:   3%|▏       | 834/27986 [00:00<00:30, 900.02it/s][A
Processing files in RGI60-14:   3%|▎       | 926/27986 [00:01<00:29, 904.76it/s][A
Processing files in RGI60-14:   4%|▎      | 1017/27986 [00:01<00:30, 896.68

all done
