In [None]:
import os
import requests
import zipfile
import pandas as pd
import glacierml as gl
from PIL import Image
import numpy as np

In [None]:
# Root of the project tree. The value below is a placeholder and must be
# replaced with a real location before the notebook is run.
home_path = '/path/to_project/directory'

# gl.set_paths hands back seven paths; they are unpacked in the order returned.
(
    data_path,
    RGI_path,
    glathida_path,
    ref_path,
    coregistration_testing_path,
    arch_test_path,
    LOO_path,
) = gl.set_paths(home_path)

In [None]:
# Remote source of the RGI attribute archive and the local file name it maps to.
RGI_download_link = 'https://www.glims.org/RGI/rgi60_files/00_rgi60_attribs.zip'
RGI_zip_path = os.path.join(RGI_path, 'RGI.zip')

In [None]:
# Remote source of the GlaThiDa archive and the local file name it maps to.
glathida_download_link = 'https://www.gtn-g.ch/database/glathida-3.1.0.zip'
glathida_zip_path = os.path.join(glathida_path, 'glathida.zip')

In [None]:
# Local archive path for the reference thickness data. Joining with an empty
# string only yields the directory plus a trailing separator, so name the file
# explicitly; 'reference_thicknesses.zip' is the name the download and unzip
# cells of this notebook use for this archive.
ref_zip_path = os.path.join(ref_path, 'reference_thicknesses.zip')
ref_download_link = 'https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/315707/composite_thickness_RGI60-all_regions.zip?sequence=2&isAllowed=y'

In [None]:
def download_data(save_path, download_link, data):
    """Download a zip archive into ``save_path`` if that directory is empty.

    The directory is created when missing. If it already contains anything,
    nothing is downloaded and the existing ``.zip`` files (if any) are
    reported instead.

    Parameters
    ----------
    save_path : str
        Directory that receives the archive.
    download_link : str
        URL the archive is fetched from.
    data : str
        Base name of the saved file; the archive is written to
        ``<save_path>/<data>.zip``.

    Raises
    ------
    RuntimeError
        If the server does not answer with HTTP status 200.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)
        print('data directory created')
    else:
        print('data directory exists')

    files = os.listdir(save_path)

    if files:
        zip_files = [file for file in files if file.endswith('.zip')]
        if zip_files:
            print('directory contains .zip files:', zip_files)
        else:
            print('directory is not empty; skipping download')
        return

    target = os.path.join(save_path, data + '.zip')
    # Download to a temporary name first: a half-written '<data>.zip' would make
    # the directory non-empty and block every later retry.
    partial = target + '.part'

    # The context manager releases the connection; the timeout bounds the
    # connect and each individual read, not the whole transfer.
    with requests.get(download_link, stream=True, timeout=60) as response:
        if response.status_code != 200:
            raise RuntimeError(
                f'download failed with HTTP status {response.status_code}: '
                f'{download_link}'
            )
        print('downloading...')
        try:
            with open(partial, 'wb') as file:
                # Stream in chunks so the archive is never held in memory whole.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        file.write(chunk)
            os.replace(partial, target)
        except BaseException:
            # Also covers a kernel interrupt: leave the directory empty so the
            # next call starts the download again.
            if os.path.exists(partial):
                os.remove(partial)
            raise
    print('download complete')

In [None]:
# Fetch the GlaThiDa archive; it is stored as 'glathida.zip' inside glathida_path.
download_data(glathida_path, glathida_download_link, data='glathida')

In [None]:
# Fetch the RGI attribute archive; it is stored as 'RGI.zip' inside RGI_path.
download_data(RGI_path, RGI_download_link, data='RGI')

In [None]:
# Fetch the reference thickness archive; it is stored as
# 'reference_thicknesses.zip' inside ref_path.
download_data(ref_path, ref_download_link, data='reference_thicknesses')

In [None]:
# Pull a single member (T.csv) out of the GlaThiDa archive and place it
# directly in glathida_path, without recreating the archive's folder layout.
glathida_file = 'glathida-3.1.0/data/T.csv'
zip_path = os.path.join(glathida_path, 'glathida.zip')
destination_file = os.path.join(glathida_path, 'T.csv')

if os.path.exists(destination_file):
    # Checked before the archive is opened, so a re-run still succeeds when the
    # zip has been removed after extraction.
    print(f'data already unzipped to {glathida_path}')
else:
    with zipfile.ZipFile(zip_path, 'r') as zipped_file:
        if glathida_file not in zipped_file.namelist():
            # Fail loudly: silently skipping would leave T.csv missing with no
            # hint as to why.
            raise KeyError(
                f'{glathida_file} not found in {zip_path}'
            )
        with zipped_file.open(glathida_file) as source, \
                open(destination_file, 'wb') as dest:
            dest.write(source.read())
    print(f'data unzipped to {glathida_path}')


In [None]:
# Extract the RGI archive into RGI_path unless CSV files are already there.
zip_path = os.path.join(RGI_path, 'RGI.zip')

# os.path.exists() does not expand wildcards, so testing for '*.csv' is always
# False; look at the actual directory entries instead.
already_unzipped = any(
    name.endswith('.csv') for name in os.listdir(RGI_path)
)

if already_unzipped:
    print(f'data already unzipped to {RGI_path}')
else:
    with zipfile.ZipFile(zip_path, 'r') as zipped_file:
        zipped_file.extractall(RGI_path)
    print(f'data unzipped to {RGI_path}')

In [None]:
# Extract the reference thickness archive into ref_path unless entries whose
# names start with 'RGI' are already present.
zip_path = os.path.join(ref_path, 'reference_thicknesses.zip')

# os.path.exists() does not expand wildcards, so testing for 'RGI*' is always
# False; look at the actual directory entries instead.
already_unzipped = any(
    name.startswith('RGI') for name in os.listdir(ref_path)
)

if already_unzipped:
    print(f'data already unzipped to {ref_path}')
else:
    with zipfile.ZipFile(zip_path, 'r') as zipped_file:
        zipped_file.extractall(ref_path)
    print(f'data unzipped to {ref_path}')

In [None]:
# For every image under the 'RGI*' folders of ref_path, compute the mean of its
# non-zero pixels and store one row per image in refs.pkl (columns RGIId, FMT).
RGIIds = []
means = []
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# row order of the output identical between runs and machines.
for folder in sorted(os.listdir(ref_path)):
    region_path = os.path.join(ref_path, folder)
    # Skip anything that is not an 'RGI*' directory (e.g. a stray file whose
    # name happens to start with 'RGI'), which os.listdir below cannot handle.
    if not (folder.startswith('RGI') and os.path.isdir(region_path)):
        continue
    print(folder)
    for file in sorted(os.listdir(region_path)):
        glac_path = os.path.join(region_path, file)

        # Context manager closes the underlying file handle once the pixels
        # have been copied into the array.
        with Image.open(glac_path) as im:
            arr = np.array(im)

        # Check if the array contains non-zero values
        if np.count_nonzero(arr) > 0:
            # NaN cannot be stored in an integer array; promote if needed.
            if not np.issubdtype(arr.dtype, np.floating):
                arr = arr.astype(float)
            # Zeros are excluded from the mean by turning them into NaN.
            arr[arr == 0] = np.nan
            means.append(np.nanmean(arr))
            # Drops the last 14 characters of the file name -- presumably a
            # '_thickness.tif' suffix; TODO confirm against the extracted files.
            RGIIds.append(file[:-14])
        else:
            print(f"Array in file {file} is empty. Skipping...")

df = pd.DataFrame({'RGIId': RGIIds, 'FMT': means})
df.to_pickle(os.path.join(ref_path, 'refs.pkl'))