# Fitting ALPS through GCV

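Interpolates data to daily values by fitting ALPS to each input time series. Adapted from the source below. Needs the accompanying Functions.py file (importable from the working directory) to run. The code, as written, expects the input data as .csv files whose names begin with a 3-digit glacier identification code (glacierid). By default, each file must contain a 'Date' column that pandas can parse as dates and a 'Mean Variable' column with the data you wish to interpolate. Rows with a missing 'Mean Variable' value are dropped, and the remaining values are averaged to monthly means before fitting.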

Interpolated data is saved as a .csv file without headers with three columns of data: decimal year, interpolated data, date in YYYYMMDD format.

Source: 
Shekhar, P., Csatho, B., Schenk, T., Roberts, C., and Patra, A. K.: Alps: a unified framework for modeling time series of land ice changes, IEEE Transactions on Geoscience and Remote Sensing, 59, 6466–6481, 2020.

Access: https://github.com/pshekhar-tufts/ALPS

## Import Dependencies

In [1]:
import os
import pickle
from Functions import *
from matplotlib.pyplot import *
import numpy as np
import pandas as pd
from datetime import datetime as dt
from datetime import datetime, timedelta
import time
from datetime import date

import warnings
warnings.filterwarnings('ignore')

In [2]:
def toYearFraction(date):
    """Convert a date to a decimal year, e.g. 2020-07-02 00:00 -> 2020.5.

    The fraction is the time elapsed since 1 January of ``date.year`` divided
    by the full length of that year, so leap years are handled correctly.

    The computation uses plain ``datetime`` arithmetic on the wall-clock
    fields instead of ``time.mktime``. The result therefore does not depend on
    the machine's local timezone or DST rules, and dates that the platform's
    ``mktime`` cannot represent do not raise ``OverflowError``.

    Parameters
    ----------
    date : datetime.datetime, datetime.date or pandas.Timestamp
        The moment to convert. Time-zone information, if present, is ignored
        and the wall-clock fields are used as-is.

    Returns
    -------
    float
        ``date.year`` plus the elapsed fraction of that year, in [0, 1).
    """
    if not isinstance(date, dt):
        # A plain ``datetime.date`` cannot be subtracted from a datetime;
        # promote it to midnight of the same day.
        date = dt(date.year, date.month, date.day)
    elif date.tzinfo is not None:
        # Aware and naive datetimes cannot be mixed in a subtraction; keep
        # only the wall-clock fields.
        date = date.replace(tzinfo=None)

    year = date.year
    startOfThisYear = dt(year=year, month=1, day=1)
    startOfNextYear = dt(year=year + 1, month=1, day=1)

    yearElapsed = (date - startOfThisYear).total_seconds()
    yearDuration = (startOfNextYear - startOfThisYear).total_seconds()

    return year + yearElapsed / yearDuration

## Main function

In [None]:
# Set a glacierid threshold: only files whose 3-digit glacierid is strictly
# greater than this value are processed. Leave it at 0 to process every file;
# raise it to resume an interrupted run after the last glacierid that finished.
threshold = 0

# Define the directory paths. Edit as needed to your paths.
in_dir  = '/Users/.../original/'
out_dir = '/.../interpolated/'

# Collect the input files. Only .csv files whose name starts with three digits
# are kept, so stray directory entries (e.g. macOS '.DS_Store') cannot break
# the integer conversion used for sorting and thresholding below.
csv_files = [
    filename for filename in os.listdir(in_dir)
    if filename.endswith('.csv') and filename[:3].isdecimal()
]
csv_files.sort(key=lambda x: int(x[:3]))  # Sort by the first three digits (glacierid)

filtered_files = [filename for filename in csv_files if int(filename[:3]) > threshold]

# Interpolation settings passed to the ALPS helpers from Functions.py.
# See the source paper for a discussion on changing the values of p and q.
p = 4
q = 1

# Loop through the sorted list of files
for filename in filtered_files:
    glacierid = filename[:3]  # Get the first three digits as 'glacierid'
    print(f"Processing glacier {glacierid}...")

    # Load the data for this glacier and drop rows without a value
    data = pd.read_csv(os.path.join(in_dir, filename))
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.dropna(subset=['Mean Variable']).reset_index(drop=True)
    if data.empty:
        # Nothing to fit; indexing the first/last date below would fail.
        print(f"Skipping glacier {glacierid}: no valid 'Mean Variable' values.")
        continue

    # Reduce to monthly means - ALPS crashes with too many data points.
    # The monthly period is turned back into a timestamp at the first day of
    # the month by round-tripping through its string form ('YYYY-MM').
    data = data.groupby(pd.PeriodIndex(data['Date'], freq="M"))['Mean Variable'].mean().reset_index()
    data['Date'] = data['Date'].astype(str)
    data['Date'] = pd.to_datetime(data['Date'])

    # Create the decimal-year column 'Dec_Date' and the (x, y) array for ALPS
    data['Dec_Date'] = [toYearFraction(stamp) for stamp in data['Date']]
    data_arrays = data[['Dec_Date', 'Mean Variable']].to_numpy()

    # Number of daily output samples: every day from the first to the last
    # monthly timestamp, both inclusive
    first_date = data['Date'].iloc[0]
    last_date = data['Date'].iloc[-1]
    num = (last_date - first_date).days + 1

    # Fit the model with the helpers from Functions.py.
    # NOTE(review): n, lamb and sigmasq are presumably the selected number of
    # knots, the penalty weight and a noise-variance estimate - confirm
    # against Functions.py.
    [n, lamb, sigmasq] = full_search_nk(data_arrays, p, q)
    c = n + p
    U = Kno_pspline_opt(data_arrays, p, n)
    B = Basis_Pspline(n, p, U, data_arrays[:, 0])
    P = Penalty_p(q, c)
    # Penalised least-squares system: (B'B + lamb * P) theta = B'y
    theta = np.linalg.solve(B.T.dot(B) + lamb * P, B.T.dot(data_arrays[:, 1].reshape(-1, 1)))

    # Getting the mean of the prediction on an evenly spaced decimal-year grid.
    # NOTE(review): the grid is evenly spaced in decimal years, so across leap
    # and non-leap years it can differ from the calendar days in `dates` by a
    # fraction of a day.
    xpred = np.linspace(data_arrays[0, 0], data_arrays[-1, 0], num)
    Bpred = Basis_Pspline(n, p, U, xpred)
    ypred1 = np.asarray(Bpred.dot(theta)).ravel()
    # Computed for reference only; these values are not written to the output.
    std_t1, std_n1 = Var_bounds(data_arrays, Bpred, B, theta, P, lamb)

    # Build the matching daily calendar dates as YYYYMMDD integers
    dates = pd.date_range(first_date, last_date, freq='D')
    datesint = dates.strftime('%Y%m%d').astype(int).to_numpy()
    ALPS_array = np.column_stack((xpred, ypred1, datesint))

    # Save the results with the glacierid in the filename. The third column is
    # written as a plain integer so it reads as YYYYMMDD rather than in
    # scientific notation.
    output_filename = os.path.join(out_dir, f"{glacierid}_variable.csv")
    np.savetxt(output_filename, ALPS_array, delimiter=",", fmt=['%.18e', '%.18e', '%d'])  # Save as a .csv
    print(f"Saved interpolated data for glacier {glacierid} to {output_filename}")


Saved interpolated data for glacier 080 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/080_slope_interpolated.csv
Processing glacier 085...
Saved interpolated data for glacier 085 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/085_slope_interpolated.csv
Processing glacier 088...
Saved interpolated data for glacier 088 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/088_slope_interpolated.csv
Processing glacier 090...
Saved interpolated data for glacier 090 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/090_slope_interpolated.csv
Processing glacier 091...
Saved interpolated data for glacier 091 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/091_slope_interpolated.csv
Processing glacier 097...
Saved interpolated data for glacier 097 to /Users/kevin/Documents/ML_longterm/variables_data/ge

Saved interpolated data for glacier 243 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/243_slope_interpolated.csv
Processing glacier 246...
Saved interpolated data for glacier 246 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/246_slope_interpolated.csv
Processing glacier 247...
Saved interpolated data for glacier 247 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/247_slope_interpolated.csv
Processing glacier 248...
Saved interpolated data for glacier 248 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/248_slope_interpolated.csv
Processing glacier 251...
Saved interpolated data for glacier 251 to /Users/kevin/Documents/ML_longterm/variables_data/geometric/time_series/interpolated/251_slope_interpolated.csv
Processing glacier 252...
Saved interpolated data for glacier 252 to /Users/kevin/Documents/ML_longterm/variables_data/ge