# Time-Series Decomposition Tool

This tool takes as input either the output of time_series_extraction.ipynb or any ice-velocity time series stored in .dat or .csv format.

## Import packages

In [1]:
# General
from datetime import date, timedelta , datetime
import numpy as np
import pandas as pd

# Plotting
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Time-series decomposition
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.seasonal import seasonal_decompose

## User Inputs

In [55]:
# Short glacier identifier as it appears in the input file name: it is prepended to
# '_speed_time_series.csv' when the data is read in. Change it to match your own file naming convention.
glacier = 'PIG'
# Full glacier name; used in the title of the interactive decomposition figure.
full_name = 'Pine Island'

# Alternative glaciers: uncomment one pair below and comment out the pair above.
# glacier = 'JAK'
# full_name = 'Jakobshavn Isbrae'

# glacier = 'leo'
# full_name = 'Leonardo Glacier'

# strptime format of the dates stored in the time series (passed to find_middate).
# '%y%m%d' means two-digit year, month, day, e.g. 200131 for 31 January 2020.
date_format = '%y%m%d' 

## Define Functions

In [38]:
def find_middate(df, date_format):
    """Add a 'Date' column holding the midpoint of each measurement interval.

    Each row's 'Date1' and 'Date2' values are parsed with ``date_format``,
    converted to proleptic Gregorian ordinals and averaged (rounding down to a
    whole day). The result is stored as ``datetime.date`` objects in a new
    'Date' column. ``df`` is modified in place and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Date1' and 'Date2' columns whose values can be passed to
        ``int()`` (e.g. 200131 or 200131.0 for ``'%y%m%d'``).
    date_format : str
        ``datetime.strptime`` format describing the numeric dates.

    Raises
    ------
    ValueError
        If a value cannot be converted to an integer or does not match
        ``date_format``.
    """
    # Numeric storage drops leading zeros (050103 -> 50103), which would make
    # strptime split the digits wrongly. Restore them by padding to the width
    # that a date rendered with this format has.
    width = len(datetime(2000, 1, 1).strftime(date_format))

    def to_ordinal(value):
        text = str(int(value)).zfill(width)
        return datetime.strptime(text, date_format).toordinal()

    # Build the whole column first and assign it once: iterating by position
    # works for any index, and a single assignment avoids chained indexing
    # (df['Date'][i] = ...), which does not reliably write back to df.
    df['Date'] = [
        date.fromordinal((to_ordinal(first) + to_ordinal(second)) // 2)
        for first, second in zip(df['Date1'], df['Date2'])
    ]
    

In [4]:
def decompose(df):
    """Decompose a speed time series with STL and plot it with matplotlib.

    The speeds are averaged per month, months without data are forward-filled
    from the previous month, and a robust STL fit is plotted using the
    standard statsmodels decomposition figure.

    NOTE: another function named ``decompose`` is defined later in this file;
    when both definitions are executed, the later one replaces this one.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date' column convertible to a ``DatetimeIndex`` and a
        'speed (m/yr)' column. The DataFrame is not modified.
    """
    # Build a new date-indexed series instead of assigning to `.index` on the
    # column selected from `df`, which could alter the caller's data.
    speeds = df.set_index(pd.DatetimeIndex(df['Date']))['speed (m/yr)']
    # 'M' is the month-end alias; newer pandas releases spell it 'ME'.
    monthly = speeds.resample('M').mean().ffill()

    # These calls change matplotlib's global defaults, so they also affect
    # figures created after this function returns.
    plt.rc('figure', figsize=(12, 8))
    plt.rc('font', size=15)

    result = STL(monthly, robust=True).fit()
    result.plot()
    plt.show()

In [63]:
def decompose(df):
    """Decompose a speed time series with STL and plot it with Plotly.

    The speeds are averaged per month, months without data are forward-filled
    from the previous month, and a robust STL fit is computed. The result is
    shown as a four-row interactive figure: original signal, trend, seasonal
    and residual components.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date' column convertible to a ``DatetimeIndex`` and a
        'speed (m/yr)' column. The DataFrame is not modified.

    Notes
    -----
    The figure title is built from the module-level ``full_name`` variable.
    """
    # Build a new date-indexed series instead of assigning to `.index` on the
    # column selected from `df`, which could alter the caller's data.
    speeds = df.set_index(pd.DatetimeIndex(df['Date']))['speed (m/yr)']
    # 'M' is the month-end alias; newer pandas releases spell it 'ME'.
    monthly = speeds.resample('M').mean().ffill()
    res = STL(monthly, robust=True).fit()

    fig = make_subplots(rows=4, cols=1)

    # Plot the data that was passed in (not a notebook-level variable), so the
    # top panel always matches the series that was decomposed.
    fig.add_trace(
        go.Scatter(x=df['Date'], y=df['speed (m/yr)'], name='Original Signal'),
        row=1, col=1,
    )

    # One panel per STL component, in the order trend / seasonal / residual.
    components = (
        (res.trend, 'Trend'),
        (res.seasonal, 'Seasonal'),
        (res.resid, 'Residual'),
    )
    for row, (component, label) in enumerate(components, start=2):
        fig.add_trace(
            go.Scatter(x=component.index, y=component.values, name=label),
            row=row, col=1,
        )

    fig.update_layout(height=1000, width=800,
                      title={'text': "Decomposition of "+full_name+" Glacier Flow Speeds",
                             'y': 0.95, 'x': 0.45,
                             'xanchor': 'center', 'yanchor': 'top'})

    for row in range(1, 5):
        fig.update_xaxes(title_text="Date", row=row, col=1)
        fig.update_yaxes(title_text="Speed (m/yr)", row=row, col=1)

    fig.show()

## Read in data and filter if required

In [56]:
# Directory holding the CPOM example datasets; only used by the commented-out read_csv calls below.
filepath = '/Users/leamhowe/Documents/2021-22 UoLeeds/MRes Research Project/MAD4SAR/CODE/'

# Read in data. The file name is built from the `glacier` identifier and is resolved
# relative to the current working directory.
df = pd.read_csv(glacier+'_speed_time_series.csv') 

# Alternative inputs (uncomment one to use it instead of the file above):
# df = pd.read_csv(filepath+'PIG_speed_timeseries_CPOM.dat') # CPOM data for Pine Island Glacier, Antarctica
# df = pd.read_csv(filepath+'JAK_speed_timeseries_CPOM.dat') # CPOM data for Jakobshavn Isbrae Glacier, Greenland
# df = pd.read_csv(filepath+'Leonardo_Glacier/leonardo_speed_timeseries_CPOM.dat') # CPOM data for Leonardo Glacier, Antarctica Peninsula

# Run if inputting an unfiltered dataset and/or a dataset that gives each velocity measurement a date interval.
# The three columns are relabelled by position: interval start, interval end, speed.
df = pd.DataFrame(df.to_numpy(),columns = ['Date1','Date2','speed (m/yr)'])  
find_middate(df,date_format) # convert each date interval to a single mid date (adds the 'Date' column)
# Filter out bad data (zeros are treated as missing and rows containing them are dropped), then sort by date
filtered_df = df.replace(0,np.nan).dropna().sort_values(by='Date')

# Run instead if inputting a prefiltered dataset that has a single date per measurement
# filtered_df = pd.DataFrame(df.to_numpy(),columns = ['Date','speed (m/yr)']) # If dataset has just one date


## Tool usage

In [64]:
# Decompose the filtered time series and display the resulting figure
decompose(filtered_df)