In [1]:
import numpy as np
from scipy import stats
import pandas as pd

from tessa.data import DataFields3D, Dataset

In [2]:
# Source files for the El Nino dataset; the dataset's landing page is
# http://kdd.ics.uci.edu/databases/el_nino/el_nino.html

# File read later to obtain the column names.
data_desc = 'http://kdd.ics.uci.edu/databases/el_nino/elnino.col'
# File read later as the whitespace-delimited measurements table.
data_link = 'http://kdd.ics.uci.edu/databases/el_nino/elnino.gz'

# Preparing dataset

In [3]:
# Column names are read from the description file as a headerless table and
# squeezed into a Series (assumes the file parses to a single column — TODO confirm).
# `read_csv(..., squeeze=True)` was removed in pandas 2.0, so the squeeze is done
# explicitly along the column axis, which always yields a Series here.
cols = pd.read_csv(data_desc, header=None).squeeze('columns').to_list()
data = (
    # `sep=r'\s+'` is the documented equivalent of the deprecated `delim_whitespace=True`.
    pd.read_csv(data_link, header=None, sep=r'\s+', names=cols)
    # Keep only the last row for every (buoy, day) pair.
    .drop_duplicates(keep='last', subset=['buoy', 'day'])
    # Drop rows whose humidity is the literal "." (presumably the missing-value marker).
    .query('humidity != "."')
    # With the "." rows gone the column can be cast to float.
    .astype({'humidity': float})
    # Standardized humidity (z-score over the remaining rows).
    .assign(z_humidity = lambda x: stats.zscore(x['humidity'].values))
)
data.head()

Unnamed: 0,buoy,day,latitude,longitude,zon.winds,mer.winds,humidity,air temp.,s.s.temp.,z_humidity
0,1,1,8.96,-140.32,-6.3,-6.4,83.5,27.32,27.57,-0.187797
1,1,2,8.95,-140.32,-5.7,-3.6,86.4,26.7,27.62,0.380607
2,1,3,8.96,-140.32,-6.2,-5.8,83.0,27.36,27.68,-0.285797
3,1,4,8.96,-140.34,-6.4,-5.3,82.2,27.32,27.7,-0.442598
4,1,5,8.96,-140.33,-4.9,-6.2,87.3,27.09,27.85,0.557008


In [4]:
# Column names for the three positional fields, plus the z-scored humidity
# column used as weights.
data_fields = DataFields3D(
    "latitude",
    "longitude",
    "day",
    weights='z_humidity',
)
data_fields

DataFields3D(x='latitude', y='longitude', z='day', weights='z_humidity')

In [5]:
# Pair the prepared frame with its field mapping under the name 'El Nino'.
dataset = Dataset(
    data,
    data_fields,
    name='El Nino',
)

In [6]:
# Read `dataset.data` while the 'spatio_temporal_tensor' format is active.
with dataset.format('spatio_temporal_tensor'):
    formatted_parts = dataset.data
    # The last item becomes `data_index`; everything before it is collected
    # into the list `tensor_data`.
    *tensor_data, data_index = formatted_parts

In [7]:
# `tensor_data` must hold exactly three items: indices, values and tensor shape.
idx, vals, shape = tensor_data

# Density = rows in `idx` divided by the total number of cells implied by `shape`.
n_stored = idx.shape[0]
n_cells = np.prod(shape)
density = n_stored / n_cells
print(f'Tensor with shape {shape} density: {density:.1%}')

Tensor with shape (84, 104, 14) density: 0.5%


# Standard tensor factorization (TF)

In [8]:
from tessa.tensor import sa_hooi

In [9]:
# (10, 12, 3) is the fourth positional argument — presumably the target rank for
# each of the three tensor modes; confirm against sa_hooi's signature.
# The fixed seed is passed through to sa_hooi.
factors = sa_hooi(
    idx, vals, shape,
    (10, 12, 3),
    seed=123,
)

growth of the core: 1.0
growth of the core: 0.13644255235912423
growth of the core: 0.00992252683820765
growth of the core: 0.0018855732624303285
growth of the core: 0.006855716042608132
growth of the core: 0.015220333607973171
growth of the core: 0.004625363430119546
growth of the core: 0.00034016064863996113
Core is no longer growing. Norm of the core: 15.925474510603975.


# TESSA

In [10]:
from tessa.tensor_ssa import hankel_hooi

In [12]:
# NOTE(review): unlike the sa_hooi call, `vals` is not passed here — confirm that
# hankel_hooi's signature really takes (idx, shape, ranks, ...).
# NOTE(review): this rebinds `factors`, replacing the sa_hooi result.
# The rank tuple has four entries versus three for sa_hooi — presumably one extra
# mode related to `attention_span`; verify against the tessa documentation.
factors = hankel_hooi(
    idx, shape,
    (10, 12, 2, 3),
    attention_span=5,
    seed=123,
)

growth of the core: 1.0
growth of the core: 0.23636808370427304
growth of the core: 0.0574351923053036
growth of the core: 0.011564970294758435
growth of the core: 0.003975690794733302
growth of the core: 0.006555721655183811
growth of the core: 0.16197488296520626
growth of the core: 0.28832039616614186
growth of the core: 0.07912938936363921
growth of the core: 0.029478611531998808
growth of the core: 0.02618140560434757
growth of the core: 0.01221688250381175
growth of the core: 0.006359553902480272
growth of the core: 0.00483107975209087
growth of the core: 0.004608047802471062
growth of the core: 0.004167084210266833
growth of the core: 0.003034234844602325
growth of the core: 0.0018003906504887382
growth of the core: 0.0009939331335558658
Core is no longer growing. Norm of the core: 20.36608587601201.
