In [2]:
import sys
nb_out = sys.stdout

In [91]:
import tsaugmentation as tsag
sys.stdout = nb_out
import matplotlib.pyplot as plt
from itertools import product

In [4]:
data = tsag.preprocessing.PreprocessDatasets('tourism').apply_preprocess()

Number of groups: 4
	state: 7
	zone: 27
	region: 76
	purpose: 4
Total number of series: 304
Number of points per series for train: 204
Total number of points: 228
Seasonality: 12
Forecast horizon: 24


In [5]:
train = data['train']['data']

In [6]:
train

array([[ 307, 2142,  202, ...,    3,    0,    5],
       [ 367,  609,  159, ...,    0,    0,    1],
       [ 543,  388,  100, ...,    0,    0,    2],
       ...,
       [ 589,  272,  174, ...,   15,    1,    0],
       [ 507,  269,  304, ...,    0,    1,    0],
       [ 768,  222,   72, ...,    2,    1,    0]])

# Build the Adjencency Matrix

The adjencency matrix to build:

$$ \begin{matrix} 1 & 1 & 1& 1 \\
1 & 1 & 0 & 0 \\
0 & 0 & 1 & 1\\
1 & 0 & 0 & 0\\
0 & 1 & 0 & 0 \\
0 & 0 & 1 & 0 \\
0 & 0 & 0 & 1\end{matrix}$$

In [7]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [8]:
groups = []
for group in data['train']['groups_names'].keys():
    groups.append(data['train']['groups_names'][group][data['train']['groups_idx'][group]].reshape(-1,1))

In [9]:
groups_arr = np.squeeze(np.array(groups)).T

In [10]:
enc = OneHotEncoder().fit(groups_arr)

In [11]:
# Adjencency Matrix
A = np.concatenate((
    np.ones(data['train']['s']).reshape(1,-1),
    enc.transform(groups_arr).toarray().T,
    np.eye(data['train']['s'])),
    axis=0
)

In [14]:
visitors_train = (A @ train.T).T

# Static Graph Temporal Signal

### Extract the node features

Features for each node:
- Average number of visits at that node
- Number of connections (e.g. top node has 4 connections -> state, zone, region, purpose)

Only use train data to avoid leaking information into the train set. 

In [36]:
average_node_visitors = visitors_train.mean(axis=0)
count_node_connections = A.sum(axis=1)

In [37]:
# Normalize features between 0 and 1
average_node_visitors = (average_node_visitors - np.min(average_node_visitors)) / (np.max(average_node_visitors) - np.min(average_node_visitors))
count_node_connections = (count_node_connections - np.min(count_node_connections)) / (np.max(count_node_connections) - np.min(count_node_connections))

In [38]:
node_features = np.concatenate((average_node_visitors.reshape(-1,1), count_node_connections.reshape(-1,1)), axis=1)

In [39]:
node_features.shape

(419, 2)

In [42]:
node_features[:20]

array([[1.        , 1.        ],
       [0.31919814, 0.18151815],
       [0.19545039, 0.27392739],
       [0.26144788, 0.15511551],
       [0.06955159, 0.15511551],
       [0.09909199, 0.06270627],
       [0.03128532, 0.06270627],
       [0.0236574 , 0.08910891],
       [0.09275328, 0.02310231],
       [0.08784076, 0.02310231],
       [0.04025019, 0.00990099],
       [0.03631043, 0.04950495],
       [0.04227172, 0.04950495],
       [0.01950734, 0.00990099],
       [0.09064624, 0.03630363],
       [0.01762422, 0.00990099],
       [0.02553969, 0.03630363],
       [0.03132767, 0.07590759],
       [0.03010104, 0.10231023],
       [0.14830333, 0.03630363]])

### Extract the edges

Build the graph structure

In [111]:
levels = []
for i in range(6):
    if i == 0:
        levels.append('total')
    elif i == 1:
        levels.append(list(data['train']['groups_names'].keys()))
    elif i == 2:
        levels.append([list(v) for k, v in data['train']['groups_names'].items() if k in ('state', 'purpose')])
    elif i == 3:
        levels.append([list(v) for k, v in data['train']['groups_names'].items() if k in ('zone')])
    elif i == 4:
        levels.append([list(v) for k, v in data['train']['groups_names'].items() if k in ('region')])
    elif i == 5:
        levels.append(list(product(*[list(v) for k, v in data['train']['groups_names'].items() if k in ('region', 'purpose')])))
    
levels

['total',
 ['state', 'zone', 'region', 'purpose'],
 [['A', 'B', 'C', 'D', 'E', 'F', 'G'], ['Bus', 'Hol', 'Oth', 'Vis']],
 [['AA',
   'AB',
   'AC',
   'AD',
   'AE',
   'AF',
   'BA',
   'BB',
   'BC',
   'BD',
   'BE',
   'CA',
   'CB',
   'CC',
   'CD',
   'DA',
   'DB',
   'DC',
   'DD',
   'EA',
   'EB',
   'EC',
   'FA',
   'FB',
   'FC',
   'GA',
   'GB']],
 [['AAA',
   'AAB',
   'ABA',
   'ABB',
   'ACA',
   'ADA',
   'ADB',
   'ADC',
   'ADD',
   'AEA',
   'AEB',
   'AEC',
   'AED',
   'AFA',
   'BAA',
   'BAB',
   'BAC',
   'BBA',
   'BCA',
   'BCB',
   'BCC',
   'BDA',
   'BDB',
   'BDC',
   'BDD',
   'BDE',
   'BDF',
   'BEA',
   'BEB',
   'BEC',
   'BED',
   'BEE',
   'BEF',
   'BEG',
   'BEH',
   'CAA',
   'CAB',
   'CAC',
   'CBA',
   'CBB',
   'CBC',
   'CBD',
   'CCA',
   'CCB',
   'CCC',
   'CDA',
   'CDB',
   'DAA',
   'DAB',
   'DAC',
   'DBA',
   'DBB',
   'DBC',
   'DCA',
   'DCB',
   'DCC',
   'DCD',
   'DDA',
   'DDB',
   'EAA',
   'EAB',
   'EAC',
   'EBA',
   '

In [116]:
def flatten(mylist):
    flatlist = []
    for element in mylist:
        if type(element) == list:
            flatlist += flatten(element)
    else:
        flatlist += element
    return flatlist

In [117]:
flatten(levels)

['p',
 'u',
 'r',
 'p',
 'o',
 's',
 'e',
 'G',
 'V',
 'i',
 's',
 'Bus',
 'Hol',
 'Oth',
 'Vis',
 'G',
 'B',
 'AA',
 'AB',
 'AC',
 'AD',
 'AE',
 'AF',
 'BA',
 'BB',
 'BC',
 'BD',
 'BE',
 'CA',
 'CB',
 'CC',
 'CD',
 'DA',
 'DB',
 'DC',
 'DD',
 'EA',
 'EB',
 'EC',
 'FA',
 'FB',
 'FC',
 'GA',
 'GB',
 'G',
 'B',
 'D',
 'AAA',
 'AAB',
 'ABA',
 'ABB',
 'ACA',
 'ADA',
 'ADB',
 'ADC',
 'ADD',
 'AEA',
 'AEB',
 'AEC',
 'AED',
 'AFA',
 'BAA',
 'BAB',
 'BAC',
 'BBA',
 'BCA',
 'BCB',
 'BCC',
 'BDA',
 'BDB',
 'BDC',
 'BDD',
 'BDE',
 'BDF',
 'BEA',
 'BEB',
 'BEC',
 'BED',
 'BEE',
 'BEF',
 'BEG',
 'BEH',
 'CAA',
 'CAB',
 'CAC',
 'CBA',
 'CBB',
 'CBC',
 'CBD',
 'CCA',
 'CCB',
 'CCC',
 'CDA',
 'CDB',
 'DAA',
 'DAB',
 'DAC',
 'DBA',
 'DBB',
 'DBC',
 'DCA',
 'DCB',
 'DCC',
 'DCD',
 'DDA',
 'DDB',
 'EAA',
 'EAB',
 'EAC',
 'EBA',
 'ECA',
 'FAA',
 'FBA',
 'FBB',
 'FCA',
 'FCB',
 'GAA',
 'GAB',
 'GAC',
 'GBA',
 'GBB',
 'GBC',
 'GBD',
 'GBD',
 'Vis',
 ('AAA', 'Bus'),
 ('AAA', 'Hol'),
 ('AAA', 'Oth'),
 ('AAA',

In [110]:
len(levels[5])

304

In [71]:
A = np.array([
        np.ones(4),
        np.ones(4),
        np.ones(4),
        np.array([1, 0, 1, 0]),
        np.array([0, 1, 0, 1]),
        np.array([1, 0, 1, 0]),
        np.array([0, 1, 0, 1]),
        np.array([1, 0, 0, 0]),
        np.array([0, 1, 0, 0]),
        np.array([0, 0, 1, 0]),
        np.array([0, 0, 0, 1]),
])

In [72]:
A

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 0., 1., 0.],
       [0., 1., 0., 1.],
       [1., 0., 1., 0.],
       [0., 1., 0., 1.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [73]:
A.shape

(11, 4)

In [92]:
A[3:3+4] @ A[3+4:]

array([[1., 0., 1., 0.],
       [0., 1., 0., 1.],
       [1., 0., 1., 0.],
       [0., 1., 0., 1.]])