In [4]:
import pandas as pd
import glob
# from statsmodels.tsa.api import SimpleExpSmoothing

In [5]:
cols = ['Year', 'Reporter Code', 'Reporter', 'Partner Code', 'Partner', 'Trade Value (US$)']
schema = {'Year': str, 'Reporter Code': str, 'Partner Code': str}
def dfs():
    for file in glob.glob('data/*.csv'):
        yield pd.read_csv(file, encoding='latin-1', usecols=cols, 
                          dtype=schema)
df = pd.concat(dfs())
df.dropna(inplace=True)
df.rename(columns={'Reporter Code': 'Reporter_Code', 'Partner Code': 'Partner_Code', 'Trade Value (US$)': 'Trade_Value'}, inplace=True)

In [6]:
reporters = set(df.Reporter_Code.unique())
print(len(reporters), "Source nodes are present. (Some have invalid data so dropped)")
df_ = df[df.Partner_Code.isin(reporters)]  #induced sub-graph creation
print("Filtered records from", len(df), "to", len(df_))

143 Source nodes are present. (Some have invalid data so dropped)
Filtered records from 74077 to 66316


In [7]:
# Filtered DF is essentially an induced graph now. Let's save this
df_.to_csv('filtered_df.csv', header=True)

In [8]:
countries = df_.Reporter.unique()
country_to_index = dict(zip(countries, range(len(countries))))
index_to_country = dict(map(reversed, country_to_index.items()));

In [9]:
# We can now start creating tensors
import numpy as np
# First dimension is for time (10 years)
# Second dimension is for TO
N = len(countries)
tensor = np.zeros((10, N, N))

In [11]:
for i, year in enumerate(['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021']):
    # Data of that particular year
    df__ = df_[df_.Year == year]
    # For the scale of this data, I think we can just iterate over it.
    for _, row in df__.iterrows():
        country_from = row['Partner']
        country_to = row['Reporter']
        tensor[i, country_to_index[country_from], country_to_index[country_to]] = row['Trade_Value']

In [12]:
sparsity = 1.0 - np.count_nonzero(tensor) / tensor.size
print(f"{sparsity*100:.2f}% of the tensor is sparse")

67.58% of the tensor is sparse


In [13]:
tensor

array([[[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
        [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
        [0.0000000e+00, 2.2651200e+05, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
        ...,
        [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
        [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 3.5345950e+06],
        [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 1.6782854e+07, 0.0000000e+00]],

       [[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
        [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
        [0.0000000e+00, 1.5125220e+06, 0.0000000e+00, ...,
         0.000

In [168]:
!pip install tensorly

Collecting tensorly
  Downloading tensorly-0.7.0-py3-none-any.whl (198 kB)
Collecting nose
  Downloading nose-1.3.7-py3-none-any.whl (154 kB)
Installing collected packages: nose, tensorly
Successfully installed nose-1.3.7 tensorly-0.7.0


In [14]:
# Normalizing our tensor. For the first version, let's just calculate the log
non_zeros = (tensor!=0)
tensor[non_zeros] = np.log(tensor[non_zeros])
tensor

array([[[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        , 12.3305532 ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        , 15.07810928],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         16.63586833,  0.        ]],

       [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        , 14.22928901,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  

In [15]:
from tensorly.decomposition import parafac

# Can tinker with ranks
# Mask has a special significance, we should really discuss and explore more about it
weights, factors = parafac(tensor, rank=150, normalize_factors=True, mask=non_zeros, random_state=42)


Shape of A (143, 150)
Shape of B (143, 150)
Shape of T (150,)
Shape of W (150,)
Shape of np.diag(W*T) (150, 150)


array([ 6.03127935, 18.26250606, 12.36066673, ..., 16.11366776,
        9.5186101 , 16.40344908])

In [24]:
def get_slice(A, B, T, W):
    # Given the two matrices, get the slice
    print("Shape of A", A.shape)
    print("Shape of B", B.shape)
    print("Shape of T", T.shape)
    print("Shape of W", W.shape)
    print ("Shape of np.diag(W*T)", np.diag(W*T).shape)
    return np.matmul(np.matmul(A, np.diag(W*T)), B.T)

sl = get_slice(factors[1], factors[2], factors[0][9], weights)
sl[non_zeros[9]]

Shape of A (143, 150)
Shape of B (143, 150)
Shape of T (150,)
Shape of W (150,)
Shape of np.diag(W*T) (150, 150)


array([ 8.2407961 , 17.4379725 , 18.29482108, ..., 12.02849359,
        2.70938954,  8.6371312 ])

In [16]:
sl.shape[0]

143

In [17]:
# recursive implementation of exponential smoothing
# Input: list of vectors, alpha
# Output: Next vector
def exponential_smoothing_recursive(vectors, alpha, depth=-1):
    if depth==-1:                          # -1 means the last one
        depth = len(vectors)-1
    if depth == 0:                        # For the first vector, return as is.
        return vectors[0]
    return alpha * vectors[depth] + \
            (1-alpha) * exponential_smoothing_recursive(vectors, alpha, depth-1)

exponential_smoothing_recursive([1,2,3], .5)

2.25

In [19]:
next_C = exponential_smoothing_recursive(factors[0], alpha=0.5)
forecast = get_slice(factors[1], factors[2], next_C, weights)

Shape of A (143, 150)
Shape of B (143, 150)
Shape of T (150,)
Shape of W (150,)
Shape of np.diag(W*T) (150, 150)


In [26]:
forecast

array([[  9.82829926,   6.61673888,  -1.78462682, ...,   7.02907563,
         -5.43188148,   1.78511954],
       [  4.12806772,  -2.06278572,  -2.97944865, ...,   7.02575257,
          6.29520915,  12.81436731],
       [ -9.91431114,   7.66564336,  -6.18766149, ..., -10.25835763,
        -14.22246397,  -7.10533903],
       ...,
       [  0.65855749,   6.08872709,   2.94974035, ...,  -3.33873051,
         -3.62777346,  -1.32412016],
       [  4.63556707,   0.68410393,  -1.24979029, ...,   4.21367018,
         10.15100677,  13.03973923],
       [  0.86817148,   0.01445498,   3.81983913, ...,  -1.68165606,
         12.07075546,   2.62945647]])

In [52]:
# based on paper
# factor matrix C
# predict same col in next time slice C(K+1)
# takes in factors produced by parafac and years to forecast (default is 1) 
# returns list of forecasted vectors, last n in list is the forecasted n years

# Looks like you are obtaining the forecasts f1, f2, f3, etc using the temporal fronts t1, t2, t3
# and then smoothing the forecasts i.e. f_n+1
# The correct approach here is to smooth the temporal fronts i.e. get t_n+1 and then
# use it to generate the f_n+1.
def exponential_smoothing(factors, weights, forecast = 1):
    num_slices = len(factors[0])  # Temporal factors
    alpha = 0.5
    
    sl = get_slice(factors[1], factors[2], factors[0][0], weights)  # slice_0
    forecasts = [sl]
    
    for i in range(0, num_slices + forecast):
        # get slice
        s_t = forecasts[i]
        s_t1 = np.zeros((s_t.shape[0], s_t.shape[1]))
        
        past_c_k_r = []
        for k in range(0, s_t.shape[0]):
            
            for r in range(0, s_t.shape[1]):
                c_k_r = s_t[k][r]
                next_c_k_r = 0
                           
                for num in range(0, k):
                    smooth = alpha * ((1-alpha) ** num)
                    next_c_k_r += smooth * past_c_k_r[-num]
                
                past_c_k_r.append(next_c_k_r)
                    
                s_t1[k,r] = next_c_k_r
                
        forecasts.append(s_t1)
        
    return forecasts
         
        
        

In [53]:
# testing expo smoothing
forecasts = exponential_smoothing(factors, weights)
print(forecasts[-1])

Shape of A (143, 150)
Shape of B (143, 150)
Shape of T (150,)
Shape of W (150,)
Shape of np.diag(W*T) (150, 150)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [54]:
forecasts[2]


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

array([ 6.08449941, 17.9035443 , 12.39185072, ..., 16.69321177,
        9.42843118, 16.63586833])