# Imputation

### Goal: impute the 3rd element of y.

In [49]:
import numpy as np
# Observed series (36 points), laid out six values per line so that each line
# is one column of the 6x6 page matrix built later.
# The 3rd element is missing and is stored as the placeholder 0.
y = np.array([
    3.64973815, 2.99549717, 0.0, 2.9468194, 3.63628318, 2.20029144,
    3.58499797, 2.30541867, 2.44001707, 1.91864557, 2.33640267, 0.71044036,
    1.25885197, 1.175712, 1.82684075, 1.090066, 1.73614439, 1.84846206,
    2.72661025, 3.56022295, 3.59882147, 5.28710702, 6.03863768, 6.76454015,
    7.82283933, 8.07749541, 9.15639392, 9.62977085, 10.6195424, 11.56870074,
    11.60384656, 12.13092488, 12.30468568, 12.41261552, 12.53969402, 12.75664092,
])

In [50]:
# Quick sanity check of the input: number of points and their total.
for label, value in (("Observation length:", len(y)),
                     ("Observation summary:", y.sum())):
    print(label, value)

Observation length: 36
Observation summary: 196.25971857000002


### For the best prediction model, assume L = 6 so that the page matrix P(y, L) is square.

In [51]:
L = 6
# Page matrix: column i is the i-th consecutive length-L chunk of y.
P = np.column_stack([y[start:start + L] for start in range(0, L * L, L)])
# np.linalg.svd returns the right factor already transposed,
# so P == U @ np.diag(S) @ V with V used as-is.
U, S, V = np.linalg.svd(P, full_matrices=False)
print('Page matrix and its SVD-decomposition:\n')
print(P, '\n\n', U, '\n\n', np.diag(S), '\n\n', V)


Page matrix and its SVD-decomposition:

[[ 3.64973815  3.58499797  1.25885197  2.72661025  7.82283933 11.60384656]
 [ 2.99549717  2.30541867  1.175712    3.56022295  8.07749541 12.13092488]
 [ 0.          2.44001707  1.82684075  3.59882147  9.15639392 12.30468568]
 [ 2.9468194   1.91864557  1.090066    5.28710702  9.62977085 12.41261552]
 [ 3.63628318  2.33640267  1.73614439  6.03863768 10.6195424  12.53969402]
 [ 2.20029144  0.71044036  1.84846206  6.76454015 11.56870074 12.75664092]] 

 [[-0.36567663  0.66124833  0.17931665 -0.30769076 -0.29185838  0.46585605]
 [-0.37787482  0.3396218   0.04742714  0.65560428 -0.22892235 -0.507342  ]
 [-0.38695874  0.04875619 -0.88434158 -0.20326974  0.13205386 -0.08407692]
 [-0.41518916 -0.07069521  0.17169039  0.33851648  0.72696809  0.38738424]
 [-0.44195048 -0.1501345   0.39119417 -0.55523641  0.18451268 -0.53551295]
 [-0.45392938 -0.64613646  0.03202663  0.10634827 -0.53145789  0.28578425]] 

 [[40.85517291  0.          0.          0.          0

### Choose the rank r: the smallest r for which the cumulative squared-singular-value ratio computed below exceeds 0.9

In [52]:
# Share of the total squared singular values captured by the first k of them,
# for k = 1..L.
squared = S * S
total = squared.sum()
print([squared[:k].sum() / total for k in range(1, L + 1)])

[0.9836692348865267, 0.9936651843823147, 0.9992947848191771, 0.999863700154724, 0.9999724974593821, 1.0]


In [53]:
# Derive the rank from the spectrum instead of typing it in by hand:
# r is the smallest count of leading singular values whose cumulative share
# of the total squared singular values exceeds 0.9.
# np.argmax on a boolean mask returns the index of the first True.
energy_share = np.cumsum(S * S) / (S * S).sum()
r = int(np.argmax(energy_share > 0.9)) + 1
# Hard thresholding: keep the r leading singular values, zero the other L - r.
S_hat = np.append(S[:r], [0] * (L - r))
print(np.diag(S_hat))

[[40.85517291  0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]]


### Set P = 35/36 (the fraction of observed entries), as only one value needs to be imputed

In [54]:
# Fraction of observed entries: one of the len(y) values is missing.
# NOTE: this rebinds P, which held the page matrix up to this point.
P = (len(y)-1)/len(y)
# U, S_hat and V are notebook globals shared by every section; re-run this
# section's SVD and rank-selection cells before this one.
# V is already transposed by np.linalg.svd, so U @ diag(S_hat) @ V is the
# truncated reconstruction; it is then rescaled by 1/P.
P_hat = np.dot(U, np.dot(np.diag(S_hat), V)) * (1/P)
print(P_hat)
# Column 0 of the page matrix holds y[0:L], so the missing 3rd element y[2]
# is the entry in row 2, column 0.
print('Imputed y[2]:', P_hat[2, 0])

[[ 2.36803058  1.98517043  1.38055058  4.38919413  8.80090786 11.31185273]
 [ 2.44702299  2.05139145  1.42660279  4.53560822  9.09448724 11.68919184]
 [ 2.50584823  2.1007059   1.46089762  4.64464203  9.31311428 11.97019431]
 [ 2.68866138  2.25396205  1.56747681  4.98349001  9.99254876 12.84347499]
 [ 2.86196101  2.39924282  1.66850968  5.3047045  10.63662579 13.67131055]
 [ 2.93953336  2.46427337  1.71373399  5.44848646 10.92492743 14.04186615]]


# Forecasting
### Goal: forecast the next 3 elements of y

In [55]:
import numpy as np
# Observed series (33 points) followed by three 0 placeholders for the values
# to be forecast. Six values per line, i.e. one page-matrix column per line.
y = np.array([
    2.9468194, 3.63628318, 2.20029144, 3.58499797, 2.30541867, 2.44001707,
    1.91864557, 2.33640267, 0.71044036, 1.25885197, 1.175712, 1.82684075,
    1.090066, 1.73614439, 1.84846206, 2.72661025, 3.56022295, 3.59882147,
    5.28710702, 6.03863768, 6.76454015, 7.82283933, 8.07749541, 9.15639392,
    9.62977085, 10.6195424, 11.56870074, 11.60384656, 12.13092488, 12.30468568,
    12.41261552, 12.53969402, 12.75664092, 0.0, 0.0, 0.0,
])

In [56]:
# Quick sanity check of the input: number of points and their total.
for label, value in (("Observation length:", len(y)),
                     ("Observation summary:", y.sum())):
    print(label, value)

Observation length: 36
Observation summary: 189.61448325


### For the best prediction model, assume L = 6 so that the page matrix P(y, L) is square.

In [57]:
L = 6
# Page matrix: column i is the i-th consecutive length-L chunk of y.
P = np.column_stack([y[start:start + L] for start in range(0, L * L, L)])
# np.linalg.svd returns the right factor already transposed,
# so P == U @ np.diag(S) @ V with V used as-is.
U, S, V = np.linalg.svd(P, full_matrices=False)
print('Page matrix and its SVD-decomposition:\n')
print(P, '\n\n', U, '\n\n', np.diag(S), '\n\n', V)


Page matrix and its SVD-decomposition:

[[ 2.9468194   1.91864557  1.090066    5.28710702  9.62977085 12.41261552]
 [ 3.63628318  2.33640267  1.73614439  6.03863768 10.6195424  12.53969402]
 [ 2.20029144  0.71044036  1.84846206  6.76454015 11.56870074 12.75664092]
 [ 3.58499797  1.25885197  2.72661025  7.82283933 11.60384656  0.        ]
 [ 2.30541867  1.175712    3.56022295  8.07749541 12.13092488  0.        ]
 [ 2.44001707  1.82684075  3.59882147  9.15639392 12.30468568  0.        ]] 

 [[-0.41912369 -0.39244973  0.23415354  0.07122461  0.07275833 -0.77789713]
 [-0.45566525 -0.35597249  0.4688456   0.23139737 -0.27762037  0.56144346]
 [-0.47439235 -0.3376221  -0.70140117 -0.29154441  0.20278501  0.20707413]
 [-0.34812119  0.42516057  0.40759339 -0.67299943  0.27460987  0.05982431]
 [-0.35819394  0.4475536  -0.24342196 -0.00609397 -0.76187715 -0.17788957]
 [-0.37704793  0.4735107  -0.08947459  0.63515258  0.46972761  0.03942022]] 

 [[37.81617258  0.          0.          0.          0

### Choose the rank r: the smallest r for which the cumulative squared-singular-value ratio computed below exceeds 0.9

In [58]:
# Share of the total squared singular values captured by the first k of them,
# for k = 1..L.
squared = S * S
total = squared.sum()
print([squared[:k].sum() / total for k in range(1, L + 1)])

[0.8539919476855975, 0.9968361633688743, 0.9991160390686421, 0.999798129453401, 0.9999574802339725, 1.0]


In [59]:
# Derive the rank from the spectrum instead of typing it in by hand:
# r is the smallest count of leading singular values whose cumulative share
# of the total squared singular values exceeds 0.9.
# np.argmax on a boolean mask returns the index of the first True.
energy_share = np.cumsum(S * S) / (S * S).sum()
r = int(np.argmax(energy_share > 0.9)) + 1
# Hard thresholding: keep the r leading singular values, zero the other L - r.
S_hat = np.append(S[:r], [0] * (L - r))
print(np.diag(S_hat))

[[37.81617258  0.          0.          0.          0.          0.        ]
 [ 0.         15.46614269  0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]]


### Set P = 33/36 (the fraction of observed entries), as three values need to be forecast

In [73]:
# Fraction of observed entries: the last three of the len(y) values are
# placeholders. NOTE: this rebinds P, which held the page matrix until now.
P = (len(y)-3)/len(y)
# U, S_hat and V are notebook globals shared by every section; re-run this
# section's SVD and rank-selection cells before this one, otherwise the
# reconstruction below is built from another section's factors.
# V is already transposed by np.linalg.svd, so U @ diag(S_hat) @ V is the
# truncated reconstruction; it is then rescaled by 1/P.
P_hat = np.dot(U, np.dot(np.diag(S_hat), V)) * (1/P)
print(P_hat)
# The last page-matrix column holds y[L*(L-1):], so the three trailing
# placeholders are the bottom three entries of that column.
print('Forecast for the 3 placeholder entries:', P_hat[L-3:, L-1])

[[ 2.90143735  2.10330966  1.46253121  4.64096358  9.31408753 11.97440166]
 [ 2.99450816  2.17077854  1.50944553  4.78983401  9.61286003 12.35851035]
 [ 3.14057971  2.27666872  1.583076    5.02348122 10.08177355 12.96135617]
 [ 3.28847204  2.38387881  1.65762428  5.26004084 10.55653206 13.57171645]
 [ 3.50275145  2.53921421  1.76563637  5.60278921 11.24440394 14.45606011]
 [ 3.59041254  2.60276148  1.80982381  5.74300659 11.52581036 14.81784257]]


# Learning Stationary and Non-Stationary Components
### Goal: forecast the next 3 elements of y

In [65]:
import numpy as np
# Fully observed series (36 points), six values per line so that each line is
# one column of the 6x6 page matrix built later.
y = np.array([
    3.64973815, 2.99549717, 2.56877414, 2.9468194, 3.63628318, 2.20029144,
    3.58499797, 2.30541867, 2.44001707, 1.91864557, 2.33640267, 0.71044036,
    1.25885197, 1.175712, 1.82684075, 1.090066, 1.73614439, 1.84846206,
    2.72661025, 3.56022295, 3.59882147, 5.28710702, 6.03863768, 6.76454015,
    7.82283933, 8.07749541, 9.15639392, 9.62977085, 10.6195424, 11.56870074,
    11.60384656, 12.13092488, 12.30468568, 12.41261552, 12.53969402, 12.75664092,
])

In [66]:
# Quick sanity check of the input: number of points and their total.
for label, value in (("Observation length:", len(y)),
                     ("Observation summary:", y.sum())):
    print(label, value)

Observation length: 36
Observation summary: 198.82849271


### For the best prediction model, assume L = 6 so that the page matrix P(y, L) is square.

In [67]:
L = 6
# Page matrix: column i is the i-th consecutive length-L chunk of y.
P = np.column_stack([y[start:start + L] for start in range(0, L * L, L)])
# np.linalg.svd returns the right factor already transposed,
# so P == U @ np.diag(S) @ V with V used as-is.
U, S, V = np.linalg.svd(P, full_matrices=False)
print('Page matrix and its SVD-decomposition:\n')
print(P, '\n\n', U, '\n\n', np.diag(S), '\n\n', V)

Page matrix and its SVD-decomposition:

[[ 3.64973815  3.58499797  1.25885197  2.72661025  7.82283933 11.60384656]
 [ 2.99549717  2.30541867  1.175712    3.56022295  8.07749541 12.13092488]
 [ 2.56877414  2.44001707  1.82684075  3.59882147  9.15639392 12.30468568]
 [ 2.9468194   1.91864557  1.090066    5.28710702  9.62977085 12.41261552]
 [ 3.63628318  2.33640267  1.73614439  6.03863768 10.6195424  12.53969402]
 [ 2.20029144  0.71044036  1.84846206  6.76454015 11.56870074 12.75664092]] 

 [[-0.36490139  0.61780028  0.25661129 -0.2047275   0.33536406  0.51472131]
 [-0.37660651  0.30337223 -0.28898697  0.55407881 -0.60012743  0.12435072]
 [-0.39497731  0.20859735 -0.53057907 -0.52887558  0.01924209 -0.48875996]
 [-0.41357709 -0.10315992  0.08844954  0.55443188  0.59748375 -0.38223966]
 [-0.4405261  -0.18216267  0.70023828 -0.21670469 -0.40749599 -0.26345011]
 [-0.45155086 -0.66253305 -0.26639456 -0.13045428  0.06299164  0.5149752 ]] 

 [[41.02168371  0.          0.          0.          0

### Choose the rank r: the smallest r for which the cumulative squared-singular-value ratio computed below exceeds 0.9

In [69]:
# Share of the total squared singular values captured by the first k of them,
# for k = 1..L.
squared = S * S
total = squared.sum()
print([squared[:k].sum() / total for k in range(1, L + 1)])

[0.9878622082422648, 0.9983156145945801, 0.9994800758016784, 0.9999535820762996, 0.9999956952735227, 1.0]


In [75]:
# Derive the rank from the spectrum instead of typing it in by hand:
# r is the smallest count of leading singular values whose cumulative share
# of the total squared singular values exceeds 0.9.
# np.argmax on a boolean mask returns the index of the first True.
energy_share = np.cumsum(S * S) / (S * S).sum()
r = int(np.argmax(energy_share > 0.9)) + 1
# Hard thresholding: keep the r leading singular values, zero the other L - r.
S_hat = np.append(S[:r], [0] * (L - r))
print(np.diag(S_hat))

[[41.02168371  0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]]


### Set P = 1, as we are not actually forecasting or imputing any values at this point

In [79]:
# Every entry is observed here, so the observed fraction is exactly 1.
# NOTE: this rebinds P, which held the page matrix until now.
P = len(y) / len(y)
# Truncated reconstruction; V comes from np.linalg.svd already transposed.
truncated = np.diag(S_hat) @ V
P_hat = (U @ truncated) * (1 / P)
print(P_hat)

[[ 2.65965091  1.92803386  1.34065361  4.25421661  8.53791357 10.97653486]
 [ 2.74496582  1.98988033  1.3836584   4.39068118  8.81178836 11.32863448]
 [ 2.87886474  2.08694632  1.451153    4.60485778  9.24162575 11.88124315]
 [ 3.0144327   2.18522224  1.51948892  4.8217041   9.67682105 12.44074008]
 [ 3.21085549  2.32761303  1.61850001  5.13589011 10.30737028 13.25138843]
 [ 3.2912115   2.38586469  1.65900516  5.26442271 10.56532617 13.58302236]]


In [104]:
# Regression set-up on the denoised page matrix:
#   Y   - its last row, kept 2-D with shape (1, number of columns)
#   Phi - the first L-1 rows
#   b   - a 1 x (L-1) row of integer zeros
Y = P_hat[[L - 1]]
Phi = P_hat[:L - 1]
b = np.zeros((1, L - 1), dtype=int)

In [107]:
# Show the regression target, the regressors and the zero row, separated by
# blank lines.
gap = '\n\n'
print(Y, gap, Phi, gap, b)

[[ 3.2912115   2.38586469  1.65900516  5.26442271 10.56532617 13.58302236]] 

 [[ 2.65965091  1.92803386  1.34065361  4.25421661  8.53791357 10.97653486]
 [ 2.74496582  1.98988033  1.3836584   4.39068118  8.81178836 11.32863448]
 [ 2.87886474  2.08694632  1.451153    4.60485778  9.24162575 11.88124315]
 [ 3.0144327   2.18522224  1.51948892  4.8217041   9.67682105 12.44074008]
 [ 3.21085549  2.32761303  1.61850001  5.13589011 10.30737028 13.25138843]] 

 [[0 0 0 0 0]]


In [124]:
# Least-squares fit of the last row of P_hat on its first L-1 rows:
# find B minimising ||Phi.T @ B - Y.T||. One coefficient per row of Phi.
design = Phi.T
target = Y.T
B, residuals, rank, s = np.linalg.lstsq(design, target, rcond=None)
print('Parameters estimation:\n', B)

Parameters estimation:
 [[0.20697295]
 [0.21361212]
 [0.22403208]
 [0.23458192]
 [0.24986746]]


In [153]:
# Recursive forecast: each new value is the fitted linear combination of the
# most recent len(B) values, and is appended so it feeds the next step.
n_forecast = 3                 # number of future points to produce
coeffs = B[:, 0]               # B is a column vector; coeffs[0] weights the oldest value
order = len(coeffs)            # size of the look-back window (L - 1 for this fit)
predict = P_hat[:, L - 1]      # start from the last column of the denoised page matrix
y_hat = np.zeros(n_forecast)
for i in range(n_forecast):
    # predict[-order:] runs oldest -> newest, matching the order of coeffs.
    y_hat[i] = (coeffs * predict[-order:]).sum()
    # np.append returns a new array, so P_hat itself is never modified.
    predict = np.append(predict, y_hat[i])
predict

array([10.97653486, 11.32863448, 11.88124315, 12.44074008, 13.25138843,
       13.58302236, 14.17231459, 14.81285647, 15.47440602])