# Air Quality Tensor Decomposition

In [14]:
import tensorly as tl
import numpy as np
import pandas as pd
import datetime

tl.set_backend('pytorch') # Or 'mxnet', 'numpy', 'tensorflow' or 'cupy'

# tensor of size 3x4x2
tensor = tl.tensor(np.arange(24).reshape((3, 4, 2)), device='cuda:0')
unfolded = tl.unfold(tensor, mode=0)
tl.fold(unfolded, mode=0, shape=tensor.shape)

tensor([[[ 0.,  1.],
         [ 2.,  3.],
         [ 4.,  5.],
         [ 6.,  7.]],

        [[ 8.,  9.],
         [10., 11.],
         [12., 13.],
         [14., 15.]],

        [[16., 17.],
         [18., 19.],
         [20., 21.],
         [22., 23.]]], device='cuda:0')

In [13]:
df = pd.read_csv('../data/PRSA/PRSA_Data_Aotizhongxin_20130301-20170228.csv')
df

Unnamed: 0,No,year,month,day,hour,PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES,DEWP,RAIN,wd,WSPM,station
0,1,2013,3,1,0,4.0,4.0,4.0,7.0,300.0,77.0,-0.7,1023.0,-18.8,0.0,NNW,4.4,Aotizhongxin
1,2,2013,3,1,1,8.0,8.0,4.0,7.0,300.0,77.0,-1.1,1023.2,-18.2,0.0,N,4.7,Aotizhongxin
2,3,2013,3,1,2,7.0,7.0,5.0,10.0,300.0,73.0,-1.1,1023.5,-18.2,0.0,NNW,5.6,Aotizhongxin
3,4,2013,3,1,3,6.0,6.0,11.0,11.0,300.0,72.0,-1.4,1024.5,-19.4,0.0,NW,3.1,Aotizhongxin
4,5,2013,3,1,4,3.0,3.0,12.0,12.0,300.0,72.0,-2.0,1025.2,-19.5,0.0,N,2.0,Aotizhongxin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35059,35060,2017,2,28,19,12.0,29.0,5.0,35.0,400.0,95.0,12.5,1013.5,-16.2,0.0,NW,2.4,Aotizhongxin
35060,35061,2017,2,28,20,13.0,37.0,7.0,45.0,500.0,81.0,11.6,1013.6,-15.1,0.0,WNW,0.9,Aotizhongxin
35061,35062,2017,2,28,21,16.0,37.0,10.0,66.0,700.0,58.0,10.8,1014.2,-13.3,0.0,NW,1.1,Aotizhongxin
35062,35063,2017,2,28,22,21.0,44.0,12.0,87.0,700.0,35.0,10.5,1014.4,-12.9,0.0,NNW,1.2,Aotizhongxin


In [18]:
df['datetime'] = df[['year','month','day','hour']].apply(lambda x: pd.datetime.strptime(str(x['year'])+'-'+str(x['day'])+'-'+str(x['month'])+' '+str(x['hour']), "%Y-%M-%d %H").timestamp(), axis=1)


# CP Decomposition

In [97]:
X = df[['datetime','PM2.5', 'PM10', 'TEMP', 'PRES', 'CO', 'O3', 'SO2']].values
X

array([[1.35717126e+09, 4.00000000e+00, 4.00000000e+00, ...,
        3.00000000e+02, 7.70000000e+01, 4.00000000e+00],
       [1.35717486e+09, 8.00000000e+00, 8.00000000e+00, ...,
        3.00000000e+02, 7.70000000e+01, 4.00000000e+00],
       [1.35717846e+09, 7.00000000e+00, 7.00000000e+00, ...,
        3.00000000e+02, 7.30000000e+01, 5.00000000e+00],
       ...,
       [1.48339248e+09, 1.60000000e+01, 3.70000000e+01, ...,
        7.00000000e+02, 5.80000000e+01, 1.00000000e+01],
       [1.48339608e+09, 2.10000000e+01, 4.40000000e+01, ...,
        7.00000000e+02, 3.50000000e+01, 1.20000000e+01],
       [1.48339968e+09, 1.90000000e+01, 3.10000000e+01, ...,
        6.00000000e+02, 4.20000000e+01, 1.00000000e+01]])

In [98]:
import tensorly as tl
import numpy as np

tl.set_backend('pytorch') # Or 'mxnet', 'numpy', 'tensorflow' or 'cupy'

tensor = tl.tensor(X, device='cuda:0')
tensor


RuntimeError: CUDA error: an illegal memory access was encountered

In [36]:
from tensorly.decomposition import parafac
factors = parafac(tensor, rank=2)
len(factors)

2

In [37]:
factors.factors

[tensor([[nan, nan],
         [nan, nan],
         [nan, nan],
         ...,
         [nan, nan],
         [nan, nan],
         [nan, nan]], device='cuda:0'),
 tensor([[nan, nan],
         [nan, nan],
         [nan, nan],
         [nan, nan],
         [nan, nan]], device='cuda:0')]

In [104]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
tensor = tl.tensor(X_scaled[:30, :])
tensor

tensor([[0.0000e+00, 1.1173e-03, 2.0367e-03, 2.8098e-01, 6.6132e-01, 2.0202e-02,
         1.8162e-01, 1.0902e-02],
        [2.8520e-05, 5.5866e-03, 6.1100e-03, 2.7400e-01, 6.6488e-01, 2.0202e-02,
         1.8162e-01, 1.0902e-02],
        [5.7039e-05, 4.4693e-03, 5.0916e-03, 2.7400e-01, 6.7023e-01, 2.0202e-02,
         1.7216e-01, 1.3837e-02],
        [8.5559e-05, 3.3520e-03, 4.0733e-03, 2.6876e-01, 6.8806e-01, 2.0202e-02,
         1.6979e-01, 3.1447e-02],
        [1.1408e-04, 0.0000e+00, 1.0183e-03, 2.5829e-01, 7.0053e-01, 2.0202e-02,
         1.6979e-01, 3.4382e-02],
        [1.4260e-04, 2.2346e-03, 3.0550e-03, 2.5480e-01, 7.0766e-01, 3.0303e-02,
         1.5560e-01, 5.1992e-02],
        [1.7112e-04, 0.0000e+00, 1.0183e-03, 2.4782e-01, 7.2371e-01, 4.0404e-02,
         1.1776e-01, 5.1992e-02],
        [1.9964e-04, 0.0000e+00, 4.0733e-03, 2.6527e-01, 7.3975e-01, 4.0404e-02,
         1.0120e-01, 5.4927e-02],
        [2.2816e-04, 0.0000e+00, 4.0733e-03, 2.9494e-01, 7.5579e-01, 4.0404e-02,

In [108]:
from tensorly.decomposition import randomised_parafac, parafac
# factors = randomised_parafac(tensor, rank=2, verbose=2, n_samples=1000)
factors = parafac(tensor, rank=2, verbose=2)
factors

Starting iteration 1
Mode 0 of 2
Mode 1 of 2
reconstruction error=0.03604860231280327
Starting iteration 2
Mode 0 of 2
Mode 1 of 2
iteration 1, reconstruction error: 0.03604860231280327, decrease = 0.0, unnormalized = 0.16250891983509064
PARAFAC converged after 1 iterations


(weights, factors) : rank-2 CPTensor of shape (30, 8) 

In [110]:
factors.factors

[tensor([[ 7.3883e-01,  4.5403e-02],
         [ 7.3952e-01,  3.9226e-02],
         [ 7.4289e-01,  3.0973e-02],
         [ 7.5751e-01,  1.5196e-02],
         [ 7.6498e-01,  3.0007e-03],
         [ 7.6900e-01, -1.5513e-02],
         [ 7.7489e-01, -4.9152e-02],
         [ 7.9338e-01, -5.4002e-02],
         [ 8.1952e-01, -3.5193e-02],
         [ 8.3468e-01, -5.2309e-04],
         [ 8.3922e-01,  2.9988e-02],
         [ 8.4538e-01,  4.0240e-02],
         [ 8.3789e-01,  6.4211e-02],
         [ 8.3049e-01,  8.9869e-02],
         [ 8.3069e-01,  1.0126e-01],
         [ 8.2845e-01,  1.0338e-01],
         [ 8.2519e-01,  9.9027e-02],
         [ 8.2554e-01,  7.1438e-02],
         [ 8.3384e-01,  4.6003e-02],
         [ 8.3959e-01,  1.4034e-02],
         [ 8.4720e-01, -6.8403e-03],
         [ 8.5417e-01, -3.4745e-02],
         [ 8.5692e-01, -6.4265e-02],
         [ 8.5170e-01, -7.4742e-02],
         [ 8.6101e-01, -6.6942e-02],
         [ 8.6337e-01, -6.4257e-02],
         [ 8.5221e-01, -7.0836e-02],
 

In [109]:
factors.factors[0]

tensor([[ 7.3883e-01,  4.5403e-02],
        [ 7.3952e-01,  3.9226e-02],
        [ 7.4289e-01,  3.0973e-02],
        [ 7.5751e-01,  1.5196e-02],
        [ 7.6498e-01,  3.0007e-03],
        [ 7.6900e-01, -1.5513e-02],
        [ 7.7489e-01, -4.9152e-02],
        [ 7.9338e-01, -5.4002e-02],
        [ 8.1952e-01, -3.5193e-02],
        [ 8.3468e-01, -5.2309e-04],
        [ 8.3922e-01,  2.9988e-02],
        [ 8.4538e-01,  4.0240e-02],
        [ 8.3789e-01,  6.4211e-02],
        [ 8.3049e-01,  8.9869e-02],
        [ 8.3069e-01,  1.0126e-01],
        [ 8.2845e-01,  1.0338e-01],
        [ 8.2519e-01,  9.9027e-02],
        [ 8.2554e-01,  7.1438e-02],
        [ 8.3384e-01,  4.6003e-02],
        [ 8.3959e-01,  1.4034e-02],
        [ 8.4720e-01, -6.8403e-03],
        [ 8.5417e-01, -3.4745e-02],
        [ 8.5692e-01, -6.4265e-02],
        [ 8.5170e-01, -7.4742e-02],
        [ 8.6101e-01, -6.6942e-02],
        [ 8.6337e-01, -6.4257e-02],
        [ 8.5221e-01, -7.0836e-02],
        [ 8.4894e-01, -5.523

In [19]:
df

Unnamed: 0,No,year,month,day,hour,PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES,DEWP,RAIN,wd,WSPM,station,datetime
0,1,2013,3,1,0,4.0,4.0,4.0,7.0,300.0,77.0,-0.7,1023.0,-18.8,0.0,NNW,4.4,Aotizhongxin,1.357171e+09
1,2,2013,3,1,1,8.0,8.0,4.0,7.0,300.0,77.0,-1.1,1023.2,-18.2,0.0,N,4.7,Aotizhongxin,1.357175e+09
2,3,2013,3,1,2,7.0,7.0,5.0,10.0,300.0,73.0,-1.1,1023.5,-18.2,0.0,NNW,5.6,Aotizhongxin,1.357178e+09
3,4,2013,3,1,3,6.0,6.0,11.0,11.0,300.0,72.0,-1.4,1024.5,-19.4,0.0,NW,3.1,Aotizhongxin,1.357182e+09
4,5,2013,3,1,4,3.0,3.0,12.0,12.0,300.0,72.0,-2.0,1025.2,-19.5,0.0,N,2.0,Aotizhongxin,1.357186e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35059,35060,2017,2,28,19,12.0,29.0,5.0,35.0,400.0,95.0,12.5,1013.5,-16.2,0.0,NW,2.4,Aotizhongxin,1.483385e+09
35060,35061,2017,2,28,20,13.0,37.0,7.0,45.0,500.0,81.0,11.6,1013.6,-15.1,0.0,WNW,0.9,Aotizhongxin,1.483389e+09
35061,35062,2017,2,28,21,16.0,37.0,10.0,66.0,700.0,58.0,10.8,1014.2,-13.3,0.0,NW,1.1,Aotizhongxin,1.483392e+09
35062,35063,2017,2,28,22,21.0,44.0,12.0,87.0,700.0,35.0,10.5,1014.4,-12.9,0.0,NNW,1.2,Aotizhongxin,1.483396e+09
