# Matrix Completion 
# Steam Games data

### Jeffrey Tumminia

## Install

- cvxpy
- matrix_completion
- fancyimpute
- CVXOPT



In [41]:
import numpy as np
import matrix_completion as mc
from fancyimpute import KNN, SoftImpute, BiScaler

In [27]:
pth = './'
stm = np.load(pth+'steam_100.npy')
mask = np.load(pth+'missing_100.npy')

print(stm.shape)
print(mask.shape)

(174, 102)
(174, 102)


In [28]:
#% of values present
sum(sum(mask))/(mask.shape[0]*mask.shape[1])

0.40455262564796035

### matrix_completion

- NuclearNorm
- Probabilistic Matrix Factorization with ALS

In [29]:
stm

array([[41. ,  0. ,  1.2, ...,  0. ,  0. , 25. ],
       [ 0. ,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. , ...,  0. ,  0. , 41. ],
       ...,
       [ 0. ,  0. ,  0. , ...,  1.1,  2.2,  0. ],
       [ 0. ,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       [ 3.6,  0. ,  0. , ...,  0. ,  0. ,  0. ]])

In [30]:
mean = np.mean(stm, axis=1).reshape(-1,1)
stm_norm = stm - mean
m = 1/np.sqrt(max(stm.shape))

In [31]:
R_hat = mc.nuclear_norm_solve(stm_norm, mask, mu=m)
out = R_hat+mean
print(out.shape)
out

(174, 102)


array([[ 41.00098404,  44.07160478,   1.2020442 , ...,   5.04728754,
        151.99187027,  25.00307346],
       [ 19.57360767,   8.77928273,   2.11818534, ...,   3.58492759,
         34.72719512,  -2.42154936],
       [ 33.13801516,  -7.27257992,   4.76620586, ...,  16.92765747,
         56.27514549,  41.00111642],
       ...,
       [ 94.5679989 ,  40.57714998,  -0.46353866, ...,   1.10032921,
          2.19931692,  24.42561892],
       [ 11.81607138,   0.81033427,  13.64559876, ...,   5.30424555,
         10.07336101,  12.32462632],
       [  3.60028257, -13.22906341,  -4.98773617, ...,  13.54523547,
         34.38795261,   6.11857123]])

In [32]:
R_hat

array([[ 13.35686639,  16.42748713, -26.44207345, ..., -22.59683011,
        124.34775262,  -2.64104419],
       [ 13.83537238,   3.04104743,  -3.62004995, ...,  -2.15330771,
         28.98895982,  -8.15978465],
       [ 18.80566222, -21.60493286,  -9.56614708, ...,   2.59530452,
         41.94279255,  26.66876348],
       ...,
       [ 77.50035185,  23.50950292, -17.53118572, ..., -15.96731785,
        -14.86833014,   7.35797186],
       [  8.91018902,  -2.09554809,  10.73971641, ...,   2.39836319,
          7.16747866,   9.41874397],
       [ -8.57226645, -25.40161243, -17.16028519, ...,   1.37268645,
         22.21540359,  -6.05397779]])

In [62]:
R_pmf = mc.pmf_solve(stm_norm, mask, k=50, mu=1e-2, max_iterations=500)

In [63]:
R_pmf+mean

array([[ 4.10000034e+01,  6.36721022e+01,  1.19955004e+00, ...,
        -1.08437331e+00, -2.74048208e+02,  2.50017534e+01],
       [-2.32154094e+01, -1.62221245e+00,  2.42775025e+01, ...,
         3.64316122e+00, -3.01593886e+01, -1.74734723e+01],
       [ 2.28465890e+01, -4.77218205e+00,  3.06109189e+01, ...,
        -6.50522469e+00,  7.96958953e+00,  4.10165942e+01],
       ...,
       [ 1.46024763e+02,  7.53570250e+01,  8.31792499e+01, ...,
         1.09998918e+00,  2.19930840e+00,  1.99694577e+02],
       [ 1.22880715e+01,  2.25701010e+00,  1.13943407e+01, ...,
         9.10546509e-02,  1.78216146e+01, -1.31288596e+01],
       [ 3.60009443e+00, -1.93073485e+01,  3.39879433e+01, ...,
         4.57299024e+00,  3.89139795e+01,  3.47493223e+01]])

### fancyimpute Knn

In [33]:
stm[mask==0]=np.nan
stm_knn = KNN(k=3).fit_transform(stm)

Imputing row 1/174 with 44 missing, elapsed time: 0.033
Imputing row 101/174 with 51 missing, elapsed time: 0.092


In [34]:
stm

array([[41. ,  nan,  1.2, ...,  nan,  nan, 25. ],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan, 41. ],
       ...,
       [ nan,  nan,  nan, ...,  1.1,  2.2,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ 3.6,  nan,  nan, ...,  nan,  nan,  nan]])

In [35]:
stm_knn

array([[4.10000000e+01, 1.96799645e+00, 1.20000000e+00, ...,
        8.62275713e+01, 3.47832640e+02, 2.50000000e+01],
       [2.32768557e+01, 9.01250764e-01, 1.04437672e+01, ...,
        5.60572931e-01, 9.85711732e+00, 4.18660788e+00],
       [4.18302829e+01, 1.32370542e+00, 1.11446725e+01, ...,
        6.53693567e+01, 6.91433631e+02, 4.10000000e+01],
       ...,
       [2.24228159e+02, 1.05651157e+00, 1.17298240e+01, ...,
        1.10000000e+00, 2.20000000e+00, 2.36879744e+01],
       [4.73813945e+01, 7.30637179e-01, 1.54165373e+01, ...,
        2.32314152e-01, 6.99431148e+00, 2.10672070e+01],
       [3.60000000e+00, 9.46153500e+00, 2.09999434e+01, ...,
        9.49032653e-01, 1.00422721e+02, 5.28961973e+00]])

In [36]:
# Instead of solving the nuclear norm objective directly, instead
# induce sparsity using singular value thresholding
stm_normalized = BiScaler().fit_transform(stm)
stm_softimpute = SoftImpute().fit_transform(stm_normalized)

[BiScaler] Initial log residual value = 13.928087
[BiScaler] Iter 1: log residual = 3.135388, log improvement ratio=10.792700
[BiScaler] Iter 2: log residual = 0.956633, log improvement ratio=2.178754
[BiScaler] Iter 3: log residual = -0.374851, log improvement ratio=1.331484
[BiScaler] Iter 4: log residual = -1.564625, log improvement ratio=1.189774
[BiScaler] Iter 5: log residual = -2.801501, log improvement ratio=1.236876
[BiScaler] Iter 6: log residual = -4.089811, log improvement ratio=1.288311
[BiScaler] Iter 7: log residual = -5.406391, log improvement ratio=1.316580
[BiScaler] Iter 8: log residual = -6.728927, log improvement ratio=1.322536
[BiScaler] Iter 9: log residual = -8.041014, log improvement ratio=1.312087
[BiScaler] Iter 10: log residual = -9.330566, log improvement ratio=1.289552
[BiScaler] Iter 11: log residual = -10.588270, log improvement ratio=1.257705
[BiScaler] Iter 12: log residual = -11.807207, log improvement ratio=1.218937
[BiScaler] Iter 13: log residual =

[SoftImpute] Iter 94: observed MAE=0.030590 rank=76
[SoftImpute] Iter 95: observed MAE=0.030588 rank=76
[SoftImpute] Iter 96: observed MAE=0.030586 rank=76
[SoftImpute] Iter 97: observed MAE=0.030585 rank=76
[SoftImpute] Iter 98: observed MAE=0.030584 rank=76
[SoftImpute] Iter 99: observed MAE=0.030583 rank=76
[SoftImpute] Iter 100: observed MAE=0.030582 rank=76
[SoftImpute] Stopped after iteration 100 for lambda=0.422206


In [39]:
stm_softimpute

array([[-0.48597304,  0.231446  , -0.88748857, ..., -0.09441767,
         0.08672113,  0.01939776],
       [ 0.49746988, -0.13513342,  0.07546035, ...,  0.10346634,
         0.01345405, -0.10543331],
       [ 0.14118951, -0.50954928,  0.38414498, ..., -0.01855425,
         0.15415591,  0.22608438],
       ...,
       [-0.02220715,  0.06035144, -0.53414393, ...,  0.42573419,
        -0.43607862, -0.42707169],
       [-0.32802568, -0.14215451,  1.06337993, ...,  1.12921004,
        -0.19221668,  0.21006092],
       [-0.62177798, -0.63376155,  0.03037778, ..., -0.58103392,
        -0.70713678, -0.54466177]])