In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel, polynomial_kernel
import math

from autorecommender.data import Dataset, ratings_matrix_to_list
from autorecommender.models.item_based import ItemSimilarity
from autorecommender.models.user_based import UserSimilarity
from autorecommender.models.autoencoder import Autoencoder
from autorecommender.models.combined import Combined


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Relative directory that holds the bake-off CSV files.
DATA_PATH = '../../../Toys/Recommender/Bake-off/'

# Training inputs: each path is DATA_PATH followed by the file name.
MOVIES_CSV, USERS_CSV, RATINGS_CSV = (
    DATA_PATH + file_name
    for file_name in (
        'bakeoff_movies.csv',
        'bakeoff_train_users.csv',
        'bakeoff_train_ratings.csv',
    )
)

In [3]:
# Each table is read from CSV, indexed by its id column(s) and sorted on that index.
# For quick experiments the data was previously restricted to movieid < 11 and
# userid < 40 before indexing; the full tables are used here.

# Ratings are keyed by (userid, movieid) and reduced to the single 'rating' column.
RATINGS = (
    pd.read_csv(RATINGS_CSV)
    .set_index(['userid', 'movieid'])
    .sort_index()[['rating']]
)

MOVIES = pd.read_csv(MOVIES_CSV).set_index('movieid').sort_index()

USERS = pd.read_csv(USERS_CSV).set_index('userid').sort_index()

In [4]:
# Bundle the three indexed tables into the project's Dataset object, which the
# models below receive in their fit() calls.
dataset = Dataset(MOVIES, USERS, RATINGS)

In [5]:
# Instantiate the Combined model, sized by the number of items in the dataset.
model = Combined(item_count=len(dataset.items))

In [6]:
# Train with batch_size=64 for 15 epochs (`patience` presumably controls early
# stopping -- TODO confirm in autorecommender).
# NOTE(review): the output recorded for this cell ends with an IndexError right
# after "Iteration: 0"; confirm the cell runs to completion on a fresh kernel.
model.fit(dataset, batch_size=64, epochs=15, patience=0)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Iteration: 0


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [7]:
# Two validation files, both keyed by (userid, movieid) and reduced to 'rating':
# VAL_RATINGS comes from the SECRET_ file, VAL_RATINGS_X from the file that is
# later passed to model.predict().
VAL_RATINGS = (
    pd.read_csv(DATA_PATH + 'SECRET_bakeoff_validation_ratings.csv')
    .set_index(['userid', 'movieid'])
    .sort_index()[['rating']]
)
VAL_RATINGS_X = (
    pd.read_csv(DATA_PATH + 'bakeoff_validation_ratings.csv')
    .set_index(['userid', 'movieid'])
    .sort_index()[['rating']]
)

val_idx_y, val_idx_X = VAL_RATINGS.index, VAL_RATINGS_X.index
# (userid, movieid) pairs that appear in VAL_RATINGS but not in VAL_RATINGS_X.
val_idx = val_idx_y.difference(val_idx_X)

# Ratings for exactly those pairs; the evaluation cells score predictions against them.
val_masked_ratings = VAL_RATINGS.loc[val_idx]

In [8]:
# Score `model` on the validation pairs selected by val_idx.
val_predictions = model.predict(VAL_RATINGS_X)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.8985157202023762, MAE is: 0.7073700940243968


In [12]:
# User-based similarity model.
# NOTE(review): the meaning of threshold=1.0 is defined in autorecommender and is
# not visible from this notebook -- document it here once confirmed.
umodel = UserSimilarity(threshold=1.0)

In [13]:
# Fit the user-based model on the same training dataset as the other models.
umodel.fit(dataset)

In [14]:
# Score the user-based model (`umodel`) on the validation pairs selected by val_idx.
val_predictions = umodel.predict(VAL_RATINGS_X)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.9172428688820231, MAE is: 0.7196982080341882


In [9]:
# Same evaluation as the plain predict() cell, but with null=True.
# NOTE(review): null=True presumably yields a baseline prediction -- confirm in autorecommender.
val_predictions = model.predict(VAL_RATINGS_X, null=True)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.922183808039501, MAE is: 0.7233853931358244


In [6]:
# Rebinds `model` (used earlier in this notebook for the Combined model) to an
# item-based similarity model; later cells that read `model` see this object.
model = ItemSimilarity()

In [7]:
# Fit the item-based model on the training dataset.
model.fit(dataset)

In [7]:
# Validation setup (the same statements also appear earlier in this notebook).
# Both files are keyed by (userid, movieid) and reduced to the 'rating' column.
VAL_RATINGS = (
    pd.read_csv(DATA_PATH + 'SECRET_bakeoff_validation_ratings.csv')
    .set_index(['userid', 'movieid'])
    .sort_index()[['rating']]
)
VAL_RATINGS_X = (
    pd.read_csv(DATA_PATH + 'bakeoff_validation_ratings.csv')
    .set_index(['userid', 'movieid'])
    .sort_index()[['rating']]
)

val_idx_y, val_idx_X = VAL_RATINGS.index, VAL_RATINGS_X.index
# (userid, movieid) pairs that appear in VAL_RATINGS but not in VAL_RATINGS_X.
val_idx = val_idx_y.difference(val_idx_X)

# Ratings for exactly those pairs; the evaluation cells score predictions against them.
val_masked_ratings = VAL_RATINGS.loc[val_idx]

In [8]:
# Score the current `model` on the validation pairs selected by val_idx.
val_predictions = model.predict(VAL_RATINGS_X)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.9064337598660396, MAE is: 0.7120072062141687


In [9]:
# Evaluate the current `model` again, this time calling predict() with null=True.
# NOTE(review): null=True presumably yields a baseline prediction -- confirm in autorecommender.
val_predictions = model.predict(VAL_RATINGS_X, null=True)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.922183808039501, MAE is: 0.7233853931358244


In [10]:
# Autoencoder model, sized by the number of items in the dataset. It is kept in
# `model2` so the similarity model bound to `model` stays available.
model2 = Autoencoder(item_count=len(dataset.items))

In [11]:
# Train the autoencoder with batch_size=64 for 30 epochs (`patience` presumably
# controls early stopping -- TODO confirm in autorecommender).
model2.fit(dataset, batch_size=64, epochs=30, patience=0)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1a31f0fb38>

In [12]:
# Score the autoencoder (`model2`) on the validation pairs selected by val_idx.
val_predictions = model2.predict(VAL_RATINGS_X)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.9087807416724243, MAE is: 0.7150812929468041


In [13]:
# Evaluate the autoencoder again, this time calling predict() with null=True.
# NOTE(review): null=True presumably yields a baseline prediction -- confirm in autorecommender.
val_predictions = model2.predict(VAL_RATINGS_X, null=True)
val_model_ratings = ratings_matrix_to_list(val_predictions).loc[val_idx]

# Subtraction aligns on the (userid, movieid) index; compute the residuals once.
val_errors = val_masked_ratings - val_model_ratings
VAL_MSE = (val_errors**2).mean()['rating']
VAL_MAE = abs(val_errors).mean()['rating']
VAL_RMSE = math.sqrt(VAL_MSE)

print('Bake-off RMSE is: {}, MAE is: {}'.format(VAL_RMSE, VAL_MAE))

Bake-off RMSE is: 0.9218358916412948, MAE is: 0.7232129871905056


In [11]:
# Plain ndarray copy of the dataset's preference matrix; converting with
# np.array drops the userid / movieid labels.
x = np.array(dataset._make_prefmatrix())

In [12]:
# Multiply the preferences by the model's similarity matrix and replace missing
# results with 0.
# NOTE(review): .fillna() is a pandas method, so `x` was presumably still a
# DataFrame when this cell ran; with the ndarray `x` built in the cell shown just
# before it, this call would likely fail -- confirm on a fresh kernel.
preds = x.dot(model._sim).fillna(0)

In [17]:
# Clamp preds into [-1, 1] in place. Both masks are taken up front; the result
# is unchanged because values set to 1.0 can never satisfy `< -1.0`.
above_upper = preds > 1.0
below_lower = preds < -1.0
preds[above_upper] = 1.0
preds[below_lower] = -1.0

In [23]:
# Affine rescale: -1 maps to 0 and 1 maps to 1, so values clamped to [-1, 1]
# end up in [0, 1].
# NOTE: not idempotent -- re-running this cell rescales the already-rescaled values.
preds = (preds + 1.0)/2.0

In [24]:
preds.head()

movieid,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.47124,0.523297,0.232976,0.515751,0.309136,0.280969,0.232976,0.576352,0.398882,0.260908,...,0.309136,0.515751,0.523878,0.441522,0.286692,0.309136,0.713139,0.713139,0.713139,0.49988
2,0.181196,0.5554,0.108027,0.2282,0.0,0.147347,0.108027,0.583007,0.260181,0.321424,...,0.0,0.2282,0.567775,0.350529,0.313126,0.0,0.667801,0.667801,0.667801,0.486514
3,0.2351,0.626739,0.126719,0.101035,0.085445,0.609255,0.126719,0.617884,0.730852,0.718285,...,0.085445,0.101035,0.583474,0.522845,0.458383,0.085445,0.350333,0.350333,0.350333,0.364742
7,0.491506,0.577511,0.471993,0.258358,0.485287,0.523989,0.471993,0.594932,0.368476,0.647314,...,0.485287,0.258358,0.577511,0.380997,0.752426,0.485287,0.172979,0.172979,0.172979,0.447253
8,0.379445,0.379353,0.682296,0.0,0.445861,0.733033,0.682296,0.352238,0.833914,0.741046,...,0.445861,0.0,0.377769,0.084783,0.640631,0.445861,0.0,0.0,0.0,0.0


In [12]:
x

array([[ 0.11106065,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.17955989,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [19]:
# Replace missing similarities with 0 so they contribute nothing to the dot
# products in later cells. This overwrites the model's private `_sim` attribute.
model._sim = model._sim.fillna(0)

In [20]:
# Similarity-weighted sum of preferences divided by the summed similarity of the
# non-zero entries (`w` is the 0/1 mask built in a separate cell).
# NOTE(review): wherever the denominator is 0 this divides by zero; the output
# recorded for this cell looks like the tail of a warning. A later cell guards
# against this by setting zero denominators to 1.
pred = x.dot(model._sim)/(w.dot(model._sim))

  """Entry point for launching an IPython kernel.


In [17]:
# Indicator matrix: 1 where x has a non-zero entry, 0 elsewhere.
w = (x != 0).astype(int)

  return umr_maximum(a, axis, None, out, keepdims, initial)


nan

In [23]:
# Similarity-weighted average of preferences: the weighted sum of each row's
# preferences is divided by the total similarity weight of that row's non-zero
# entries.
pm = dataset._make_prefmatrix()
X = np.array(pm)
# Build the 0/1 mask from X, the array created in this cell. The previous
# version read the notebook-global `x`, which is rebound to unrelated toy data
# elsewhere in the notebook, so the mask silently depended on execution order.
w = (X != 0).astype(int)

preds = X.dot(model._sim)
den = w.dot(model._sim)
# Guard against division by zero: a zero denominator becomes 1, so the
# corresponding prediction keeps its numerator value instead of becoming nan/inf.
den[den == 0] = 1.0

preds = preds/den

In [26]:
preds.min()

-0.8836122074507149

In [3]:
# Toy ratings table with three rows (index named 'user') and four columns.
x = pd.DataFrame(
    [
        [5, 4, 0, -4],
        [0, 4, 0, 0],
        [2, 0, -5, -4],
    ]
).rename_axis('user')
x

Unnamed: 0_level_0,0,1,2,3
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,5,4,0,-4
1,0,4,0,0
2,2,0,-5,-4


In [19]:
# DataFrame.corr() correlates columns pairwise, so transpose first to get the
# correlation between the rows (users) of x.
x.transpose().corr()

user,0,1,2
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1.0,0.445742,0.852376
1,0.445742,1.0,0.353103
2,0.852376,0.353103,1.0


In [13]:
# Pairwise cosine similarity between the rows of x; the result is a square
# ndarray with one row and one column per row of x.
s = cosine_similarity(x)

In [7]:
s.dot(x)

array([[ 6.02673805,  6.11925177, -2.56684514, -6.05347611],
       [ 2.64906471,  6.11925177,  0.        , -2.11925177],
       [ 4.56684514,  2.05347611, -5.        , -6.05347611]])

In [14]:
# Work on a copy so that `s` keeps its original diagonal.
s2 = s.copy()

In [15]:
# Zero the self-similarities in place (np.fill_diagonal mutates s2 and returns
# None), so a row's own values do not contribute to s2.dot(x).
np.fill_diagonal(s2, 0)

In [16]:
s2

array([[0.        , 0.52981294, 0.51336903],
       [0.52981294, 0.        , 0.        ],
       [0.51336903, 0.        , 0.        ]])

In [17]:
s

array([[1.        , 0.52981294, 0.51336903],
       [0.52981294, 1.        , 0.        ],
       [0.51336903, 0.        , 1.        ]])

In [18]:
s2.dot(x)

array([[ 1.02673805,  2.11925177, -2.56684514, -2.05347611],
       [ 2.64906471,  2.11925177,  0.        , -2.11925177],
       [ 2.56684514,  2.05347611,  0.        , -2.05347611]])

In [21]:
# Row sums of s2 reshaped to a column vector, so that `s2/w` divides every row
# by its own total.
w = s2.sum(axis=1).reshape(-1,1)

In [22]:
# Normalise each row of s2 by its row sum, giving weights that add up to 1 per row.
s2/w

array([[0.        , 0.50788161, 0.49211839],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ]])

In [10]:
pm = dataset._make_prefmatrix()

In [11]:
# Hand-picked slice of the preference matrix for inspection: 7 user ids (rows)
# by 9 movie ids (columns).
smallpm = pm.loc[[1, 2, 4169, 1941, 1181, 889, 3618]][[2858, 260, 1196, 1210, 480, 2028, 589, 1270, 2571]]

In [12]:
smallpm

movieid,2858,260,1196,1210,480,2028,589,1270,2571
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.0,-0.158563,0.0,0.0,0.0,0.068356,0.0,0.137387,0.0
2,-0.065387,0.0,0.140469,-0.008982,0.250809,-0.06969,-0.011212,0.0,-0.06722
4169,0.097716,0.06803,0.102038,0.154121,0.212378,0.093414,-0.049643,-0.03909,-0.105651
1941,-0.644996,0.131456,0.165464,0.217547,0.275804,0.156839,-0.187752,0.22587,-0.24376
1181,-0.144671,0.027177,0.061186,-0.088266,-0.231544,0.052561,0.312574,-0.079943,0.256566
889,-0.353751,0.019631,0.05364,-0.095812,-0.037555,-0.15652,0.305027,0.114046,0.24902
3618,0.011025,-0.018662,-0.186188,-0.134105,0.125687,-0.194813,-0.136335,0.075753,-0.192343


In [13]:
# Cosine similarity between the rows (users) of smallpm.
s = cosine_similarity(smallpm)
# Zero the diagonal in place so each user's similarity to themselves is ignored.
np.fill_diagonal(s, 0)
# Re-attach the user ids as both row and column labels.
s = pd.DataFrame(s, index=smallpm.index, columns=smallpm.index)
s

userid,1,2,4169,1941,1181,889,3618
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.0,-0.069499,-0.129291,0.109097,-0.104084,0.014612,0.000552
2,-0.069499,0.0,0.573057,0.519639,-0.402174,0.071874,0.266006
4169,-0.129291,0.573057,0.0,0.322012,-0.592658,-0.562955,-0.051511
1941,0.109097,0.519639,0.322012,0.0,-0.242052,0.183288,0.067821
1181,-0.104084,-0.402174,-0.592658,-0.242052,0.0,0.729699,-0.665432
889,0.014612,0.071874,-0.562955,0.183288,0.729699,0.0,-0.239161
3618,0.000552,0.266006,-0.051511,0.067821,-0.665432,-0.239161,0.0


In [29]:
# Similarity-weighted sum of the rows of smallpm: row i of t combines all users'
# preferences, weighted by row i of s. The result is not normalised.
t = s.dot(smallpm)
t

array([[-6.85622153e-02,  2.99367303e-03, -1.05907629e-02,
         1.21445391e-02,  8.82038927e-03,  2.01123362e-03,
        -4.14374720e-02,  3.97249111e-02, -3.14337721e-02],
       [-2.43478153e-01,  1.03831167e-01,  7.41762769e-02,
         1.94305454e-01,  3.88878431e-01,  4.60707964e-02,
        -2.66062514e-01,  1.45920486e-01, -3.23661211e-01],
       [ 3.91513439e-02,  3.66343027e-02,  7.69103249e-02,
         1.78062770e-01,  3.84434117e-01,  6.87279900e-02,
        -4.16826898e-01,  3.42441238e-02, -3.99349345e-01],
       [-3.15845540e-02,  3.61812259e-04,  8.82448142e-02,
         3.96700978e-02,  2.56404979e-01, -5.32988585e-02,
        -5.08094432e-02,  4.77920910e-02, -9.84558140e-02],
       [-1.40961421e-01, -2.88907190e-02,  6.01837973e-03,
        -1.21062798e-01, -4.04535353e-01, -5.69905667e-02,
         3.92675910e-01, -1.29941876e-02,  4.58351724e-01],
       [-2.86132295e-01,  7.77383279e-03,  7.21567680e-02,
        -7.98701630e-02, -2.49997800e-01,  5.709431

In [16]:
# Total absolute similarity per row of s.
abs(s).sum(axis=1)

userid
1       0.427136
2       1.902250
4169    2.231486
1941    1.443909
1181    2.736099
889     1.801590
3618    1.290483
dtype: float64

In [17]:
s = cosine_similarity(smallpm)
np.fill_diagonal(s, 0)
s

array([[ 0.00000000e+00, -6.94988139e-02, -1.29291419e-01,
         1.09097487e-01, -1.04083659e-01,  1.46123562e-02,
         5.51778376e-04],
       [-6.94988139e-02,  0.00000000e+00,  5.73057284e-01,
         5.19638975e-01, -4.02173974e-01,  7.18743806e-02,
         2.66006230e-01],
       [-1.29291419e-01,  5.73057284e-01,  0.00000000e+00,
         3.22012377e-01, -5.92658231e-01, -5.62955283e-01,
        -5.15112347e-02],
       [ 1.09097487e-01,  5.19638975e-01,  3.22012377e-01,
         0.00000000e+00, -2.42051841e-01,  1.83287578e-01,
         6.78208029e-02],
       [-1.04083659e-01, -4.02173974e-01, -5.92658231e-01,
        -2.42051841e-01,  0.00000000e+00,  7.29699348e-01,
        -6.65432078e-01],
       [ 1.46123562e-02,  7.18743806e-02, -5.62955283e-01,
         1.83287578e-01,  7.29699348e-01,  0.00000000e+00,
        -2.39161353e-01],
       [ 5.51778376e-04,  2.66006230e-01, -5.15112347e-02,
         6.78208029e-02, -6.65432078e-01, -2.39161353e-01,
         0.0000000

In [23]:
# Sum of absolute similarities as a column vector, used to normalise t row by row.
# NOTE(review): this sums down the columns (axis=0), while the toy example
# earlier in the notebook uses axis=1. A cosine-similarity matrix is symmetric,
# so both give the same totals here.
w = abs(s).sum(axis=0).reshape(-1, 1)
w

array([[0.42713551],
       [1.90224966],
       [2.23148583],
       [1.44390906],
       [2.73609913],
       [1.8015903 ],
       [1.29048348]])

In [30]:
# Divide each row of t by that row's total absolute similarity weight.
t/w

array([[-0.16051631,  0.00700872, -0.02479485,  0.02843252,  0.0206501 ,
         0.00470865, -0.09701247,  0.09300306, -0.07359204],
       [-0.12799485,  0.05458335,  0.03899398,  0.10214509,  0.20443081,
         0.02421911, -0.1398673 ,  0.07670943, -0.17014655],
       [ 0.01754497,  0.016417  ,  0.03446597,  0.07979561,  0.17227719,
         0.03079921, -0.18679343,  0.01534588, -0.17896118],
       [-0.02187434,  0.00025058,  0.06111522,  0.0274741 ,  0.17757696,
        -0.03691289, -0.03518881,  0.0330991 , -0.06818699],
       [-0.05151912, -0.01055909,  0.00219962, -0.0442465 , -0.14785113,
        -0.02082913,  0.1435167 , -0.00474917,  0.16752015],
       [-0.15882207,  0.00431498,  0.04005171, -0.04433314, -0.13876507,
         0.03169107,  0.1406641 , -0.00612712,  0.13498293],
       [ 0.08888234, -0.01352662, -0.00791327,  0.06670007,  0.18407088,
        -0.00791753, -0.22790378,  0.03357599, -0.20089649]])

In [15]:
# Squared difference between t and smallpm, averaged per column and then
# averaged across the columns.
((t - smallpm)**2).mean().mean()

0.024005996650785608

In [22]:
dataset.ratings.reset_index()['userid'].unique()

array([   1,    2,    3, ..., 6038, 6039, 6040])

In [30]:
t.reset_index().set_index('userid')

movieid,2858,260,1196,1210,480,2028,589,1270,2571
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,-0.068562,0.002994,-0.010591,0.012145,0.00882,0.002011,-0.041437,0.039725,-0.031434
2,-0.243478,0.103831,0.074176,0.194305,0.388878,0.046071,-0.266063,0.14592,-0.323661
4169,0.039151,0.036634,0.07691,0.178063,0.384434,0.068728,-0.416827,0.034244,-0.399349
1941,-0.031585,0.000362,0.088245,0.03967,0.256405,-0.053299,-0.050809,0.047792,-0.098456
1181,-0.140961,-0.028891,0.006018,-0.121063,-0.404535,-0.056991,0.392676,-0.012994,0.458352
889,-0.286132,0.007774,0.072157,-0.07987,-0.249998,0.057094,0.253419,-0.011039,0.243184
3618,0.114701,-0.017456,-0.010212,0.086075,0.23754,-0.010217,-0.294106,0.043329,-0.259254


In [22]:
pm = dataset._make_prefmatrix()

In [23]:
# Cosine similarity between all rows (users) of the full preference matrix.
s = cosine_similarity(pm)
# Zero the diagonal in place so self-similarity is excluded.
np.fill_diagonal(s, 0)

In [30]:
s.std()

0.035648532729832816

In [11]:
# Total absolute similarity as a column vector. It is summed over axis=0, which
# equals the row totals because a cosine-similarity matrix is symmetric.
w = abs(s).sum(axis=0).reshape(-1, 1)

In [12]:
# Unnormalised similarity-weighted sum of all users' preferences; a later cell
# divides it by `w`.
preds = s.dot(pm)

In [15]:
(preds / w).shape

(4832, 3883)

In [16]:
preds.shape

(4832, 3883)

In [17]:
pm.shape

(4832, 3883)

In [17]:
from scipy.optimize import minimize

In [22]:
def f(a, b):
    """Return the affine map ``x -> a*x + b`` as a one-argument callable.

    ``a`` is the slope and ``b`` the intercept. The returned callable accepts
    anything that supports ``a*x + b``, such as scalars or numpy arrays.
    """
    return lambda x: a*x + b

def loss(params, X=x, y=y):
    """Mean squared error of the line ``a*X + b`` against ``y``.

    ``params`` is an ``(a, b)`` pair. ``X`` and ``y`` default to the
    notebook-level ``x`` and ``y`` as they were when this cell was executed,
    because Python evaluates default values once, at definition time. Rebinding
    ``x`` or ``y`` later does not change these defaults.
    """
    slope, intercept = params
    residuals = f(slope, intercept)(X) - y
    return (residuals**2).mean()
    

In [20]:
# Four points lying exactly on the line y = 2x + 1.
x = np.array([1, 2, 3, 4])
y = np.array([3, 5, 7, 9])

In [25]:
# Sanity check: the data satisfy y = 2x + 1 exactly, so the loss at (a, b) = (2, 1) is 0.
loss((2, 1), x, y)

0.0

In [23]:
# Fit (a, b) starting from (0, 0). The data come from loss's default X and y, so
# this depends on what `x` and `y` held when `loss` was defined. ['x'] selects
# the solution vector from the optimiser result.
minimize(loss, x0 = [0, 0])['x']

array([2.00000046, 1.00000015])

In [31]:
minimize(loss, x0 = [0, 0])

      fun: 1.964025539998162e-12
 hess_inv: array([[ 0.39995421, -1.00000735],
       [-1.00000735,  2.99999882]])
      jac: array([7.78078828e-06, 2.61995017e-06])
  message: 'Optimization terminated successfully.'
     nfev: 20
      nit: 4
     njev: 5
   status: 0
  success: True
        x: array([2.00000046, 1.00000015])

In [8]:
# Three small 3x3 fixtures used by the masking cells that follow.
# NOTE(review): presumably x = observed input, y = full target, p = predictions,
# with 0 meaning "missing" -- confirm.
x = np.array([
    [1, 2, 0],
    [4, 0, 0],
    [5, 0, -1],
])
y = np.array([
    [1, 2, 3],
    [4, 0, 1],
    [5, 2, -1],
])
p = np.array([
    [0.5, 0.5, 2.5],
    [3.5, 0.5, 1.5],
    [5, 1.5, -1],
])

In [11]:
y[(x != y) & (y != 0)]

array([3, 1, 2])

In [12]:
p[(x != y) & (y != 0)]

array([2.5, 1.5, 1.5])

In [13]:
# Boolean mask of the cells where y is non-zero and differs from x.
idx = (x != y) & (y != 0)

In [16]:
# Mean squared error between y and p, restricted to the cells selected by idx.
((y[idx] - p[idx])**2).mean()

0.25

In [24]:
# Obtain the dataset's batch iterator; the next cell consumes it with next().
g = dataset.generate()

In [26]:
# Take one item from the generator; it unpacks as ((X, y), z).
# NOTE(review): this rebinds the notebook-level names X and y used by earlier cells.
(X, y), z = next(g)

In [27]:
X

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.0823234 , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.14540313],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.10922752, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [28]:
y

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.0823234 , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.14540313],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.28106607, 0.10922752, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [32]:
X.shape

(64, 3883)

In [34]:
X.mean(axis=0).shape

(3883,)

In [35]:
np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]).mean(axis=0)

array([4.5, 5.5, 6.5])