In [1]:
import numpy as np
from sklearn.decomposition import PCA

In [2]:
# Five users, one per row. The three columns hold the user's weights on the
# latent taste types, in the order Action, Comedy, Romance.
users = np.array(
    (
        (0.7, 0.2, 0.1),  # User 1 -> Action 70%, Comedy 20%, Romance 10%
        (0.0, 0.5, 0.5),  # User 2 -> Comedy 50%, Romance 50%
        (0.3, 0.4, 0.3),  # User 3 -> Action 30%, Comedy 40%, Romance 30%
        (0.0, 0.0, 1.0),  # User 4 -> Romance 100%
        (0.4, 0.0, 0.6),  # User 5 -> Action 40%, Romance 60%
    ),
    dtype=float,
)

In [3]:
# Five movies, one per row. The three columns hold the movie's weights on the
# latent movie types, in the order Action, Comedy, Romance.
movies = np.array(
    (
        (0.8, 0.2, 0.0),  # Movie 1 -> Action 80%, Comedy 20%
        (0.0, 0.6, 0.4),  # Movie 2 -> Comedy 60%, Romance 40%
        (0.5, 0.0, 0.5),  # Movie 3 -> Action 50%, Romance 50%
        (0.3, 0.0, 0.7),  # Movie 4 -> Action 30%, Romance 70%
        (0.0, 1.0, 0.0),  # Movie 5 -> Comedy 100%
    ),
    dtype=float,
)

In [7]:
# Build the 5x5 user/movie rating matrix: entry (i, j) is the inner product of
# user i's and movie j's latent weight vectors, so rows are users and columns
# are movies. `@` is the matrix-product operator NumPy recommends for 2-D arrays.
rating_matrix = users @ movies.T

In [8]:
# Display the rating matrix (rows are users, columns are movies).
rating_matrix

array([[0.6 , 0.16, 0.4 , 0.28, 0.2 ],
       [0.1 , 0.5 , 0.25, 0.35, 0.5 ],
       [0.32, 0.36, 0.3 , 0.3 , 0.4 ],
       [0.  , 0.4 , 0.5 , 0.7 , 0.  ],
       [0.32, 0.24, 0.5 , 0.54, 0.  ]])

In [37]:
# Apply PCA to users: each row of rating_matrix is one user, described by
# that user's ratings of the five movies.
pca_users = PCA(n_components=2)  # keep the first two principal components
users_pca = pca_users.fit_transform(rating_matrix)  # one row of PC scores per user

In [38]:
# Display each user's coordinates on the two principal components.
users_pca

array([[-0.2204573 , -0.33973898],
       [-0.18922173,  0.35454074],
       [-0.23862911,  0.07071173],
       [ 0.44114064,  0.11445058],
       [ 0.2071675 , -0.19996406]])

In [39]:
# Fit a two-component PCA on the users (rows of rating_matrix).
# NOTE(review): this re-creates and refits the same model that the earlier
# `fit_transform(rating_matrix)` cell already fitted -- looks redundant; confirm
# before removing.
pca_users = PCA(n_components=2)  # keep the first two principal components
pca_users.fit(rating_matrix)

In [42]:
# Pull out the first two principal axes fitted on the users (PC1 and PC2);
# each axis is a vector of loadings with one entry per movie.
pc1_users, pc2_users = pca_users.components_[:2]

In [43]:
# Share of the total variance captured by each user principal component,
# expressed as a percentage.
users_variance_explained = 100 * pca_users.explained_variance_ratio_

In [44]:
# Show both user principal axes alongside their explained-variance percentages.
(
    pc1_users,
    pc2_users,
    users_variance_explained,
)

(array([-0.42564138,  0.02741269,  0.30901333,  0.58363668, -0.61802666]),
 array([-0.70102126,  0.48848286, -0.22995372,  0.07456616,  0.45990745]),
 array([55.87446522, 44.12553478]))

In [45]:
# Apply PCA to movies: transposing rating_matrix makes each row one movie,
# described by the ratings it received from the five users.
pca_movies = PCA(n_components=2)  # keep the first two principal components
movies_pca = pca_movies.fit_transform(rating_matrix.T)  # one row of PC scores per movie

In [52]:
# Share of the total variance captured by each movie principal component,
# expressed as a percentage.
movies_variance_explained = 100 * pca_movies.explained_variance_ratio_

In [53]:
# Display each movie's coordinates on the two principal components.
movies_pca

array([[ 0.26866782,  0.40256379],
       [-0.02281001, -0.25822413],
       [-0.25092878,  0.12812004],
       [-0.44102465, -0.04469073],
       [ 0.44609562, -0.22776896]])

In [54]:
# Round the ratings and both sets of PC scores to two decimals for display.
rating_matrix_rounded, users_pca_rounded, movies_pca_rounded = (
    np.round(matrix, 2) for matrix in (rating_matrix, users_pca, movies_pca)
)

In [55]:
# Final summary: rounded ratings, rounded user and movie PC scores, then the
# (unrounded) explained-variance percentages for users and for movies.
(
    rating_matrix_rounded,
    users_pca_rounded,
    movies_pca_rounded,
    users_variance_explained,
    movies_variance_explained,
)

(array([[0.6 , 0.16, 0.4 , 0.28, 0.2 ],
        [0.1 , 0.5 , 0.25, 0.35, 0.5 ],
        [0.32, 0.36, 0.3 , 0.3 , 0.4 ],
        [0.  , 0.4 , 0.5 , 0.7 , 0.  ],
        [0.32, 0.24, 0.5 , 0.54, 0.  ]]),
 array([[-0.22, -0.34],
        [-0.19,  0.35],
        [-0.24,  0.07],
        [ 0.44,  0.11],
        [ 0.21, -0.2 ]]),
 array([[ 0.27,  0.4 ],
        [-0.02, -0.26],
        [-0.25,  0.13],
        [-0.44, -0.04],
        [ 0.45, -0.23]]),
 array([55.87446522, 44.12553478]),
 array([63.89422678, 36.10577322]))