# Singular Value Decomposition (SVD) Implementation

In this notebook, we'll apply SVD (using scikit-learn's `TruncatedSVD`) to a small toy example: a simple user-based movie recommendation system.

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD

In [None]:
# Toy ratings data, written as (user, movie, rating) records
ratings = pd.DataFrame(
    [
        ('1', 'A', 5), ('1', 'B', 4), ('1', 'C', 3),
        ('2', 'A', 4), ('2', 'B', 5), ('2', 'D', 3),
        ('3', 'C', 2), ('3', 'D', 4), ('3', 'E', 5),
        ('4', 'B', 5), ('4', 'E', 4), ('4', 'F', 3),
    ],
    columns=['user_id', 'movie_id', 'rating'],
)

# Reshape into a users x movies grid; movies a user has not rated become 0
user_movie_matrix = ratings.pivot_table(
    values='rating',
    index='user_id',
    columns='movie_id',
).fillna(0)

user_movie_matrix

In [None]:
# Perform SVD: project each user's rating row onto 2 latent components.
# random_state is pinned because TruncatedSVD's default solver is randomized,
# so without a seed the factors are not guaranteed to reproduce across runs.
svd = TruncatedSVD(n_components=2, random_state=42)
decomposed_matrix = svd.fit_transform(user_movie_matrix)

# Display the decomposed matrix (one row per user, one column per component)
decomposed_matrix

NameError: name 'TruncatedSVD' is not defined

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD

In [None]:
# Perform SVD: project each user's rating row onto 2 latent components.
# random_state is pinned because TruncatedSVD's default solver is randomized,
# so without a seed the factors are not guaranteed to reproduce across runs.
svd = TruncatedSVD(n_components=2, random_state=42)
decomposed_matrix = svd.fit_transform(user_movie_matrix)

# Display the decomposed matrix (one row per user, one column per component)
decomposed_matrix

array([[ 5.93218411, -3.11401167],
       [ 6.37009278, -1.92342991],
       [ 3.16204567,  5.16482759],
       [ 5.32236476,  2.70441758]])

In [None]:
# Compute the user-user similarity matrix from the latent user vectors.
# Pearson correlation (np.corrcoef) degenerates here: each row holds only two
# components, so every pairwise coefficient collapses to +1 or -1 and carries
# no ranking information. Cosine similarity keeps the graded signal.
# The variable keeps its original name because later cells read
# `correlation_matrix`.
row_norms = np.linalg.norm(decomposed_matrix, axis=1, keepdims=True)
# Guard against all-zero rows so the division never produces NaN
unit_rows = decomposed_matrix / np.where(row_norms == 0, 1.0, row_norms)
correlation_matrix = unit_rows @ unit_rows.T
correlation_matrix

array([[ 1.,  1., -1.,  1.],
       [ 1.,  1., -1.,  1.],
       [-1., -1.,  1., -1.],
       [ 1.,  1., -1.,  1.]])

In [None]:
# Find the user most similar to the first user.
# The first user's similarity with themselves is the row maximum, so a plain
# argmax would always return index 0; mask that entry out before searching.
similarity_to_first_user = np.array(correlation_matrix[0], dtype=float)
similarity_to_first_user[0] = -np.inf
similar_user_index = np.argmax(similarity_to_first_user)

# Find the movies rated by this user but not by the first user.
# Unrated entries were replaced with 0 (fillna(0)) when the matrix was built,
# so "not seen" must be tested with == 0; isna() would never match anything.
movies_rated_by_similar_user = user_movie_matrix.iloc[similar_user_index]
movies_not_seen_by_first_user = user_movie_matrix.iloc[0] == 0

# Recommend the unseen movie the similar user rated highest.
# Keep only movies the similar user actually rated (> 0); if none remain,
# fall back to None instead of letting idxmax() raise on an empty selection.
candidate_ratings = movies_rated_by_similar_user[movies_not_seen_by_first_user]
candidate_ratings = candidate_ratings[candidate_ratings > 0]
recommended_movie = candidate_ratings.idxmax() if not candidate_ratings.empty else None

recommended_movie

ValueError: attempt to get argmax of an empty sequence

In [None]:
# Create a more complex user-movie rating dataset (five users, six movies)
ratings = pd.DataFrame({
    'user_id': ['1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '4', '4', '4', '5', '5'],
    'movie_id': ['A', 'B', 'C', 'A', 'B', 'D', 'E', 'C', 'D', 'E', 'B', 'E', 'F', 'E', 'F'],
    'rating': [5, 4, 3, 4, 5, 3, 4, 2, 4, 5, 5, 4, 3, 5, 4]
})

# Pivot the dataframe to get user-movie matrix; unrated movies are filled with 0
user_movie_matrix = ratings.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)

# Perform SVD: project each user's rating row onto 2 latent components.
# random_state is pinned because TruncatedSVD's default solver is randomized.
svd = TruncatedSVD(n_components=2, random_state=42)
decomposed_matrix = svd.fit_transform(user_movie_matrix)

# Compute the user-user similarity matrix from the latent user vectors.
# np.corrcoef is degenerate on two-component rows (every coefficient is +1 or
# -1), so cosine similarity is used instead. The variable name is kept so any
# code reading `correlation_matrix` continues to work.
row_norms = np.linalg.norm(decomposed_matrix, axis=1, keepdims=True)
# Guard against all-zero rows so the division never produces NaN
unit_rows = decomposed_matrix / np.where(row_norms == 0, 1.0, row_norms)
correlation_matrix = unit_rows @ unit_rows.T

# Find the user most similar to the first user.
# Self-similarity is the row maximum, so mask index 0 out before the argmax.
similarity_to_first_user = correlation_matrix[0].copy()
similarity_to_first_user[0] = -np.inf
similar_user_index = np.argmax(similarity_to_first_user)

# Find the movies rated by this user but not by the first user.
# Missing ratings are stored as 0 (see fillna(0) above), so compare with 0;
# isna() would select nothing and make idxmax() fail on an empty Series.
movies_rated_by_similar_user = user_movie_matrix.iloc[similar_user_index]
movies_not_seen_by_first_user = user_movie_matrix.iloc[0] == 0

# Recommend the unseen movie the similar user rated highest, or None when the
# similar user has rated nothing the first user has not already seen.
candidate_ratings = movies_rated_by_similar_user[movies_not_seen_by_first_user]
candidate_ratings = candidate_ratings[candidate_ratings > 0]
recommended_movie = candidate_ratings.idxmax() if not candidate_ratings.empty else None

recommended_movie