# Matrix Factorization 

In [255]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
import warnings
warnings.filterwarnings('ignore')

In [256]:
movies=pd.read_csv("data/ml-100k/movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [257]:
ratings=pd.read_csv("data/ml-100k/ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [258]:
movie_rating = pd.merge(ratings, movies, on='movieId')
movie_rating.head(10)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
5,1,70,3.0,964982400,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller
6,1,101,5.0,964980868,Bottle Rocket (1996),Adventure|Comedy|Crime|Romance
7,1,110,4.0,964982176,Braveheart (1995),Action|Drama|War
8,1,151,5.0,964984041,Rob Roy (1995),Action|Drama|Romance|War
9,1,157,5.0,964984100,Canadian Bacon (1995),Comedy|War


In [259]:
columns = ['timestamp', 'genres']
movie_rating = movie_rating.drop(columns, axis=1)
movie_rating.head(10)

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,1,3,4.0,Grumpier Old Men (1995)
2,1,6,4.0,Heat (1995)
3,1,47,5.0,Seven (a.k.a. Se7en) (1995)
4,1,50,5.0,"Usual Suspects, The (1995)"
5,1,70,3.0,From Dusk Till Dawn (1996)
6,1,101,5.0,Bottle Rocket (1996)
7,1,110,4.0,Braveheart (1995)
8,1,151,5.0,Rob Roy (1995)
9,1,157,5.0,Canadian Bacon (1995)


In [260]:
movie_rating.shape

(100836, 4)

In [261]:
user_item_matrix = movie_rating.pivot_table(index='userId',columns='title',values='rating').fillna(0)

print(user_item_matrix.shape)
user_item_matrix.head(10)


(610, 9719)


title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [262]:
X = user_item_matrix.values.T
X.shape

(9719, 610)

# SVD

In [263]:
SVD = TruncatedSVD(n_components=120, random_state=17)
matrix = SVD.fit_transform(X)
matrix.shape

(9719, 120)

In [264]:
corr = np.corrcoef(matrix)
corr.shape

(9719, 9719)

In [265]:
movie_titles = user_item_matrix.columns
movie_titles_list = list(movie_titles)
indx = movie_titles_list.index("Captain America: The First Avenger (2011)")

# Edge of Tomorrow (2014)
# Captain America: The First Avenger (2011)

In [266]:
corr_indx  = corr[indx]
list(movie_titles[(corr_indx >= 0.7)])

['Amazing Spider-Man, The (2012)',
 'Avengers, The (2012)',
 'Avengers: Age of Ultron (2015)',
 'Captain America: The First Avenger (2011)',
 'Captain America: The Winter Soldier (2014)',
 'Edge of Tomorrow (2014)',
 'Guardians of the Galaxy (2014)',
 'Iron Man 2 (2010)',
 'Iron Man 3 (2013)',
 'Scott Pilgrim vs. the World (2010)',
 'Star Trek Into Darkness (2013)',
 'Thor (2011)',
 'Thor: The Dark World (2013)',
 'X-Men: Days of Future Past (2014)',
 'X-Men: First Class (2011)']

# ALS Method

In [267]:
R = user_item_matrix.values
num_users, num_items = R.shape

def als(R, factors=200, reg=0.1, iters=15):
    num_users, num_items = R.shape
    U = np.random.normal(scale=1./factors, size=(num_users, factors))
    V = np.random.normal(scale=1./factors, size=(num_items, factors))
    
    for _ in range(iters):
        # Update user factors
        for u in range(num_users):
            idx = R[u, :] > 0
            V_i, r_u = V[idx], R[u, idx]
            if len(r_u) > 0:
                A = V_i.T @ V_i + reg * np.eye(factors)
                b = V_i.T @ r_u
                U[u] = np.linalg.solve(A, b)
        # Update item factors
        for i in range(num_items):
            idx = R[:, i] > 0
            U_i, r_i = U[idx], R[idx, i]
            if len(r_i) > 0:
                A = U_i.T @ U_i + reg * np.eye(factors)
                b = U_i.T @ r_i
                V[i] = np.linalg.solve(A, b)
    return U, V

U, V = als(R, factors=20, reg=0.1, iters=10)



In [268]:
# Predict rating for all items for a user
def predict_user_scores(user_id, U, V):
    return U[user_id] @ V.T   

# Top-k recommendations for a user
def top_k_recommendations_for_user(user_id, U, V, R, movie_titles, k=5):
    scores = U[user_id] @ V.T   
    rated = R[user_id] > 0      
    scores[rated] = -np.inf

    top_items = np.argsort(scores)[-k:][::-1]
    top_scores = scores[top_items]

    recommended_movies = [(movie_titles[i], top_scores[j]) for j, i in enumerate(top_items)]
    return recommended_movies


# Top-k similar items for a given movie
def top_k_similar_movies(movie_name, V, movie_titles, k=5):
    if movie_name not in movie_titles:
        raise ValueError(f"Movie '{movie_name}' not found in movie list.")
    movie_id = movie_titles.get_loc(movie_name)

    # cosine similarity with other items
    item_vec = V[movie_id]
    sims = V @ item_vec / (np.linalg.norm(V, axis=1) * np.linalg.norm(item_vec) + 1e-10)
    sims[movie_id] = -np.inf  

    # top-k similar items
    top_items = np.argsort(sims)[-k:][::-1]
    top_scores = sims[top_items]

    similar_movies = [(movie_titles[i], top_scores[j]) for j, i in enumerate(top_items)]
    return similar_movies



In [269]:
movie_titles = user_item_matrix.columns

user_id = 1
recommendations = top_k_recommendations_for_user(user_id, U, V, R, movie_titles, k=5)

print(f"Top recommendations for User {user_id}:")
for m, s in recommendations:
    print(f"{m} (predicted score: {s:.2f})")



Top recommendations for User 1:
In the Name of the Father (1993) (predicted score: 7.78)
Backdraft (1991) (predicted score: 7.50)
Thin Red Line, The (1998) (predicted score: 7.39)
Beavis and Butt-Head Do America (1996) (predicted score: 7.10)
Ice Age 2: The Meltdown (2006) (predicted score: 7.07)


In [270]:
movie='Captain America: The First Avenger (2011)'

similar_movies = top_k_similar_movies(movie, V, movie_titles, k=5)
print(f"Movies similar to {movie}:")
for m, s in similar_movies:
    print(f"{m} (score: {s:.2f})")


Movies similar to Captain America: The First Avenger (2011):
X-Men: First Class (2011) (score: 0.80)
Iron Man 3 (2013) (score: 0.75)
Thor (2011) (score: 0.73)
Dawn of the Planet of the Apes (2014) (score: 0.73)
WALL·E (2008) (score: 0.72)
