In [3]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error

In [4]:
df = pd.read_csv(r'C:\Users\subha\OneDrive\Desktop\netflix_titles.csv')

In [5]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [6]:
df.shape

(8807, 12)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [12]:
movies = df[df['type'] == 'Movie'].head(500)
movies = movies[['show_id', 'title']]

In [14]:
np.random.seed(42)
n_users = 100  
n_movies = len(movies)

In [17]:
ratings_matrix = np.random.randint(1, 6, (n_users, n_movies))  
ratings_df = pd.DataFrame(ratings_matrix, columns=movies['title'])

In [19]:
k = min(n_users, n_movies) - 1 if min(n_users, n_movies) > 1 else 1

In [21]:
def get_safe_k(n_users, n_movies):
    return max(1, min(n_users, n_movies) // 2)  # Use half of the smaller dimension
k = get_safe_k(n_users, n_movies)

In [23]:
U, S, Vt = np.linalg.svd(ratings_matrix, full_matrices=False)
S = np.diag(S[:k])  # Keep only the top k singular values
U = U[:, :k]
Vt = Vt[:k, :]

In [25]:
predicted_ratings = np.dot(np.dot(U, S), Vt)
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=movies['title'])

In [27]:
def recommend_movies(user_id, num_recommendations=5):
    user_ratings = predicted_ratings_df.iloc[user_id]
    recommended_movies = user_ratings.sort_values(ascending=False).head(num_recommendations)
    return recommended_movies

In [42]:
user_id = 90
recommendations = recommend_movies(user_id)
print("Recommended Movies for User", user_id)
print(recommendations)

Recommended Movies for User 90
title
The Blue Lagoon                 6.046461
Osuofia in London               5.903456
The Original Kings of Comedy    5.717495
Cousins                         5.690016
Pineapple Express               5.560237
Name: 90, dtype: float64


In [31]:
mask = np.random.rand(*ratings_matrix.shape) < 0.8
train_matrix = ratings_matrix * mask  # Training set (80% of ratings)
test_matrix = ratings_matrix * ~mask  # Test set (20% of ratings)

In [33]:
mask_test = test_matrix > 0  
rmse = np.sqrt(mean_squared_error(test_matrix[mask_test], predicted_ratings[mask_test]))
rmse_percentage = (rmse / 5) * 100  

print(f'RMSE: {rmse:.4f}')
print(f'RMSE Percentage: {rmse_percentage:.2f}%')

RMSE: 0.7915
RMSE Percentage: 15.83%
