In [11]:
import pandas as pd

# Read the MovieLens 100k ratings file (u.data). It is tab-delimited and has
# no header row, so the four column labels are supplied explicitly.
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/ml-100k/u.data',
    sep='\t',
    header=None,
    names=column_names,
)

In [12]:
# Preview the first five rating rows to confirm the columns parsed as expected.
ratings.head(n=5)

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [13]:
# Load u.item — movie metadata. The file is pipe-delimited ('|'), has no header
# row, and is decoded as latin-1.
# Only the first two fields (movie id and title) are used downstream, so the
# remaining columns are skipped at parse time rather than loaded and discarded.
movies = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['item_id', 'title'],
)
movies.head()

In [15]:
# Attach the movie title to every rating row. This is an inner join on
# item_id, so a rating is kept only when its item_id also appears in `movies`.
data = ratings.merge(movies, how='inner', on='item_id')
data.head()

Unnamed: 0,user_id,item_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


### Creating the user-item matrix

In [16]:
# Build the user x movie rating matrix: one row per user_id, one column per
# title. pivot_table averages ratings that share the same (user, title) pair
# (its default aggregation); movies a user has not rated are filled with 0.
user_movie_matrix = (
    data
    .pivot_table(values='rating', index='user_id', columns='title')
    .fillna(0)
)
user_movie_matrix.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,2.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,...,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,2.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0


### Calculating similarities between movies

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

# Item-item similarity needs one row per movie, so flip the user x movie
# matrix into movie x user.
movie_user_matrix = user_movie_matrix.transpose()

# Pairwise cosine similarity between every pair of movie rating vectors.
similarity = cosine_similarity(movie_user_matrix)

# Wrap the raw array in a DataFrame labelled by title on both axes so scores
# can be looked up by movie name.
movie_titles = movie_user_matrix.index
similarity_df = pd.DataFrame(
    similarity,
    index=movie_titles,
    columns=movie_titles,
)
similarity_df.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),1.0,0.0,0.024561,0.099561,0.185236,0.159265,0.0,0.052203,0.0,0.033326,...,0.0,0.0,0.0,0.027774,0.11884,0.142315,0.02907,0.0,0.110208,0.0
1-900 (1994),0.0,1.0,0.014139,0.009294,0.007354,0.004702,0.010055,0.067038,0.0,0.0,...,0.152499,0.015484,0.0,0.069284,0.018243,0.023408,0.006694,0.07964,0.042295,0.0
101 Dalmatians (1996),0.024561,0.014139,1.0,0.167006,0.061105,0.143878,0.203781,0.225803,0.027642,0.092337,...,0.0,0.021965,0.030905,0.274877,0.204267,0.101199,0.056976,0.172155,0.045714,0.0
12 Angry Men (1957),0.099561,0.009294,0.167006,1.0,0.056822,0.167235,0.304078,0.422506,0.072682,0.394854,...,0.060946,0.016502,0.0,0.40327,0.259436,0.145519,0.105226,0.038901,0.060101,0.081261
187 (1997),0.185236,0.007354,0.061105,0.056822,1.0,0.132327,0.042928,0.06506,0.043133,0.0273,...,0.0,0.141997,0.0,0.068257,0.067786,0.091293,0.09949,0.025184,0.142667,0.096449


### Defining the recommender function

In [18]:
def get_similar_movies(movie_name, top_n=5, similarity=None):
    """Return the ``top_n`` titles most similar to ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title to look up; must be a column label of the similarity matrix.
    top_n : int, default 5
        Maximum number of neighbours to return. Values below zero are
        treated as zero.
    similarity : pandas.DataFrame, optional
        Item-item similarity matrix labelled by title on both axes.
        Defaults to the notebook-level ``similarity_df``.

    Returns
    -------
    pandas.Series or str
        Similarity scores indexed by title, sorted in descending order, with
        ``movie_name`` itself excluded. If the title is not a column of the
        matrix, a "not found" message string is returned instead.
    """
    matrix = similarity_df if similarity is None else similarity
    if movie_name not in matrix.columns:
        return f"Movie '{movie_name}' not found in dataset."

    # Exclude the query title by label rather than by position: when another
    # movie ties with it at similarity 1.0, the query is not guaranteed to sort
    # first, so slicing off row 0 could drop a real neighbour and return the
    # query movie as its own recommendation.
    scores = matrix[movie_name].drop(labels=movie_name, errors="ignore")

    # A stable sort keeps tied scores in their original matrix order, which
    # makes the output deterministic.
    ranked = scores.sort_values(ascending=False, kind="stable")
    return ranked.head(max(top_n, 0))

### Example: movies similar to "Star Wars (1977)"

In [19]:
# Look up the nearest neighbours for one title, then print a heading followed
# by the resulting scores.
movie_to_search = "Star Wars (1977)"
similar_to_query = get_similar_movies(movie_to_search)
print(f"\nTop 5 movies similar to '{movie_to_search}':\n")
print(similar_to_query)


Top 5 movies similar to 'Star Wars (1977)':

title
Return of the Jedi (1983)          0.884476
Raiders of the Lost Ark (1981)     0.764885
Empire Strikes Back, The (1980)    0.749819
Toy Story (1995)                   0.734572
Godfather, The (1972)              0.697332
Name: Star Wars (1977), dtype: float64
