In [None]:
# Mount Google Drive at /content/drive; the input files loaded later in this
# notebook are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Load Users





In [None]:
def load_users(file_path):
    """Load every user's watched movies from a tab-separated text file.

    Each non-blank line holds, in order: user id, movie id, Freebase id,
    movie name, release year, box office, runtime, languages, countries,
    genres and plot summary. All values are kept as strings.

    Blank lines are skipped. Surrounding whitespace is stripped from each
    line before it is split, which also drops trailing empty columns (for
    example a missing plot summary); such missing trailing columns are
    filled with ''. Columns beyond the eleventh are ignored.

    Args:
        file_path: Path of the UTF-8 encoded users file.

    Returns:
        dict mapping user id -> list of movie-record dicts, in file order.
    """
    field_names = (
        'movie_id', 'freebase_id', 'movie_name', 'release_year',
        'box_office', 'runtime', 'languages', 'countries', 'genres',
        'plot_summary',
    )
    expected_columns = len(field_names) + 1  # +1 for the leading user id

    users = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Blank line (e.g. trailing newline at end of file): no record.
                continue
            parts = stripped.split('\t')
            # strip() above removes trailing tabs, so empty trailing columns
            # vanish from the split; restore them as empty strings.
            parts += [''] * (expected_columns - len(parts))
            users.setdefault(parts[0], []).append(dict(zip(field_names, parts[1:])))
    print(f"Loaded {len(users)} users")
    return users


Load Movies

In [None]:
def load_movies(file_path):
    """Load movie metadata from a tab-separated text file.

    Each non-blank line holds, in order: movie id, Freebase id, movie name,
    release year, box office, runtime, languages, countries, genres and plot
    summary. All values are kept as strings. If a movie id occurs more than
    once, the last occurrence wins.

    Blank lines are skipped. Surrounding whitespace is stripped from each
    line before it is split, which also drops trailing empty columns (for
    example a missing plot summary); such missing trailing columns are
    filled with ''. Columns beyond the tenth are ignored.

    Args:
        file_path: Path of the UTF-8 encoded movie metadata file.

    Returns:
        dict mapping movie id -> dict of that movie's metadata fields.
    """
    field_names = (
        'freebase_id', 'movie_name', 'release_year', 'box_office',
        'runtime', 'languages', 'countries', 'genres', 'plot_summary',
    )
    expected_columns = len(field_names) + 1  # +1 for the leading movie id

    movies = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Blank line (e.g. trailing newline at end of file): no record.
                continue
            parts = stripped.split('\t')
            # strip() above removes trailing tabs, so empty trailing columns
            # vanish from the split; restore them as empty strings.
            parts += [''] * (expected_columns - len(parts))
            movies[parts[0]] = dict(zip(field_names, parts[1:]))
    print(f"Loaded {len(movies)} movies")
    return movies




Create User Profiles

In [None]:
def create_user_profiles(users):
    """Build a genre-preference profile for every user.

    Each movie's 'genres' string is split on ', ' into entries. For an entry
    containing ': ' the text after the first ': ' (up to the next ': ', if
    any) is taken as the genre name, with surrounding quotes and closing
    braces stripped. An entry without ': ' is used as the name itself after
    stripping quotes, braces and spaces; if nothing is left (for example an
    empty genre map '{}') the entry is skipped instead of raising IndexError.

    Args:
        users: dict mapping user id -> list of movie dicts, each having a
            'genres' string.

    Returns:
        dict mapping user id -> {genre name: number of the user's movies
        listing that genre / total number of the user's movies}. A user with
        no movies, or no parsable genres, gets an empty profile.
    """
    user_profiles = {}
    for user_id, movies in users.items():
        genre_count = {}
        for movie in movies:
            for entry in movie['genres'].split(', '):
                if ': ' in entry:
                    genre = entry.split(': ')[1].strip('""}')
                else:
                    # No 'id: name' separator: fall back to the bare entry and
                    # drop it when only braces/quotes remain.
                    genre = entry.strip('"{} ')
                    if not genre:
                        continue
                genre_count[genre] = genre_count.get(genre, 0) + 1
        total_movies = len(movies)
        # genre_count is empty whenever total_movies is 0, so no division by
        # zero can occur here.
        user_profiles[user_id] = {
            genre: count / total_movies for genre, count in genre_count.items()
        }
    return user_profiles


Cosine Similarity

In [None]:
import math
def cosine_similarity(profile1, profile2):
    """Return the cosine similarity of two sparse vectors given as dicts.

    Args:
        profile1: dict mapping key -> numeric weight.
        profile2: dict mapping key -> numeric weight.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms, with keys missing from a dict counted as weight 0.
        Returns 0 when either vector has zero norm (including empty dicts).
    """
    all_keys = set(profile1) | set(profile2)
    numerator = sum(profile1.get(key, 0) * profile2.get(key, 0) for key in all_keys)
    length1 = math.sqrt(sum(weight ** 2 for weight in profile1.values()))
    length2 = math.sqrt(sum(weight ** 2 for weight in profile2.values()))
    if not (length1 and length2):
        # A zero-length vector has no direction; define the similarity as 0.
        return 0
    return numerator / (length1 * length2)


Recommend Movies

In [None]:
def recommend_movies(user_profiles, movies, user_watched_movies, top_n=10):
    """Rank unwatched movies for every user by genre similarity.

    Every movie is turned into a binary genre vector (genre name -> 1) parsed
    from its 'genres' string, and scored against each user's profile with
    cosine_similarity. Movies the user has already watched are excluded.

    Args:
        user_profiles: dict mapping user id -> {genre name: weight}.
        movies: dict mapping movie id -> dict with a 'genres' string.
        user_watched_movies: dict mapping user id -> list of dicts with a
            'movie_id' key. A user missing from this mapping is treated as
            having watched nothing.
        top_n: Maximum number of recommendations kept per user (default 10).
            Pass None to keep every scored movie.

    Returns:
        dict mapping user id -> list of (movie id, similarity) tuples sorted
        by descending similarity; ties keep the iteration order of `movies`.
    """
    # A movie's genre vector does not depend on the user, so parse each one
    # once up front instead of once per (user, movie) pair.
    movie_profiles = {}
    for movie_id, movie_data in movies.items():
        movie_profile = {}
        for genre in movie_data['genres'].split(', '):
            if ': ' in genre:
                genre_name = genre.split(': ')[1].strip('""}')
            else:
                genre_name = genre.strip('""}')
            movie_profile[genre_name] = 1
        movie_profiles[movie_id] = movie_profile

    recommendations = {}
    for user_id, profile in user_profiles.items():
        watched_movie_ids = {
            movie['movie_id'] for movie in user_watched_movies.get(user_id, [])
        }
        scores = [
            (movie_id, cosine_similarity(profile, movie_profile))
            for movie_id, movie_profile in movie_profiles.items()
            if movie_id not in watched_movie_ids
        ]
        # list.sort is stable, so equally scored movies stay in input order.
        scores.sort(key=lambda x: x[1], reverse=True)
        recommendations[user_id] = scores[:top_n]
    return recommendations



Save And Run

In [None]:
def save_and_display_recommendations_to_file(recommendations, movies, filename='part1_0615971_output.txt'):
    """Write the recommendations to a text file and echo them to stdout.

    For every user a 'User <id>:' header is emitted, followed by one line per
    recommended movie with its name, id and score formatted to four decimal
    places. The printed copy of each line has surrounding whitespace removed.

    The file is written as UTF-8 (the encoding the input files are read
    with) so that non-ASCII movie names are stored correctly regardless of
    the platform's default encoding.

    Args:
        recommendations: dict mapping user id -> list of (movie id, score).
        movies: dict mapping movie id -> dict with a 'movie_name' key.
        filename: Path of the output file; overwritten if it exists.

    Raises:
        KeyError: If a recommended movie id is not present in `movies`.
    """
    with open(filename, 'w', encoding='utf-8') as file:
        for user, recs in recommendations.items():
            header = f"User {user}:\n"
            file.write(header)
            print(header.strip())

            for movie_id, score in recs:
                movie_name = movies[movie_id]['movie_name']
                line = f"  Movie-{movie_name} (ID: {movie_id}): {score:.4f}\n"
                file.write(line)
                print(line.strip())



# Input locations on the mounted Google Drive. Change DATA_DIR to run the
# notebook against a different copy of the data files.
DATA_DIR = '/content/drive/MyDrive/ColabNotebooks'
USERS_FILE = f'{DATA_DIR}/movie.users.txt'
MOVIES_FILE = f'{DATA_DIR}/movie.metadata_plot_summary_test.txt'

# Load data
users = load_users(USERS_FILE)
movies = load_movies(MOVIES_FILE)

# Create user profiles (genre name -> share of the user's watched movies)
user_profiles = create_user_profiles(users)

# Recommend movies: `users` doubles as the watched-movie lookup so that
# already-seen movies are excluded from each user's ranking.
recommendations = recommend_movies(user_profiles, movies, users)

# Display results (also written to the default output file)
save_and_display_recommendations_to_file(recommendations, movies)


Loaded 3 users
Loaded 43 movies
User 1:
Movie-Pain & Gain (ID: 32258845): 0.6218
Movie-Getaway (ID: 35815373): 0.3928
Movie-The Texas Chainsaw Massacre 3D (ID: 32291194): 0.3100
Movie-Matru Ki Bijlee Ka Mandola (ID: 34737436): 0.2967
Movie-The Croods (ID: 26508404): 0.2967
Movie-The Abandoned State (ID: 34711939): 0.2826
Movie-The Tomb (ID: 35313233): 0.1998
Movie-Now You See Me (ID: 35699958): 0.1998
Movie-Invertigo (ID: 36034066): 0.1978
Movie-Escape from Planet Earth (ID: 33757322): 0.1978
User 2:
Movie-Turbo (ID: 34643655): 0.8714
Movie-Invertigo (ID: 36034066): 0.7988
Movie-Escape from Planet Earth (ID: 33757322): 0.7988
Movie-Elysium (ID: 30340901): 0.6162
Movie-Welcome to the Punch (ID: 32784153): 0.6162
Movie-The Tomb (ID: 35313233): 0.5809
Movie-Now You See Me (ID: 35699958): 0.5809
Movie-The Young and Prodigious Spivet (ID: 36332870): 0.5798
Movie-Noah (ID: 36534974): 0.5774
Movie-Pacific Rim (ID: 32112949): 0.5485
User 3:
Movie-Gangster Squad (ID: 32438858): 0.9988
Movie-Dec