## Import all the necessary libraries

In [1]:
import numpy as np
from collections import defaultdict
# Path of the ratings CSV, relative to the current working directory.
data_path = 'ml-latest-small/ratings.csv'
# Dimensions used to size the [user, movie] rating matrix built by
# load_rating_data.
# NOTE(review): 6040 users / 3706 movies look like the MovieLens 1M figures,
# while data_path points at ml-latest-small -- confirm these match the file.
# If the file holds more than n_movies distinct movies, the loader skips the
# ratings of every movie first seen after the limit is reached.
n_users = 6040
n_movies = 3706

### Function to load the rating data from the dataset

In [2]:
def load_rating_data(data_path, n_users, n_movies):
    """
    Load rating data from file and also return the number of
    ratings for each movie and movie_id index mapping.

    The file must start with a header row followed by comma-separated
    rows of ``user_id,movie_id,rating,timestamp``; user ids start at 1.
    Movies get column indices in order of first appearance. Once
    n_movies distinct movies have been seen, rows for any further new
    movie are skipped. Blank lines are ignored and an empty file yields
    an all-zero matrix.

    @param data_path: path of the rating data file
    @param n_users: number of users
    @param n_movies: number of movies that have ratings
    @return: rating data in the numpy array of [user, movie];
             movie_n_rating, {movie_id: number of ratings};
             movie_id_mapping, {movie_id: column index in rating data}
    @raise IndexError: if a stored row has a user id outside 1..n_users
    """
    data = np.zeros((n_users, n_movies), dtype=np.float32)
    movie_id_mapping = {}
    movie_n_rating = defaultdict(int)

    with open(data_path, 'r', encoding='utf-8') as file:
        # Skip the header row; the default keeps an empty file from
        # raising StopIteration.
        next(file, None)

        # The header is line 1, so data rows start at line 2.
        for line_number, line in enumerate(file, start=2):
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline) hold no rating.
                continue

            user_id, movie_id, rating, _ = line.split(',')
            user_index = int(user_id) - 1
            movie_id = int(movie_id)
            rating = float(rating)

            if movie_id not in movie_id_mapping:
                # The next free column is the number of movies mapped so far.
                if len(movie_id_mapping) >= n_movies:
                    continue
                movie_id_mapping[movie_id] = len(movie_id_mapping)

            # A negative index would silently wrap around and overwrite
            # another user's row, so reject it explicitly.
            if not 0 <= user_index < n_users:
                raise IndexError(
                    f"user id {user_id} on line {line_number} is outside "
                    f"the range 1..{n_users}"
                )

            data[user_index, movie_id_mapping[movie_id]] = rating
            if rating > 0:
                movie_n_rating[movie_id] += 1

    return data, movie_n_rating, movie_id_mapping

"""
- Efficient File Reading: The file is read line-by-line, and the header row is skipped using next(file).
- Mapping Movie IDs: The movie ID mapping is done in a single pass, and the mapping is only updated 
    if the current movie index is within bounds.
- Streamlined Logic: Combined checks and assignments to ensure only valid movie IDs are processed.
"""

'\n- Efficient File Reading: The file is read line-by-line, and the header row is skipped using next(file).\n- Mapping Movie IDs: The movie ID mapping is done in a single pass, and the mapping is only updated \n    if the current movie index is within bounds.\n- Streamlined Logic: Combined checks and assignments to ensure only valid movie IDs are processed.\n'

### Loading data 

In [3]:
# Build the [user, movie] rating matrix, the per-movie rating counts and the
# movie_id -> column index mapping from the ratings file.
data, movie_n_rating, movie_id_mapping = load_rating_data(data_path, n_users, n_movies)