# Best practice: make the project root the working directory and add it to `sys.path`


In [9]:
%load_ext autoreload
%autoreload 2
from constants import ROOT_DIR
import os
import sys
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)
os.chdir(ROOT_DIR)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Imports and Configs

In [10]:
import pandas as pd
from configs.data_config import InferenceDataConfig
from configs.model_config import ModelConfig
from src.data_processing.movies import ProcessMovies
from src.data_processing.users import ProcessUsers
from src.data_processing.ratings import ProcessRatings

# Instantiate both config objects with their no-argument constructors.
# `data_config` supplies the file paths (and CSV read arguments) used by the
# read/save cells; `model_config` is handed to every Process* class.
data_config = InferenceDataConfig()
model_config = ModelConfig()

# Read Inference Data


In [11]:
# Load the three raw inference tables in one pass. Each config entry provides
# the CSV location (`file_path`) and the keyword arguments for `pd.read_csv`
# (`read_args`); the order users -> movies -> ratings is kept.
users, movies, ratings = (
    pd.read_csv(source.file_path, **source.read_args)
    for source in (
        data_config.raw_users,
        data_config.raw_movies,
        data_config.raw_ratings,
    )
)


# Process Inference Data


In [7]:
# Wrap each raw table in its processor; all three receive the shared model config.
process_users = ProcessUsers(users, model_config)
process_movies = ProcessMovies(movies, model_config)
process_ratings = ProcessRatings(ratings, model_config)

# Run the inference-time processing for each table (users, then movies, then
# ratings). The save cell later reads each processor's `.output`.
process_users.process_for_inference()
process_movies.process_for_inference()
# Trailing semicolon: the call returns an object whose repr embeds a memory
# address, which would otherwise be echoed as run-dependent cell output.
process_ratings.process_for_inference();


[32m2025-05-04 20:25:19.502[0m | [1mINFO    [0m | [36msrc.data_processing.users[0m:[36mprocess_for_inference[0m:[36m18[0m - [1mFunction 'process_for_inference' executed in 0.0015 seconds[0m
[32m2025-05-04 20:25:19.506[0m | [1mINFO    [0m | [36msrc.data_processing.movies[0m:[36mprocess_for_inference[0m:[36m19[0m - [1mFunction 'process_for_inference' executed in 0.0040 seconds[0m
[32m2025-05-04 20:25:19.507[0m | [1mINFO    [0m | [36msrc.data_processing.ratings[0m:[36mprepare_columns_for_ranking[0m:[36m20[0m - [1mPreparing columns for ranking[0m
[32m2025-05-04 20:25:19.689[0m | [1mINFO    [0m | [36msrc.data_processing.ratings[0m:[36mprepare_columns_for_ranking[0m:[36m27[0m - [1mCompleted preparing columns for ranking[0m
[32m2025-05-04 20:25:19.689[0m | [1mINFO    [0m | [36msrc.utils.utils[0m:[36mprocess_for_inference[0m:[36m0[0m - [1mFunction 'process_for_inference' executed in 0.1821 seconds[0m


<src.data_processing.ratings.ProcessRatings at 0x12fa808f0>

# Save Processed Data

In [8]:
# Persist each processor's `.output` as parquet at the path given by the
# matching `processed_*` entry of the data config (users, movies, ratings).
for processor, destination in (
    (process_users, data_config.processed_users),
    (process_movies, data_config.processed_movies),
    (process_ratings, data_config.processed_ratings),
):
    processor.output.to_parquet(destination.file_path)