In [3]:
import numpy as np
import pandas as pd

from src import utils, models, metrics

In [4]:
# Load the pickled inputs from the post-EDA data directory.
# utils.read_pickles returns three objects, unpacked here as
# df_movies, df_users and df_ratings.
pickles_dir = "../../data/ml-1m-after_eda/"
df_movies, df_users, df_ratings = utils.read_pickles(pickles_dir)

#### 1. Ordered splitting: 80% of the data in train

In [21]:
# Experiment 1: evaluate the BaseModelAverage baseline on an ordered split
# with 80% of df_ratings in train.

# split data
train, test = utils.TrainTestSplitter.split_by_percent(df_ratings, 0.8)
print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")

# get true
true_scores = test["Rating"]

# get predict: the model is fitted on train only and queried with the
# MovieID column of the held-out rows
base_model = models.BaseModelAverage()
base_model.fit(train)
predicted_scores = base_model.predict(test["MovieID"])

# evaluate
# k=5 and threshold=4 are passed explicitly to both predictive and rank
# metrics so this run uses the same settings as the random-split and
# user-based-split experiments and the three results are directly comparable.
print(f"ML merics: {metrics.ml_metrics(true_scores, predicted_scores)}")
print(f"Predictive merics: {metrics.predictive_metrics(test, predicted_scores, k=5, threshold=4)}")
print(f"Rank merics: {metrics.rank_metrics(test, predicted_scores, k=5, threshold=4)}")

#### 2. Random splitting (for experimentation only): 80% of the data in train

In [4]:
# Experiment 2: evaluate the BaseModelAverage baseline on a random split
# with 80% of df_ratings in train.

# split data
# Seed NumPy's global RNG first so the random split, and therefore every
# metric printed below, is repeatable across kernel restarts.
# NOTE(review): this assumes split_by_percent draws from NumPy's global RNG
# when random_split=True (pandas and scikit-learn do so when no random_state
# is given) — confirm, or pass a seed to the splitter if it accepts one.
np.random.seed(42)
train, test = utils.TrainTestSplitter.split_by_percent(df_ratings, 0.8, random_split=True)
print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")

# get true
true_scores = test["Rating"]

# get predict: the model is fitted on train only and queried with the
# MovieID column of the held-out rows
base_model = models.BaseModelAverage()
base_model.fit(train)
predicted_scores = base_model.predict(test["MovieID"])

# evaluate
print(f"ML merics: {metrics.ml_metrics(true_scores, predicted_scores)}")
print(f"Predictive merics: {metrics.predictive_metrics(test, predicted_scores, k=5, threshold=4)}")
print(f"Rank merics: {metrics.rank_metrics(test, predicted_scores, k=5, threshold=4)}")

#### 3. User-based splitting: 10 reviews per user in test

In [5]:
# Experiment 3: per-user hold-out. The splitter is asked to place
# 10 reviews of each user into the test part.
splitter = utils.TrainTestSplitter
train, test = splitter.split_by_users(df_ratings, n_reviews_in_test=10)
print(f"Train shape: {train.shape}", f"Test shape: {test.shape}", sep="\n")

# Fit the baseline on the training part, then ask it for scores using the
# MovieID column of the held-out rows.
base_model = models.BaseModelAverage()
base_model.fit(train)
predicted_scores = base_model.predict(test["MovieID"])

# Reference ratings taken from the same held-out rows.
true_scores = test["Rating"]

# Report the three metric families (k=5, threshold=4 for the top-k ones).
print(f"ML merics: {metrics.ml_metrics(true_scores, predicted_scores)}")
print(f"Predictive merics: {metrics.predictive_metrics(test, predicted_scores, k=5, threshold=4)}")
print(f"Rank merics: {metrics.rank_metrics(test, predicted_scores, k=5, threshold=4)}")