In [1]:
import math
import numpy as np
from sklearn.metrics import mean_squared_error
from model import SVD
from util import load_data
from util import load_similarities
from util import generate_artificial_ratings

In [2]:
# Load the similarity data once, up front: it is passed unchanged to
# generate_artificial_ratings() for every sparsity level in the experiment loop.
# This is slow -- the recorded run took a little over two minutes.
similarities = load_similarities()

Loading similarities...
Loading similarities completed in:  0:02:08.979926


In [None]:
# Seed for the shuffle of the combined (real + artificial) training set, so the
# row order fed to the combined model is the same on every run.
# NOTE(review): this only pins the shuffle; if model.SVD has its own random
# initialisation it must be seeded separately for fully reproducible RMSEs.
SHUFFLE_SEED = 42


def _rmse(predictions, estimates=None):
    """Root-mean-squared error of a prediction array.

    Args:
        predictions: 2-D array as returned by ``SVD.test``; column 0 is compared
            against column 1 (presumably true rating vs. estimate -- confirm
            against model.SVD).
        estimates: optional replacement for column 1, e.g. a blend of several
            models' estimates. Must have one entry per row of ``predictions``.

    Returns:
        The RMSE as a Python float.
    """
    if estimates is None:
        estimates = predictions[:, 1]
    return math.sqrt(mean_squared_error(predictions[:, 0], estimates))


# Maps the sparsity reported by load_data to a 4-tuple of RMSEs:
# (baseline, artificial-only, average of both, combined training set).
result = {}
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)

for data_removed in [.95, .80, .60, .40, .20]:
    train_set, test_set, sparcity = load_data(data_removed=data_removed)
    artificial_ratings = generate_artificial_ratings(train_set, test_set, similarities)

    # 1) Baseline: SVD trained on the real training ratings only.
    print("Baseline model")
    baseline_algo = SVD()
    baseline_algo.fit(train_set)
    baseline_algo_predictions = baseline_algo.test(test_set)
    baseline_algo_rmse = _rmse(baseline_algo_predictions)
    print("RMSE: %.4f" % baseline_algo_rmse)

    # 2) SVD trained on the generated artificial ratings only.
    print("\nModel based on artificial data")
    artificial_algo = SVD()
    artificial_algo.fit(artificial_ratings)
    artificial_algo_predictions = artificial_algo.test(test_set)
    artificial_algo_rmse = _rmse(artificial_algo_predictions)
    print("RMSE: %.4f" % artificial_algo_rmse)

    # 3) Blend: element-wise mean of the two models' estimates. Both prediction
    #    arrays come from the same test_set, so rows line up one-to-one.
    print("\nTwo separate models")
    average_algo_predictions = (baseline_algo_predictions[:, 1]
                                + artificial_algo_predictions[:, 1]) / 2
    average_algo_rmse = _rmse(artificial_algo_predictions, average_algo_predictions)
    print("RMSE: %.4f" % average_algo_rmse)

    # 4) One SVD trained on real and artificial ratings stacked together.
    # NOTE(review): the message mentions different learning rates, but SVD() is
    # constructed and fitted here exactly like the other models -- confirm
    # whether model.SVD distinguishes artificial from real rows internally.
    print("\nOne Combined model with different learning rate for artificial and real ratings")
    combined_ratings = np.vstack((train_set, artificial_ratings))
    # Shuffle rows in place so real and artificial ratings are interleaved;
    # a dedicated seeded generator keeps this repeatable without touching the
    # global NumPy random state.
    shuffle_rng.shuffle(combined_ratings)
    combined_algo = SVD()
    combined_algo.fit(combined_ratings)
    combined_algo_predictions = combined_algo.test(test_set)
    combined_algo_rmse = _rmse(combined_algo_predictions)
    print("RMSE: %.4f" % combined_algo_rmse)

    result[sparcity] = (baseline_algo_rmse, artificial_algo_rmse, average_algo_rmse, combined_algo_rmse)
    # Two blank lines between sparsity levels in the log.
    print("\n")

Loading data...
Loading data completed. Sparcitiy: 0.9968
Loading artificial ratings...
