In [1]:
from scipy.sparse import csr_matrix
from scipy.stats import pearsonr
from numpy.linalg import matrix_rank
from tqdm.notebook import tqdm
from enum import IntEnum
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import jax.numpy as jnp
import jax
import time

In [2]:
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [3]:
userIds_available = set()
movieIds_available = set()
for id in np.array(ratings['userId'] , dtype = int):
    userIds_available.add(id)

for id in np.array(movies['movieId'] , dtype = int):
    movieIds_available.add(id)
    
userIds_available = list(userIds_available)
movieIds_available = list(movieIds_available)

userIds_available.sort()
movieIds_available.sort()

print(len(userIds_available) , len(movieIds_available))

668 10329


In [4]:
def binary_search(array , x):
    low = 0
    high = len(array) - 1
    while(high >= low):
        mid = int((high + low) / 2)
        
        if array[mid] == x:
            return mid
        
        elif array[mid] > x:
            high = mid - 1
            
        else:
            low = mid + 1

    print("Element %d not found" % x)
    return -1

In [5]:
rows = np.array(ratings['userId'])
cols = np.array(ratings['movieId'])
vals = np.array(ratings['rating'])

n = rows.max() + 1 # Number of user
p = cols.max() + 1# Number of movies
N = len(vals) # Number of ratings

# Update the arrays rows/cols with the true position instead of the ids 
for i_user in tqdm(range(len(rows))):
    rows[i_user] = binary_search(userIds_available ,  rows[i_user])

for i_movie in tqdm(range(len(cols))):
    cols[i_movie] = binary_search(movieIds_available , cols[i_movie])

n , p , N

  0%|          | 0/105339 [00:00<?, ?it/s]

  0%|          | 0/105339 [00:00<?, ?it/s]

(669, 149533, 105339)

In [6]:
# Shuffle the data
indexes = np.arange(N)
np.random.seed(0) # for reproducibility
np.random.shuffle(indexes)
# Reordering the arrays
rows = rows[indexes]
cols = cols[indexes]
vals = vals[indexes]

In [7]:
# Split data in training and testing
num_training = int(N * 0.8)

rows_train  = rows[:num_training]
cols_train  = cols[:num_training]
vals_train  = vals[:num_training]
rows_test   = rows[num_training:]
cols_test   = cols[num_training:]
vals_test   = vals[num_training:]

print(len(rows_train) , len(cols_train) , len(vals_train))

84271 84271 84271


In [8]:
# Initialize the matrix with all zeros
ratings_matrix = np.zeros((len(userIds_available) , len(movieIds_available)))
# Update the matrix with the known values (contained in vals_train array)
ratings_matrix[rows_train, cols_train] = vals_train
#print(ratings_matrix)

In [9]:
# Count the number of missing values
def count_missing_values(matrix):
    missing_values = 0

    for i_user in tqdm(range(matrix.shape[0])):
        for j_movie in range(matrix.shape[1]):
            # If the movie in position j_movie hasn't a rating
            if matrix[i_user , j_movie] == 0:
                missing_values += 1

    print("There are %d missing valuess" % (missing_values))
    print("There are %d values inserted" % (matrix.shape[0] * matrix.shape[1] - missing_values))
    print("There are %d values" % (matrix.shape[0] * matrix.shape[1]))
    
count_missing_values(ratings_matrix)

  0%|          | 0/668 [00:00<?, ?it/s]

There are 6815501 missing valuess
There are 84271 values inserted
There are 6899772 values


In [10]:
# Put in a set all the genres available
genre_available = set()

for i in range(movies.shape[0]):
    genres = movies['genres'][i].split('|')
    for g in genres: genre_available.add(g)

# print("All genres available are: " , id_available , genre_available)

In [11]:
num_movies = len(movieIds_available)
num_genres = len(genre_available)
print("Max movie id: " , max(movies['movieId']))
print("Number of movies is: " , num_movies)
print("Number of genres is: " , num_genres)

Max movie id:  149532
Number of movies is:  10329
Number of genres is:  20


In [12]:
# Initialize the matrix with all zeros of int8 type
correlation_matrix = np.zeros((num_movies , num_genres) , dtype = np.int8)
# Update the table with the correspondance
for i in tqdm(range(movies.shape[0])):
    id = movies['movieId'][i]
    # Take the right position in the matrix
    id = movieIds_available.index(id)

    genres = movies['genres'][i].split('|')
    for pos , g in enumerate(genre_available):
        if g in genres:
            correlation_matrix[id , pos] = 1

  0%|          | 0/10329 [00:00<?, ?it/s]

In [13]:
def cosine_similarity(vector1 , vector2):
    """
    vector1 and vector2 are rows of correlation_matrix or of ratings_matrix
    """
    return np.dot(vector1, vector2)/(np.linalg.norm(vector1) * np.linalg.norm(vector2))

def cosine_similarity_users(vector1 , vector2):
    '''
    Apply this similarity between users -> want to find similar behaviour in rating common movies and then
        use it for movies that one of the two hasn't watched yet, not use them here
    vector1 and vector2 are vector containing ratings of two users 
    '''
    common_vector1 = []
    common_vector2 = []
    
    # Take just the movies rated in both the array to find a similarity between the two users
    for i in range(len(vector1)):
        if vector1[i] != 0 and vector2[i] != 0:
            common_vector1.append(vector1[i])
            common_vector2.append(vector2[i])
    # If the two vectors(users) has at least 5 common ratings
    if len(common_vector1) > 5:
        return np.dot(common_vector1, common_vector2)/(np.linalg.norm(common_vector1) * np.linalg.norm(common_vector2))
    else:
        return 0

In [14]:
# Creating clusters for users
users_cluster = {}
threshold = 0.95
user_index_cluster = {}
userIds_copy = userIds_available.copy()

num_cluster = 0
index2 = 1
# To show the progress bar
pbar = tqdm(total = len(userIds_copy))
while len(userIds_copy) > 0:
    #pick always the first of the actual list
    id_x = userIds_copy[0]
    list_users = []
    user_index_cluster[id_x] = num_cluster
    list_users.append(id_x)
    #now pick all the others that follow
    while ( index2 < len(userIds_copy)):
        id_y = userIds_copy[index2]
        sim = cosine_similarity_users(ratings_matrix[userIds_available.index(id_x)], ratings_matrix[userIds_available.index(id_y)])
        # If they are similar enough
        if sim >= threshold:
            user_index_cluster[id_y] = num_cluster
            list_users.append(id_y)
            userIds_copy.remove(id_y)
            # Update the bar when an element is deleted
            pbar.update(1)
        else :
            index2 += 1
    userIds_copy.remove(id_x)
    # Update the bar when an element is deleted
    pbar.update(1)
    users_cluster[num_cluster] = list_users
    num_cluster += 1
    index2 = 1
    
# Close the bar
pbar.close()
print("Number of cluster is: " , num_cluster)

  0%|          | 0/668 [00:00<?, ?it/s]

Number of cluster is:  161


In [None]:
# Create a dictionary with 668 elements: one for each user.
# The key is the user id, the value is a list of object Movie
# The object Movie is made of 2 attributes: 
#        type_of_movie: array with the genre of the movie(the row of correlation matrix)
#        ratings: an array of 10 elements with 1 in the position of the corresponding rating
# The object movie are created for all the rating of the user plus the rating of movie that the user hasn't watched 
#        with, for the last one, the average rating of other users in the same cluster

In [15]:
class Movie:
    def __init__(self , genre , rating):
        self.genre = genre
        self.possible_rating = list([0.5 , 1 , 1.5 , 2 , 2.5 , 3 , 3.5 , 4 , 4.5 , 5])
        self.rating_one_hot = np.zeros(10)
        self.convert_rating(rating)
        
    def convert_rating(self , rating):
        mean_rating = np.mean(rating)
        # Approximate the rating
        if mean_rating - int(mean_rating) > 0.75:
            mean_rating = int(mean_rating) + 1.0
        elif mean_rating - int(mean_rating) > 0.25:
            mean_rating = int(mean_rating) + 0.5
        else:
            mean_rating = int(mean_rating)
        index = self.possible_rating.index(mean_rating)
        # Update the one-hot array
        self.rating_one_hot[index] = 1

movie = Movie([1 , 0 , 0 , 1] , [2 , 2 , 4.5])
movie.genre , movie.rating_one_hot

([1, 0, 0, 1], array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]))

In [16]:
# Key: user_id , value: list of Movie objects (necessary for the training of the NN)
ratings_for_the_user = {}
# Initiliaze all the values of the dictionary as a list
for userId in userIds_available:
    ratings_for_the_user[userId] = list()
    
# Converting the list of position in list of ids
userIds_to_consider = [userIds_available[rows_train[i]] for i in range(len(rows_train))]
movieIds_to_consider = [movieIds_available[cols_train[i]] for i in range(len(cols_train))]

#print("Starting...")

for cluster , users_in_cluster in tqdm(users_cluster.items()):
    # Consider just the users in the cluster
    rows_to_consider = [userIds_to_consider[i] for i in range(len(userIds_to_consider)) if userIds_to_consider[i] in users_in_cluster]
    cols_to_consider = [movieIds_to_consider[i] for i in range(len(movieIds_to_consider)) if userIds_to_consider[i] in users_in_cluster]
    vals_to_consider = [vals_train[i] for i in range(len(userIds_to_consider)) if userIds_to_consider[i] in users_in_cluster]
    
    
    # Add all the Movie object related to movies rated by the user
    for i in range(len(cols_to_consider)):
        # Take the genres of the movie in position i
        movie_genre = correlation_matrix[movieIds_available.index(cols_to_consider[i])]
        # Create the object Movie for the user in position i with rating in position i 
        movie_obj = Movie(movie_genre , vals_to_consider[i])
        # Append the object in the dictionary
        # Read the old list
        current_value = ratings_for_the_user[rows_to_consider[i]]
        # Add the new object
        current_value.append(movie_obj)
        # Append the new list
        ratings_for_the_user[rows_to_consider[i]] = current_value
        
    #print("Added the ratings to each user")
        
    # Calculate the average of the ratings given to a movie
    # Dictionary containing all the movie already calculated: key is it movieId , value is the average rating
    movie_avgs = {}
    
    for movieId in cols_to_consider:
        # If the movie has not already been considered
        if movieId not in movie_avgs.keys():
            # Take the position of all the ratings for the movie
            indexes = [i for i in range(len(cols_to_consider)) if cols_to_consider[i] == movieId]
            values = [vals_to_consider[i] for i in indexes]
            # Calculate the mean of all the ratings for the movie
            average = np.mean(values)
            # Add the movie with his average
            movie_avgs[movieId] = average
            
    #print("Calculated the average rate for each movie")

    # For each user in the cluster, add Movie objects of movie not watched
    for userId in users_in_cluster:
        # Take just the movies rated by the user
        cols_of_user = [cols_to_consider[i] for i in range(len(rows_to_consider)) if rows_to_consider[i] == userId]
        
        # Read the old list of the user userId
        current_value = ratings_for_the_user[userId]
        
        # For each movie not present, add it
        for movieId , rating in movie_avgs.items():
            # If the user hasn't rated the movie movieId
            if movieId not in cols_of_user:
                # Take the genres of the movie movieId
                movie_genre = correlation_matrix[movieIds_available.index(movieId)]
                # Create the object Movie for the user userId of the movie movieId 
                movie_obj = Movie(movie_genre , movie_avgs[movieId])
                # Add the new object
                current_value.append(movie_obj)
            
        # Append the new list
        ratings_for_the_user[userId] = current_value
                

  0%|          | 0/161 [00:00<?, ?it/s]

In [17]:
def init_params(layers_size):
    params = list()
    for i in range(len(layers_size) - 1):
        w = np.random.randn(layers_size[i + 1] , layers_size[i]) * 2 / np.sqrt(layers_size[i + 1] + layers_size[i])
        b = np.zeros((layers_size[i + 1] , 1))
        params.append(w)
        params.append(b)
    return params

In [22]:
def ANN(x , params):
    W = params[0::2]
    b = params[1::2]
    num_layers = int(len(params) / 2 + 1)
    #x = np.array([movies[i].genre for i in range(len(movies))])
    #y = np.array([movies[i].rating_one_hot for i in range(len(movies))])
    #print(x , y)
    layer = x
    
    for i in range(num_layers - 1):
        layer = W[i] @ layer + b[i]
        
        # If it's not the last layer apply the activation function
        if i < len(W) - 2:
            # If tanh
            #layer = jnp.tanh(layer)
            # If relu
            layer = jnp.maximum(0.0 , layer)
    
    # Apply softmax to obtain a vector of probability as output
    layer = jnp.exp(layer) / jnp.sum(jnp.exp(layer) , axis = 0)
    return layer

In [23]:
# Testing the NN
# Init the params
params = init_params([20 , 32 , 32 , 10])
# Create a random Movie object
movie_obj = Movie(correlation_matrix[0] , 3)
# Call ANN function
#x = np.array([movies[i].genre for i in range(len(movies))])
out = ANN(movie_obj.genre[: , None] , params)
# Chech the shape and the sum
out.shape , np.sum(out , axis = 0)



((10, 1), DeviceArray([1.], dtype=float32))

In [170]:
def RMSE_function(x , y , params):
    err = y - ANN(x , params)
    return jnp.sqrt(jnp.mean(err**2))
    
error = RMSE_function(movie_obj.genre[: , None] , movie_obj.rating_one_hot, params)
error

DeviceArray(0.3056676, dtype=float32)

In [24]:
def loss_crossentropy(x , y , params):
  y_pred = ANN(x , params)
  loss = - jnp.sum(y * jnp.log(y_pred) + (1 - y) * jnp.log(1 - y_pred))
  return loss

loss_crossentropy(movie_obj.genre[: , None] , movie_obj.rating_one_hot, params)

DeviceArray(35.031715, dtype=float32)

In [25]:
# Create a dictionary with key the userId and with value the params associated to him
# Every user has his own neural network
users_params = {}

# Defining the layers size of the neural network
layers_size = [20 , 48 , 48 , 10]

for userId in userIds_available:
    users_params[userId] = init_params(layers_size)

In [26]:
# Use jax to define the grad and then add jit
# If RMSE loss
#grad_jit = jax.jit(jax.grad(RMSE_function , argnums=2))
# If crossentropy loss
grad_jit = jax.jit(jax.grad(loss_crossentropy , argnums=2))

In [27]:
def train_NN(movies , params):
    epochs = 1000
    alpha = 0.9
    learning_rate_min = 1e-3
    learning_rate_max = 1e-2
    learning_rate_decay = 1000
    x = np.array([movies[i].genre[:,None] for i in range(len(movies))])
    y = np.array([movies[i].rating_one_hot[:,None] for i in range(len(movies))])
    if len(x) == 1:
      batch_size = 1
    else:
      batch_size = int(0.2 * len(x)) + 1
    
    v = [0.0 for _ in range(len(params))]
    
    for epoch in range(epochs):
        # Calcule the learning rate
        learning_rate = max(learning_rate_min , learning_rate_max * (1 - epoch) / learning_rate_decay)
        # Creating the random minibatch
        minibatch = np.random.choice(len(x) , batch_size)
        # Calculate the gradient
        grad = grad_jit(x[minibatch] , y[minibatch] , params)
        
        # Update the params with momentum(velocity)
        for i in range(len(params)):
            v[i] = alpha *  v[i] - learning_rate * grad[i]
            params[i] += v[i]

    # Return the updated params for the user
    return params
    

In [28]:
for userId in tqdm(userIds_available):
  # Take the params currently saved
  current_params = users_params[userId]
  # Take the list of movie_obj for the user
  movies_obj = ratings_for_the_user[userId]
  # Train the network
  updated_params = train_NN(movies_obj , current_params)
  # Save the updated params
  users_params[userId] = updated_params

  0%|          | 0/668 [00:00<?, ?it/s]

In [29]:
# Over this value a probability is considered okay
probability_threshold = 0.5
num = 0
# Preidct a rate for each movie and for each user
for user_i in tqdm(range(ratings_matrix.shape[0])):
  # Take the user id corresponding to the row user_i
  userId = userIds_available[user_i]
  # Take the params of the user
  params = users_params[userId]
  # For each movie present calculate the prediction
  for movie_j in range(ratings_matrix.shape[1]):
    # If the user didn't rate the movie
    if ratings_matrix[user_i , movie_j] == 0:
      # Take the genre of the movie movie_j
      movie_genre = correlation_matrix[movie_j]
      # Calculate the prediction
      prediction = ANN(movie_genre[: , None] , params)
      # Check if the max is grater than a threshold
      max_probability = jnp.max(prediction)
      # If the NN is almost sure
      if max_probability > probability_threshold:
        # Take the element with the highest probability (and convert it in the rate number)
        best_rate = float(jnp.argmax(prediction , axis = 0)) / 2 + 0.5
        # Save the prediction
        ratings_matrix[user_i , movie_j] = best_rate

  0%|          | 0/668 [00:00<?, ?it/s]

In [30]:
# Calculate how many prediction have been performed
count_missing_values(ratings_matrix)

  0%|          | 0/668 [00:00<?, ?it/s]

There are 5234942 missing valuess
There are 1664830 values inserted
There are 6899772 values


In [31]:
# Aanalyzing the errors/precision/recall/f1 score after the prediction of the ratings predicted only
# Initialize the list for the evaluation of the initial errors
rows_test_limited = list()
cols_test_limited = list()
vals_test_limited = list()

# Fill the list
def fill_test_lists():
  for i in range(len(rows_test)):
    # Add just the position filled with the algortithm
    if ratings_matrix[rows_test[i]][cols_test[i]] != 0:
        rows_test_limited.append(rows_test[i])
        cols_test_limited.append(cols_test[i])
        vals_test_limited.append(vals_test[i])

# Calculate RMSE and rho
def analyze_starting_errors():
  vals_pred_limited = ratings_matrix[rows_test_limited, cols_test_limited]
  err = vals_test_limited - vals_pred_limited
  RMSE = np.sqrt(np.mean(err**2))
  rho = pearsonr(vals_test_limited, vals_pred_limited)[0]
  return RMSE , rho

# Evaluate the precision and the recall before apply SVT
def precision_and_recall_initial_state():
    total_recommended = 0 # true positive + false negative
    predicted_recommended_items = 0 # true positive + false positive
    predicted_true_recommended_items = 0 # true positive
    # A movie is recommended if it's rating is greater than this value
    recommendation_value = 3
    for i in range(len(rows_test_limited)):
        true_rating = vals_test_limited[i]
        predicted_value = ratings_matrix[rows_test_limited[i]][cols_test_limited[i]]
        # Calculate true positive
        if true_rating >= recommendation_value: 
            total_recommended += 1
            if predicted_value >= recommendation_value:
                predicted_true_recommended_items += 1
        # Calculate true positive + false positive
        if predicted_value >= recommendation_value:
            predicted_recommended_items += 1
                
    print("True positive: " , predicted_true_recommended_items)
    print("True positive + false positive: " , predicted_recommended_items)
    print("True positive + false negative: " , total_recommended)
    precision = predicted_true_recommended_items / predicted_recommended_items
    recall = predicted_true_recommended_items / total_recommended
    print("Precision: " , precision)
    print("Recall: " , recall)
    return precision , recall

# Calculate the F1-score
def F1_measure(precision_value , recall_value):
    return 2 * precision_value * recall_value / ( precision_value + recall_value)

fill_test_lists()
print(analyze_starting_errors()) 
print("At this stage %d values have already been predicted" % len(rows_test_limited))
precision , recall = precision_and_recall_initial_state()
F1_measure(precision , recall)

(1.7406852912851714, 0.19587277296777766)
At this stage 2718 values have already been predicted
True positive:  1416
True positive + false positive:  1681
True positive + false negative:  2151
Precision:  0.842355740630577
Recall:  0.6582984658298466


0.7390396659707723

In [32]:
# Reconstruct rows_train, cols_train, vals_train with all the value of the input + already predicted values
counter = 0
rows_train_updated = list()
cols_train_updated = list()
vals_train_updated = list()
for i_user in tqdm(range(ratings_matrix.shape[0])):
    for j_movie in range(ratings_matrix.shape[1]):
        # If it is a default or predicted value, save the position
        if ratings_matrix[i_user][j_movie] != 0:
            rows_train_updated.append(i_user)
            cols_train_updated.append(j_movie)
            vals_train_updated.append(ratings_matrix[i_user][j_movie])
            counter += 1
print("Saved %d values" % counter)

  0%|          | 0/668 [00:00<?, ?it/s]

Saved 1664830 values


In [33]:
# Calculate RMSE and rho for the whole matrix
def errors():
    vals_pred = X_hat[rows_test, cols_test]
    err = vals_test - vals_pred
    RMSE = np.sqrt(np.mean(err**2))
    rho = pearsonr(vals_test, vals_pred)[0]
    return RMSE , rho

In [35]:
# Perform some evaluations
def precision_and_recall():
    total_recommended = 0 # true positive + false negative
    predicted_recommended_items = 0 # true positive + false positive
    predicted_true_recommended_items = 0 # true positive
    # A movie is recommended if it's rating is greater than this value
    recommendation_value = 3
    for i in range(len(rows_test)):
        true_rating = vals_test[i]
        predicted_value = X_hat[rows_test[i]][cols_test[i]]
        # Calculate true positive
        if true_rating >= recommendation_value: 
            total_recommended += 1
            if predicted_value >= recommendation_value:
                predicted_true_recommended_items += 1
        # Calculate true positive + false positive
        if predicted_value >= recommendation_value:
            predicted_recommended_items += 1
                
    #print("True positive: " , predicted_true_recommended_items)
    #print("True positive + false positive: " , predicted_recommended_items)
    #print("True positive + false negative: " , total_recommended)
    precision = predicted_true_recommended_items / predicted_recommended_items
    recall = predicted_true_recommended_items / total_recommended
    #print("Precision: " , precision)
    #print("Recall: " , recall)
    return precision , recall

In [36]:
n_max_iter = 100

increment_tol = 1e-1

# Threshold parameters
a = 0.01
b = 200

RMSE_list = list()
rho_list = list()
precision_list = list()
recall_list = list()
f1_score_list = list()

# Calculating initial errors / parameters -> using just the predicted value
RMSE , rho = analyze_starting_errors()
precision , recall = precision_and_recall_initial_state()
f1_score = F1_measure(precision , recall)

RMSE_list.append(RMSE)
rho_list.append(rho)
precision_list.append(precision)
recall_list.append(recall)
f1_score_list.append(f1_score)


X_hat = ratings_matrix

for k in tqdm(range(n_max_iter)):
    X_old = X_hat.copy()
    
    # Calculating the SVD of the current matrix
    U,s,VT = np.linalg.svd(X_hat, full_matrices=False)

    # Update the threshold
    threshold = b * np.exp(-k * a)
    #threshold = 50
    
    s[s > 0] = s[s > 0] - threshold
    s[s < 0] = 0

    # Calculating the new matrix trough SVD
    X_hat = U @ np.diag(s) @ VT
    
    # Maintain the default values
    X_hat[rows_train_updated,cols_train_updated] = vals_train_updated
    
    # Some negative values could appear -> set to 0
    X_hat[X_hat < 0] = 0

    # Calculate the increment -> how much the new matrix is different from the previuos one
    increment = np.linalg.norm(X_hat - X_old) 

    if k % 10 == 9:
        # Calculate the errors
        RMSE , rho = errors()
        # Add the errors in the lists
        RMSE_list.append(RMSE)
        rho_list.append(rho)
        precision , recall = precision_and_recall()
        f1_score = F1_measure(precision , recall)
        
        precision_list.append(precision)
        recall_list.append(recall)
        f1_score_list.append(f1_score)
        # Show the errors
        print('================== iter %d - theshold %1.2f - increment %1.3e' % (k+1, threshold, increment))
        print('RMSE: %1.3f' % RMSE)
        print('rho : %1.3f' % rho)
        print('precision: %1.3f' % precision)
        print('recall: %1.3f' % recall)
        print('F1-score: %1.3f' % f1_score)

    # If the increment is lower -> stop the algorithm
    if increment < increment_tol:
        break

True positive:  1416
True positive + false positive:  1681
True positive + false negative:  2151
Precision:  0.842355740630577
Recall:  0.6582984658298466


  0%|          | 0/100 [00:00<?, ?it/s]

RMSE: 1.940
rho : 0.131
precision: 0.890
recall: 0.211
F1-score: 0.342
RMSE: 1.713
rho : 0.161
precision: 0.896
recall: 0.268
F1-score: 0.413
RMSE: 1.608
rho : 0.180
precision: 0.897
recall: 0.302
F1-score: 0.452
RMSE: 1.540
rho : 0.195
precision: 0.898
recall: 0.333
F1-score: 0.485
RMSE: 1.489
rho : 0.208
precision: 0.897
recall: 0.359
F1-score: 0.512
RMSE: 1.447
rho : 0.220
precision: 0.900
recall: 0.385
F1-score: 0.539
RMSE: 1.411
rho : 0.231
precision: 0.900
recall: 0.410
F1-score: 0.563
RMSE: 1.380
rho : 0.241
precision: 0.902
recall: 0.434
F1-score: 0.586
RMSE: 1.353
rho : 0.251
precision: 0.902
recall: 0.456
F1-score: 0.606
RMSE: 1.329
rho : 0.260
precision: 0.902
recall: 0.477
F1-score: 0.624


In [37]:
precision , recall = precision_and_recall()
F1_measure(precision , recall)

0.6243523316062176