In [2]:
import os
import numpy as np
import pandas as pd
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import KNNBasic
from surprise import accuracy
import random
import pickle

# Fix the seeds of both random number generators so results are reproducible
seed = 10
np.random.seed(seed)
random.seed(seed)

In [3]:
# Stop early when the preprocessed ratings CSV is missing.
# This only checks that the file exists; it is read in a later cell.
if os.path.exists('./Data/preprocessed_user_item_rating.csv'):
  print("El archivo ha sido cargado")
else:
  raise ValueError('El archivo preprocessed_user_item_rating.csv no fue encontrado en el path')

El archivo ha sido cargado


In [4]:
# Read the preprocessed ratings; the header row of the file is replaced by `names`
ratings = pd.read_csv(
    './Data/preprocessed_user_item_rating.csv',
    sep=',',
    header=0,
    names=['userid', 'artist-name', 'rating'],
)
# Keep the three modelling columns, in this order
ratings = ratings[['userid', 'artist-name', 'rating']]
ratings

Unnamed: 0,userid,artist-name,rating
0,user_001000,Wilco,5.0
1,user_001000,Radiohead,4.9
2,user_001000,Animal Collective,4.7
3,user_001000,Girl Talk,4.6
4,user_001000,Aesop Rock,4.2
...,...,...,...
896880,user_000001,Jamie Lidell,0.2
896881,user_000001,Nick Holmes,0.2
896882,user_000001,Nuyorican Soul,0.2
896883,user_000001,The Birthday,0.2


# Creación del sistema de recomendación

In [5]:
# Ratings are declared on a 0-to-5 scale
reader = Reader(rating_scale=(0, 5))
# Build the Surprise dataset from the (user, item, rating) columns of the dataframe
surprise_dataset = Dataset.load_from_df(
    ratings[['userid', 'artist-name', 'rating']],
    reader,
)

In [6]:
# Hold out 20% of the ratings for evaluation.
# The seed is passed explicitly so that re-running this cell yields the same split,
# instead of depending on how much of the global NumPy random stream has already
# been consumed by earlier executions.
trainset, testset = train_test_split(surprise_dataset, test_size=.2, random_state=seed)

### Modelo basado en distancias coseno

In [104]:
# Basic KNN with cosine similarity; user_based=False means similarities are item-item
sim_options = dict(name='cosine', user_based=False)
algo = KNNBasic(sim_options=sim_options, k=30, min_k=5)

In [None]:
# Train on the training split, then score every rating in the held-out split
algo.fit(trainset)
predictions = algo.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [None]:
# Persist the current `predictions` to the cosine results file.
# The context manager guarantees the file handle is flushed and closed,
# even if pickling raises.
with open("./Data/predictions_ii_cosine.p", "wb") as predictions_file:
    pickle.dump(predictions, predictions_file)

### Modelo basado en índice de Jaccard

### Modelo basado en correlación de Pearson

In [87]:
# Basic KNN with item-item pearson_baseline similarity and no shrinkage (shrinkage=0)
sim_options = dict(name='pearson_baseline', user_based=False, shrinkage=0)
algo = KNNBasic(sim_options=sim_options)

In [88]:
# Train on the training split, then score every rating in the held-out split
algo.fit(trainset)
predictions = algo.test(testset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


In [89]:
# Persist the current `predictions` to the Pearson results file.
# The context manager guarantees the file handle is flushed and closed,
# even if pickling raises.
with open("./Data/predictions_ii_pearson.p", "wb") as predictions_file:
    pickle.dump(predictions, predictions_file)

## Recomendaciones

In [97]:
# Reload the Pearson predictions file that this notebook writes.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
# The context manager closes the file handle once loading finishes.
with open("./Data/predictions_ii_pearson.p", "rb") as predictions_file:
    pickle_predictions = pickle.load(predictions_file)

In [98]:
# Keep only the predictions made for user user_001000
user_predictions = [
    prediction
    for prediction in pickle_predictions
    if prediction.uid == 'user_001000'
]

In [99]:
# Order from highest to lowest estimated relevance
user_predictions = sorted(
    user_predictions,
    key=lambda prediction: prediction.est,
    reverse=True,
)

In [100]:
# Keep the first 10 predictions
user_predictions = user_predictions[:10]

In [101]:
# Convert the (item id, estimate) pairs into a dataframe
labels = ['artist', 'estimation']
df_predictions = pd.DataFrame.from_records(
    [(prediction.iid, prediction.est) for prediction in user_predictions],
    columns=labels,
)

### Recomendaciones para el usuario user_001000

In [102]:
# Show only the recommended artist names
df_predictions[['artist']]

Unnamed: 0,artist
0,Dj Format
1,Kenny Burrell
2,Santogold
3,Groove Collective
4,Crystal Castles
5,Ben Folds
6,Bon Iver
7,Cold War Kids
8,M.I.A.
9,Modest Mouse


In [103]:
# NOTE(review): at this point `user_predictions` holds only the ten highest-estimate
# predictions for user_001000 (it was filtered, sorted and sliced in earlier cells),
# so this RMSE describes that slice rather than the whole test set - confirm this is intended.
accuracy.rmse(predictions=user_predictions, verbose=True)

RMSE: 1.0279


1.0279055833904374

In [16]:
# Rebuild the item-item cosine model explicitly before fitting.
# Read top to bottom, the last model bound to `algo` is the pearson_baseline one, whereas
# the recorded output of this cell ("Computing the cosine similarity matrix...") came from
# the cosine configuration. Defining it here removes the dependence on execution order.
sim_options = {'name': 'cosine',
               'user_based': False  # compute item-item similarity
               }
algo = KNNBasic(k=30, min_k=5, sim_options=sim_options)
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x115e2afa0>

In [17]:
# Estimate the rating of a single (user, artist) pair with the fitted model
algo.predict(uid='user_001000', iid='Kenny Burrell')

Prediction(uid='user_001000', iid='Kenny Burrell', r_ui=None, est=1, details={'actual_k': 30, 'was_impossible': False})

In [18]:
# Look up the trainset's inner item id for the raw artist name 'Kenny Burrell'
item_inner_id = algo.trainset.to_inner_iid('Kenny Burrell')

In [19]:
# Inner ids of the 10 nearest neighbours of the item, according to the fitted model
item_neighbors = algo.get_neighbors(iid=item_inner_id, k=10)
item_neighbors

[1390, 1732, 1741, 2743, 3726, 4180, 4191, 5339, 5608, 7764]

In [20]:
# Lazily map each neighbour's inner id back to its raw artist name, then print one per line
neighbors = map(algo.trainset.to_raw_iid, item_neighbors)
for neighbor in neighbors:
    print(neighbor)

Aberfeldy
The Stooges
Pet Shop Boys
Willie Nelson
Roger Eno
The Congos
Boredoms
Alizée
Ralph Mctell
Johannes Schmoelling
