# **Sistema basado en contenido**

# **Librerías**
**PANDAS:** Esta librería nos permite importar datos desde diferentes fuentes y guardarlos en un objeto de tipo DataFrame, con el cual podemos realizar diferentes operaciones.  

**NUMPY:** Permite realizar operaciones matemáticas de alto nivel y administrar de forma eficiente matrices multidimensionales.

**Numpy.linalg:** Su función `norm` permite calcular la norma matricial o la norma vectorial. El parámetro `ord` selecciona el tipo (orden) de norma que se calcula y admite varios valores predefinidos.



In [None]:
#Importacion de las librerias
import pandas as pd
import numpy as np
from numpy import dot
from numpy.linalg import norm 

In [None]:
# Constant holding the file name of the dataset (read below with pd.read_csv)
PATH = 'data.csv'

# Importar datos

In [None]:
# Load the dataset into a DataFrame and display its (rows, columns) shape
df = pd.read_csv(PATH)
df.shape

(100000, 10)

In [None]:
# Display the first five records of the dataset.
df.head()

Unnamed: 0,book_id,author_id,book_genre,reader_id,num_pages,book_rating,publisher_id,publish_year,book_price,text_lang
0,655,52,4,11482,300,4,8,2012,94,7
1,2713,90,3,6479,469,1,8,2012,33,5
2,409,17,2,25472,435,1,12,2001,196,4
3,1150,234,10,23950,529,2,23,2019,79,2
4,2424,390,5,13046,395,2,20,2010,200,4


# Recomendación de libro

In [None]:
def normalize(data):
    """
    Scale a 1-D sequence of numbers into the range [0, 1].

    The values are divided by their maximum. If the smallest value is
    negative, every value is first shifted up by ``abs(min)`` so that the
    smallest one becomes 0. Non-negative data is NOT shifted, so its minimum
    maps to ``min / max`` rather than to 0 (this is max-scaling, not strict
    min-max scaling).

    Parameters:
        data (list or array-like): The numeric values to normalize.

    Returns:
        list: The normalized values as floats, in the same order as ``data``.
        An empty input yields an empty list. When the (shifted) maximum is 0,
        i.e. all values are equal and <= 0, a list of zeros is returned
        instead of dividing by zero.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return []

    min_val = values.min()
    if min_val < 0:
        # Shift so that the smallest value sits at 0 before scaling.
        values = values + abs(min_val)

    max_val = values.max()
    if max_val == 0:
        # Nothing to scale by; avoid a division by zero / NaN column.
        return [0.0] * values.size
    return (values / max_val).tolist()

In [None]:
# Add a normalized companion column for the page count, rating and price.
_norm_targets = {
    'num_pages_norm': 'num_pages',
    'book_rating_norm': 'book_rating',
    'book_price_norm': 'book_price',
}
for _target, _source in _norm_targets.items():
    df[_target] = normalize(df[_source].values)

In [None]:
def ohe(df, enc_col, prefix=None):
    """
    One-hot encode one column and append the indicator columns to the frame.

    The indicator columns keep the index of ``df``, so rows stay aligned even
    when ``df`` does not have a default 0..n-1 index (for example after
    filtering or sampling).

    Parameters:
        df (DataFrame): The frame the encoded columns are appended to.
        enc_col (str): Name of the column to one-hot encode.
        prefix (str, optional): Prefix for the new column names
            (``<prefix>_<value>``). With the default ``None`` the new columns
            are named after the raw values, exactly as before; note that
            encoding several columns that share values then yields duplicate
            column labels.

    Returns:
        DataFrame: A new frame with the original columns followed by the
        one-hot columns. ``df`` itself is not modified.
    """
    ohe_df = pd.get_dummies(df[enc_col], prefix=prefix)
    # Both frames share df's index, so the column-wise concat is row-aligned.
    return pd.concat([df, ohe_df], axis=1)

In [None]:
# One-hot encode publication year, genre and text language, appending the
# indicator columns to the frame after each pass.
for _enc_col in ('publish_year', 'book_genre', 'text_lang'):
    df = ohe(df=df, enc_col=_enc_col)

In [None]:
# Drop the raw columns that now have a normalized or one-hot-encoded
# counterpart, then use book_id as the row index (both done in place).
cols = ['publish_year', 'book_genre', 'num_pages', 'book_rating', 'book_price', 'text_lang']
df.drop(columns = cols, inplace = True)
df.set_index('book_id', inplace = True)

In [None]:
class CBRecommend():
    """
    Content-based recommender that ranks rows of a DataFrame by cosine
    similarity to a chosen row.

    The frame is expected to be indexed by book id and to contain only numeric
    (or boolean) feature columns.

    NOTE(review): every column of the frame is used as a feature, so columns
    with large magnitudes (e.g. raw id columns) will dominate the cosine and
    push all similarities towards 1.0 - confirm that this is intended.
    """

    def __init__(self, df):
        # The frame is stored by reference; recommend() adds a 'sim' column to it.
        self.df = df

    def cosine_sim(self, v1, v2):
        """
        Return the cosine similarity between two vectors.

        Returns 0.0 when either vector has zero length (norm 0) instead of
        dividing by zero.
        """
        denom = norm(v1) * norm(v2)
        if denom == 0:
            return 0.0
        return dot(v1, v2) / denom

    def recommend(self, book_id, n_rec):
        """
        Return the ``n_rec`` rows most similar to the given book.

        Parameters:
            book_id: Index label of the book to compare against. If the label
                occurs more than once in the index, its first row is used.
            n_rec (int): Number of rows to return.

        Returns:
            DataFrame: The ``n_rec`` rows of ``self.df`` with the highest
            cosine similarity, including a 'sim' column holding the score.
            The query book itself is part of the ranking.

        Side effects:
            Writes/overwrites the 'sim' column on ``self.df``.

        Raises:
            KeyError: If ``book_id`` is not present in the index.
        """
        # Exclude a 'sim' column left by a previous call so that it is never
        # treated as a feature.
        features = self.df.drop(columns='sim', errors='ignore')

        target = features.loc[book_id]
        if isinstance(target, pd.DataFrame):
            # Duplicate index labels: .loc returned several rows, use the first.
            target = target.iloc[0]
        input_vec = target.to_numpy(dtype=float)

        # Cosine similarity of the input vector against every row at once.
        matrix = features.to_numpy(dtype=float)
        denom = norm(matrix, axis=1) * norm(input_vec)
        sims = np.zeros(len(matrix))
        # Rows with a zero denominator keep a similarity of 0.0.
        np.divide(matrix @ input_vec, denom, out=sims, where=denom > 0)
        self.df['sim'] = sims

        # Return the top-n rows by similarity.
        return self.df.nlargest(columns='sim', n=n_rec)

In [None]:
# Take a random 1000-row sample of the prepared frame (as an independent
# copy) and build the recommender on top of it.
t = df.sample(1000).copy()
cbr = CBRecommend(df = t)

In [None]:
# Display the first rows of the frame held by the recommender
cbr.df.head()

Unnamed: 0_level_0,author_id,reader_id,publisher_id,num_pages_norm,book_rating_norm,book_price_norm,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
1199,88,12732,25,0.702857,0.8,0.635,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
2854,341,8372,40,0.822857,0.2,0.735,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1231,260,1876,8,0.495714,0.3,0.94,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1605,227,26557,24,0.181429,1.0,0.07,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
1881,115,21620,18,0.738571,0.9,0.015,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0


In [None]:
# Request the 5 rows most similar to the first book of the sample
cbr.recommend(book_id = t.index[0], n_rec = 5)

Unnamed: 0_level_0,author_id,reader_id,publisher_id,num_pages_norm,book_rating_norm,book_price_norm,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,sim
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
1199,88,12732,25,0.702857,0.8,0.635,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1.0
51,153,21514,46,0.457143,0.8,0.615,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1.0
973,94,13935,30,0.752857,0.6,0.055,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1.0
1324,135,20178,43,0.582857,0.2,0.885,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1.0
2215,62,8974,20,0.488571,1.0,0.525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1.0
