In [251]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [252]:
def convert_categorical_to_numeric(users):
    """One-hot encode the string attributes of every user, in place.

    Each user's ``attributes`` mapping is split into string values
    (categorical) and everything else (treated as numeric). The categorical
    values are one-hot encoded and the result is appended to the numeric
    values, so every user ends up with a flat feature vector laid out as
    ``[numeric..., one-hot...]``.

    Column order is taken from the first user. A user whose mapping has
    exactly the same keys in a different insertion order is read in the first
    user's order, so its columns line up with everyone else's. A user whose
    key set differs is read in its own order, as before.

    Args:
        users: Sequence of objects exposing a mapping-like ``attributes``.
            It is iterated more than once, so a one-shot iterator will not
            work.

    Returns:
        tuple: ``(encoder, users)`` where ``encoder`` is the fitted
        ``OneHotEncoder`` and ``users`` is the very same object that was
        passed in, with each ``attributes`` replaced by a row of the combined
        array.

    Note:
        Not idempotent: after this call ``attributes`` is an array row, so
        calling the function again on the same users fails on ``.items()``.
    """
    # Split numeric and categorical attributes, one row per user.
    categorical_data = []
    numeric_data = []
    reference_keys = None
    reference_key_set = None
    for user in users:
        pairs = list(user.attributes.items())
        if reference_keys is None:
            # The first user defines the column order for everybody.
            reference_keys = [key for key, _ in pairs]
            reference_key_set = set(reference_keys)
        else:
            by_key = dict(pairs)
            # Re-order only when the schema matches exactly; otherwise keep
            # the user's own order so existing inputs behave as they did.
            if len(by_key) == len(pairs) and set(by_key) == reference_key_set:
                pairs = [(key, by_key[key]) for key in reference_keys]

        categorical_attributes = []
        numeric_attributes = []
        for _, value in pairs:
            if isinstance(value, str):
                categorical_attributes.append(value)
            else:
                numeric_attributes.append(value)
        categorical_data.append(categorical_attributes)
        numeric_data.append(numeric_attributes)

    # One-hot encode the categorical columns (dense output).
    encoder = OneHotEncoder(sparse_output=False)
    transformed_categorical_data = encoder.fit_transform(categorical_data)

    # Numeric columns first, one-hot columns after.
    combined_data = np.hstack((numeric_data, transformed_categorical_data))

    # Only now overwrite the users, so a failure above leaves them untouched.
    for i, user in enumerate(users):
        user.attributes = combined_data[i]

    return encoder, users

In [253]:
class User:
    """A person known to the recommender.

    Attributes:
        user_id: Identifier supplied by the caller.
        attributes: Descriptive attributes exactly as passed in,
            for example {'edad': 30, 'género': 'F'}.
        cluster: Cluster label; initialised to None.
    """

    def __init__(self, user_id, attributes):
        self.user_id, self.attributes, self.cluster = user_id, attributes, None

class Book:
    """A book in the catalogue.

    Attributes:
        book_id: Identifier supplied by the caller.
        title: Title of the book.
        attributes: Descriptive attributes exactly as passed in,
            for example {'género': 'Fantasía', 'año': 2001}.
    """

    def __init__(self, book_id, title, attributes):
        self.book_id, self.title, self.attributes = book_id, title, attributes

class Rating:
    """A score that one user gave to one book.

    Attributes:
        user_id: Identifier of the user who rated.
        book_id: Identifier of the rated book.
        score: The score value as passed in.
    """

    def __init__(self, user_id, book_id, score):
        self.user_id, self.book_id, self.score = user_id, book_id, score

In [254]:
class CBR:
    """Recommender scaffold that groups users into clusters with k-means.

    On construction every user's attribute mapping is replaced, in place, by
    a numeric feature vector (numeric values followed by the one-hot encoding
    of the string values). The fitted encoder is kept so that new users can
    be projected into the same feature space.

    NOTE(review): features are passed to k-means unscaled, so attributes with
    a large numeric range will dominate the distance. Confirm this is
    intended.
    """

    def __init__(self, users, books, ratings):
        """Store the catalogue and encode the users.

        Args:
            users: Sequence of user objects with a mapping-like
                ``attributes``; they are modified in place.
            books: Stored as-is on ``self.books``.
            ratings: Stored as-is on ``self.ratings``.
        """
        self.books = books
        self.ratings = ratings
        # Must be captured before init_users replaces the mappings by arrays.
        self._attribute_keys = self._attribute_order(users)
        self.encoder, self.users = self.init_users(users)
        self.kmeans = None

    @staticmethod
    def _attribute_order(users):
        """Return the attribute keys of the first user, or None if unavailable."""
        try:
            return [key for key, _ in users[0].attributes.items()]
        except (TypeError, IndexError, KeyError, AttributeError):
            return None

    def init_users(self, users):
        """Encode ``users`` and return ``(encoder, users)``."""
        return convert_categorical_to_numeric(users)

    def make_clusters(self, k, random_state=None):
        """Fit k-means on the encoded users and label each user.

        Args:
            k: Number of clusters.
            random_state: Forwarded to ``KMeans``; pass an integer to get
                reproducible cluster labels. Defaults to None, which keeps
                the previous unseeded behaviour.
        """
        user_attributes = np.array([user.attributes for user in self.users])
        kmeans = KMeans(n_clusters=k, n_init=10, random_state=random_state)
        kmeans.fit(user_attributes)
        # Publish the model only after a successful fit.
        self.kmeans = kmeans

        for user, label in zip(self.users, kmeans.labels_):
            user.cluster = label

    def predict_user_cluster(self, user):
        """Return the cluster label predicted for ``user``.

        If ``user.attributes`` is still a mapping it is replaced by its
        encoded feature vector. If it has already been encoded (for example
        by an earlier call) it is used as it is, so the method can be called
        repeatedly for the same user.

        Raises:
            RuntimeError: If ``make_clusters`` has not been called yet.
        """
        if self.kmeans is None:
            # Checked first so the user is not modified when we cannot predict.
            raise RuntimeError(
                "make_clusters() must be called before predict_user_cluster()"
            )
        if hasattr(user.attributes, "items"):
            user.attributes = self.__convert_user_attributes(user.attributes)
        return self.kmeans.predict([user.attributes])[0]

    def __convert_user_attributes(self, user_attributes):
        """Encode one attribute mapping with the already-fitted encoder.

        The values are read in the key order of the training users when the
        key sets match, so the resulting vector is aligned with the columns
        the model was fitted on. Otherwise the mapping's own order is used.
        """
        pairs = list(user_attributes.items())
        trained_keys = getattr(self, "_attribute_keys", None)
        if trained_keys is not None:
            by_key = dict(pairs)
            if len(by_key) == len(pairs) and set(by_key) == set(trained_keys):
                pairs = [(key, by_key[key]) for key in trained_keys]

        categorical_attributes = []
        numeric_attributes = []
        for _, value in pairs:
            if isinstance(value, str):
                categorical_attributes.append(value)
            else:
                numeric_attributes.append(value)

        transformed_categorical_data = self.encoder.transform([categorical_attributes])
        return np.hstack((numeric_attributes, transformed_categorical_data[0]))

In [255]:
# Toy dataset: six users described by age ("edad") and gender ("género").
df = pd.DataFrame(
    {
        'ind': [1, 2, 3, 4, 5, 6],
        'edad': [26, 20, 22, 40, 40, 45],
        'género': ['hombre', 'mujer', 'hombre', 'hombre', 'mujer', 'hombre'],
    }
)

# Build one User per row: 'ind' is the id, every other column is an attribute.
users = []
for index, row in df.iterrows():
    attributes = row.drop('ind').to_dict()
    print(attributes)
    user_id = row['ind']
    users.append(User(user_id, attributes))

# No books or ratings yet; constructing the CBR encodes the users in place.
cbr = CBR(users, [], [])

{'edad': 26, 'género': 'hombre'}
{'edad': 20, 'género': 'mujer'}
{'edad': 22, 'género': 'hombre'}
{'edad': 40, 'género': 'hombre'}
{'edad': 40, 'género': 'mujer'}
{'edad': 45, 'género': 'hombre'}


In [256]:
# KMeans is created without a random_state, so it draws its initial centroids
# from NumPy's global random state; seed it so the labels are reproducible.
np.random.seed(42)
cbr.make_clusters(2)

In [257]:
# Show the cluster label and encoded feature vector of every user.
for user in cbr.users:
    print("User:", user.user_id, "Cluster:", user.cluster, "Attributes:", user.attributes)

User: 1 Cluster: 0 Attributes: [26.  1.  0.]
User: 2 Cluster: 0 Attributes: [20.  0.  1.]
User: 3 Cluster: 0 Attributes: [22.  1.  0.]
User: 4 Cluster: 1 Attributes: [40.  1.  0.]
User: 5 Cluster: 1 Attributes: [40.  0.  1.]
User: 6 Cluster: 1 Attributes: [45.  1.  0.]


In [258]:
# Sample catalogue entries.
book1 = Book(
    book_id=1,
    title='El señor de los anillos',
    attributes={'género': 'Fantasía', 'año': 2001},
)
book2 = Book(
    book_id=2,
    title='El código Da Vinci',
    attributes={'género': 'Misterio', 'año': 2005},
)

# Sample ratings: users 1 and 2 each score both books.
rating1 = Rating(user_id=1, book_id=1, score=5)
rating2 = Rating(user_id=1, book_id=2, score=3)
rating3 = Rating(user_id=2, book_id=1, score=2)
rating4 = Rating(user_id=2, book_id=2, score=4)

In [259]:
# A new user that was not part of the clustered population.
user7 = User(7, {'edad': 25, 'género': 'mujer'})
# Build a new list rather than np.append, which would turn `users` into a
# NumPy object array. The list held by `cbr` is left untouched either way.
users = [*users, user7]

print("Cluster:", cbr.predict_user_cluster(user7))

Cluster: 0
