In [1]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import DistanceMetric

In [2]:
class User:
    """A single user profile described by a dictionary of preference attributes.

    Parameters
    ----------
    ind:
        Identifier of the user; shown in the ``User <ind>`` header of ``str(user)``.
    attributes:
        Mapping that must contain the keys ``edat``, ``estil_literari``,
        ``temas_especifics``, ``complexitat``, ``caracteristiques``,
        ``desenvolupament_del_personatge``, ``accio_o_reflexio``, ``longitud``,
        ``epoca`` and ``detall_cientific``. The mapping itself is kept (not
        copied) in ``self.attributes``.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing from ``attributes``.
    """

    def __init__(self, ind, attributes):
        self.ind = ind
        self.attributes = attributes

        # Each expected key is also mirrored as an instance attribute.
        self.edat = attributes["edat"]
        self.estil_literari = attributes["estil_literari"]
        self.temas_especifics = attributes["temas_especifics"]
        self.complexitat = attributes["complexitat"]
        self.caracteristiques = attributes["caracteristiques"]
        self.desenvolupament_del_personatge = attributes["desenvolupament_del_personatge"]
        self.accio_o_reflexio = attributes["accio_o_reflexio"]
        self.longitud = attributes["longitud"]
        self.epoca = attributes["epoca"]
        self.detall_cientific = attributes["detall_cientific"]

        # Numeric encoding of the profile; starts empty and is assigned from
        # outside the class once the profile has been encoded.
        self.vector = []

    def __str__(self):
        """Return ``User <ind>`` followed by one ``key: value`` line per attribute.

        The text is returned rather than printed, so ``str(user)``, f-strings
        and logging all receive the real description; ``print(user)`` still
        shows the same lines.
        """
        lines = [f"User {self.ind}"]
        lines.extend(f"{key}: {value}" for key, value in self.attributes.items())
        return "\n".join(lines)

In [3]:
# Three sample profiles used to exercise the case base further down.
att = dict(
    edat=20,
    estil_literari="realisme",
    temas_especifics="amor",
    complexitat="baixa",
    caracteristiques="simples",
    desenvolupament_del_personatge="alt",
    accio_o_reflexio="accio",
    longitud="mitjana",
    epoca="actual",
    detall_cientific="baix",
)
user = User(ind=1, attributes=att)

att2 = dict(
    edat=50,
    estil_literari="simbolisme",
    temas_especifics="terror",
    complexitat="alta",
    caracteristiques="complexes",
    desenvolupament_del_personatge="mitja",
    accio_o_reflexio="reflexio",
    longitud="llarga",
    epoca="futura",
    detall_cientific="alta",
)
user2 = User(ind=2, attributes=att2)

# Third profile: same as the second one except for "complexitat".
# Overriding an existing key keeps its original position in the new dict.
att3 = {**att2, "complexitat": "baixa"}
user3 = User(ind=3, attributes=att3)

print(user)

User 1
edat: 20
estil_literari: realisme
temas_especifics: amor
complexitat: baixa
caracteristiques: simples
desenvolupament_del_personatge: alt
accio_o_reflexio: accio
longitud: mitjana
epoca: actual
detall_cientific: baix



In [4]:
class CBR:
    def __init__(self, users):
        self.users = users
    
    def __str__(self):
        for user in self.users:
            print(user)
        return ""
    
    def get_users(self):
        return self.users
    
    def get_encoder(self):
        categories = [
            ["realisme", "romanticisme", "naturalisme", "simbolisme", "modernisme", "realisme magico", "postmodernisme"],
            ["amor", "aventura", "terror", "fantasia", "ciencia ficcio", "historica", "filosofica", "psicologica", "social", "politica", "religiosa", "erotica", "humoristica", "costumista", "negra", "realista", "fantastica", "mitologica", "poetica", "satirica", "biografica", "epica", "didactica", "teatral", "lirica", "epistolar", "dramatica", "epica", "didactica", "teatral", "lirica", "epistolar", "dramatica"],
            ["baixa", "mitjana", "alta"],
            ["simples", "complexes"],
            ["baix", "mitja", "alt"],
            ["accio", "reflexio"],
            ["curta", "mitjana", "llarga"],
            ["actual", "passada", "futura"],
            ["baix", "mitja", "alta"]
        ]
        encoder = OneHotEncoder(categories=categories, sparse_output=False)
        encoder.fit([["realisme", "amor", "baixa", "simples", "baix", "accio", "curta", "actual", "baix"]])
        return encoder
    
    def transform_user_to_numeric(self, encoder, users):
        for user in users:
            categorical_attributes = []
            numeric_attributes = []
            for key, value in user.attributes.items():
                if isinstance(value, str):
                    categorical_attributes.append(value)
                else:
                    numeric_attributes.append(value/100)

            transformed_categorical_data = encoder.transform([categorical_attributes])
            combined_data = np.hstack((numeric_attributes, transformed_categorical_data[0]))

            user.vector = combined_data

        return users
    
    def similarity(self, user1, user2, metric):
        if metric == "hamming":
            # Hamming distance
            dist = DistanceMetric.get_metric('hamming')
            return dist.pairwise([user1.vector], [user2.vector])[0][0]
        elif metric == "cosine":
            return cosine_similarity([user1.vector], [user2.vector])[0][0]
        
    def retrieve(self, user, metric):
        """
        Return 5 most similar users
        """
        similarities = []
        for u in self.users:
            similarities.append((u, self.similarity(user, u, metric)))
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:5]

In [5]:
# Build the case base from the three sample users, then encode them in place
# (each user's ``vector`` attribute is filled by the transform call).
cbr = CBR(users=[user, user2, user3])
encoder = cbr.get_encoder()
users = cbr.transform_user_to_numeric(encoder=encoder, users=cbr.get_users())

In [6]:
# Show the encoded vector of each of the three sample users.
for position in range(3):
    print(cbr.get_users()[position].vector)

[0.2 1.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  1.  0.  0.  1.  0.  0.  0.  1.  1.  0.  0.  1.  0.
 1.  0.  0.  1.  0.  0. ]
[0.5 0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  1.  0.  0.  1.  0.  0.  1.
 0.  0.  1.  0.  0.  1. ]
[0.5 0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  1.  0.  0.  0.  1.  0.  1.  0.  0.  1.  0.  0.  1.
 0.  0.  1.  0.  0.  1. ]


In [7]:
# Compare user 1 vs user 2 and user 2 vs user 3 under both metrics.
# Cosine is a similarity (higher = closer); Hamming is a distance (lower = closer).
for heading, metric_name in (("Cosine", "cosine"), ("Hamming", "hamming")):
    print(heading)
    for left, right in ((0, 1), (1, 2)):
        print(cbr.similarity(cbr.get_users()[left], cbr.get_users()[right], metric_name))

Cosine
0.010935657966662841
0.8918918918918919
Hamming
0.31666666666666665
0.03333333333333333
