In [1]:
import numpy as np
import configparser
from src.clustering.algorithm_wrappers.KMeansWrapper import KMeansWrapper
from src.clustering.algorithm_wrappers.AgglomerativeWrapper import AgglomorativeWrapper


In [2]:
# config.ini and the data paths it lists are resolved against this prefix
# (presumably the repository root, two directories above the notebook -- confirm).
PROJECT_ROOT = '../../'
CONFIG_FILE = PROJECT_ROOT + 'config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty result therefore means config.ini was
# not found; fail here with a clear message instead of a later KeyError: 'DATA'.
if not config.read(CONFIG_FILE):
    raise FileNotFoundError(f"Could not read configuration file: {CONFIG_FILE}")

# Locations of the user embeddings used for training and for the test user(s).
embedding_path = PROJECT_ROOT + config['DATA']['UserEmbeddingPath']
test_path = PROJECT_ROOT + config['DATA']['TestUserEmbeddingPath']

## Load embeddings

In [3]:
import pandas as pd

# Read the embedding file; header=None means the first row is treated as data.
entity_embedding = pd.read_table(embedding_path, header=None)

# Keep the columns at positions 1 through 100 as the embedding matrix. Column 0
# is left out (presumably an entity identifier -- TODO confirm).
embedding_columns = entity_embedding.iloc[:, 1:101]
vectors = embedding_columns.values
vectors

array([[-0.063388, -0.181451,  0.057501, ..., -0.101157, -0.091567,
         0.035234],
       [ 0.060958,  0.069934,  0.015832, ..., -0.133727,  0.025795,
         0.051448],
       [-0.093106, -0.052002,  0.020556, ..., -0.144866,  0.04469 ,
         0.013498],
       ...,
       [ 0.034682, -0.009413, -0.024317, ...,  0.00411 ,  0.013771,
        -0.008027],
       [-0.052323, -0.078029, -0.060925, ...,  0.043236,  0.032251,
        -0.001261],
       [-0.00519 , -0.047871,  0.009753, ...,  0.024246, -0.040823,
        -0.008903]])

## Train clustering model and find cluster representatives

In [4]:
# Train the k-means wrapper on the loaded embedding matrix, then ask it to
# extract its cluster representations.
kmeans_model = KMeansWrapper()
kmeans_model.train(vectors)
kmeans_model.extract_representations() # per the original note: returns (cluster id, location) tuples -- TODO confirm; the result is not stored here

## Predict user cluster

In [5]:
import pandas as pd

# Read the test-user embedding file; header=None means the first row is data.
test_embedding = pd.read_table(test_path, header=None)

# Same column selection as for the training embeddings: positions 1 through 100,
# leaving out column 0 (presumably an identifier -- TODO confirm).
test_embedding_columns = test_embedding.iloc[:, 1:101]
test_vectors = test_embedding_columns.values

In [6]:
# Classify the first test embedding with the k-means wrapper.
first_test_vector = test_vectors[0]
prediction = kmeans_model.predict(first_test_vector)
print(f"You are labeled as cluster {prediction[0]}")

# Ask the wrapper to interpret the prediction; the result is reported as the
# location of the cluster's representant.
cluster_representant = kmeans_model.interpret(prediction)
print(f"Your representant lies at {cluster_representant}")

You are labeled as cluster 2
Your representant lies at [ 0.01486199  0.01371666  0.02562781  0.00365942 -0.02649633  0.00594536
  0.00340398  0.02430887 -0.0258613   0.0221435   0.0267743   0.0431899
  0.02423247 -0.00587488 -0.02078651 -0.01407465 -0.00443984 -0.01534354
 -0.03647151 -0.00640599 -0.04103385  0.00996752 -0.00876664  0.01176151
  0.02054307  0.01865038 -0.01510767 -0.00678739 -0.02256598  0.02943914
  0.00178737  0.00828318  0.00204737 -0.03084607  0.00363643 -0.03205378
 -0.0161689  -0.04892306  0.01987074  0.02955026 -0.00241167  0.00733279
  0.00827331  0.03353018  0.01879699 -0.04061238  0.02285283  0.04149151
 -0.02525314  0.02818177  0.01884214 -0.01010471  0.02284043  0.02365362
 -0.00668074  0.00172341  0.00618795 -0.0201387  -0.01207575 -0.01180714
 -0.02032126  0.03899155 -0.01407062  0.01746062  0.00683282 -0.02418404
 -0.02855626 -0.03549846 -0.00555335 -0.02550854 -0.03820352 -0.01752376
  0.026943    0.03193428 -0.02786438 -0.01444948 -0.01561734 -0.006607

## Suggest alternative

In [7]:
# Ask the k-means wrapper for an alternative suggestion, given the representant
# obtained from interpret() in the previous cell, and show it to the user.
user_suggestion = kmeans_model.suggest(cluster_representant)
print(f"Would you like to see a user from {user_suggestion}")


Would you like to see a user from 3


# Agglomerative

In [8]:
# NOTE(review): `agglomorative` is only used for extract_representations().
# Every predict/interpret/suggest call below still goes through `kmeans_model`,
# so this cell repeats the k-means results rather than showing agglomerative
# ones. If AgglomorativeWrapper offers the same predict/interpret/suggest
# methods (not visible from this notebook -- TODO confirm), these calls should
# probably target `agglomorative` instead.
# This cell also rebinds `prediction`, `cluster_representant` and
# `user_suggestion`, which were assigned by earlier cells.
agglomorative = AgglomorativeWrapper()
agglomorative.extract_representations(vectors)  # per the original note: returns (cluster id, location) tuples -- TODO confirm; the result is not stored here
prediction = kmeans_model.predict(test_vectors[0])
print(f"You are labeled as cluster {prediction[0]}")

cluster_representant = kmeans_model.interpret(prediction)
print(f"Your representant lies at {cluster_representant}")
user_suggestion = kmeans_model.suggest(cluster_representant)
print(f"Would you like to see a user from {user_suggestion}")


You are labeled as cluster 2
Your representant lies at [ 0.01486199  0.01371666  0.02562781  0.00365942 -0.02649633  0.00594536
  0.00340398  0.02430887 -0.0258613   0.0221435   0.0267743   0.0431899
  0.02423247 -0.00587488 -0.02078651 -0.01407465 -0.00443984 -0.01534354
 -0.03647151 -0.00640599 -0.04103385  0.00996752 -0.00876664  0.01176151
  0.02054307  0.01865038 -0.01510767 -0.00678739 -0.02256598  0.02943914
  0.00178737  0.00828318  0.00204737 -0.03084607  0.00363643 -0.03205378
 -0.0161689  -0.04892306  0.01987074  0.02955026 -0.00241167  0.00733279
  0.00827331  0.03353018  0.01879699 -0.04061238  0.02285283  0.04149151
 -0.02525314  0.02818177  0.01884214 -0.01010471  0.02284043  0.02365362
 -0.00668074  0.00172341  0.00618795 -0.0201387  -0.01207575 -0.01180714
 -0.02032126  0.03899155 -0.01407062  0.01746062  0.00683282 -0.02418404
 -0.02855626 -0.03549846 -0.00555335 -0.02550854 -0.03820352 -0.01752376
  0.026943    0.03193428 -0.02786438 -0.01444948 -0.01561734 -0.006607