In [1]:
import time
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import collections
from matching import Player
from matching.games import StableRoommates


# Calculate the similarity of preferences

## Preference Matrix
Each row holds the activity ratings of one user.
Each column is an activity.

In [2]:

# Names of the users, listed in the same order as the rows of the matrix below.
user_ids = ['Snake', 'Panda', 'Tree', 'Dolphin']

# One row per user, one column per activity; each entry is that user's rating.
preference_matrix = np.array(
    [
        [1, 2, 3, 3, 1, 2],
        [1, 2, 4, 5, 1, 1],
        [5, 5, 3, 0, 5, 1],
        [5, 3, 2, 0, 4, 1],
    ],
    dtype=np.uint8,
)

# Report how much memory the raw ratings occupy.
size_in_bytes = preference_matrix.nbytes
print(f"Size of the matrix: {size_in_bytes} bytes")

Size of the matrix: 24 bytes


Create a huge matrix for stress testing

In [3]:
# num_users = 10000
# num_preferences = 10000
# huge_matrix = np.random.randint(0, 256, size=(num_users, num_preferences), dtype=np.uint8)
# 
# preference_matrix = huge_matrix
# 
# size_in_bytes = huge_matrix.nbytes
# size_in_gb = size_in_bytes / (1024**3)
# 
# print(f"Size of the matrix: {size_in_gb:.2f} GB")

## Manual Cos Calc
This may come in useful later, when we want to calculate the similarity on a per-user basis.

In [4]:
# def cosine_similarity(matrix):
#     dot_product = np.dot(matrix, matrix.T)
#     #print(dot_product)
#     norms = np.linalg.norm(matrix, axis=1)
#     # print(norms)
#     cosine_sim = dot_product / (np.outer(norms, norms))
#     return cosine_sim
# 
# 
# start_time = time.time()
# cosine_sim_matrix = cosine_similarity(preference_matrix)
# end_time = time.time()
# elapsed_time = end_time - start_time
# print(f"Elapsed time: {elapsed_time} seconds")
# # Print the cosine similarity matrix
# print(cosine_sim_matrix)


## Use sklearn for the Cosine Similarity Calculation

In [5]:
# Time the pairwise cosine-similarity computation over all rows of
# preference_matrix.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() follows the wall clock and can jump if the
# system time is adjusted, which would distort the measurement.
start_time = time.perf_counter()

cosine_sim_matrix = cosine_similarity(preference_matrix)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print(f"Elapsed time: {np.round(elapsed_time,6)} seconds")


# Entry [i][j] is the similarity between row i and row j of preference_matrix.
print(cosine_sim_matrix)


Elapsed time: 0.003764 seconds
[[1.         0.95470327 0.63543805 0.58609427]
 [0.95470327 1.         0.51663504 0.46709937]
 [0.63543805 0.51663504 1.         0.97990505]
 [0.58609427 0.46709937 0.97990505 1.        ]]


## Interpret the result
With words...

In [6]:
# Spell out the similarity matrix in words: one heading per user, followed by
# that user's score against every user (including themselves).
print('Higher Score = Better')

for row_idx in range(len(cosine_sim_matrix)):
    print(f'User {user_ids[row_idx]} gets along with:')
    row_scores = cosine_sim_matrix[row_idx]
    for col_idx in range(len(row_scores)):
        print(f"> user {user_ids[col_idx]}: {row_scores[col_idx]}")
        


Higher Score = Better
User Snake gets along with:
> user Snake: 0.9999999999999999
> user Panda: 0.9547032697824668
> user Tree: 0.6354380478103605
> user Dolphin: 0.5860942701532693
User Panda gets along with:
> user Snake: 0.9547032697824668
> user Panda: 1.0000000000000002
> user Tree: 0.5166350401452484
> user Dolphin: 0.46709936649691386
User Tree gets along with:
> user Snake: 0.6354380478103605
> user Panda: 0.5166350401452484
> user Tree: 0.9999999999999998
> user Dolphin: 0.9799050483303551
User Dolphin gets along with:
> user Snake: 0.5860942701532693
> user Panda: 0.46709936649691386
> user Tree: 0.9799050483303551
> user Dolphin: 0.9999999999999999


In [7]:
# preference_list = {}
# 
# for i, user_pref in enumerate(cosine_sim_matrix):
#     print(f"list for {user_ids[i]}")
#     the_list = {score:user_ids[j] for j, score in enumerate(user_pref) if j != i} # j != i If its not the user itself
#     print(f'unsorted {the_list}')
#     sorted_list = dict(sorted(the_list.items(), reverse=True))
#     print(f'sorted {sorted_list}')
#     final_list = list(sorted_list.values())
#     print(final_list)
#     preference_list[user_ids[i]] = final_list
#     print('======')
# 
# preference_list


## Generate Users

## Cleaned Functions
Use these functions for the actual implementation.

In [8]:

# def get_sorted_list_func(user_ids, user_preferences):
#     def get_sorted_list(user_index):
#         user_score_list = {user_ids[partner_id]:score for partner_id, score in enumerate(user_preferences[user_index]) if partner_id != user_index}
#         user_score_ordered = collections.OrderedDict(sorted(user_score_list.items(), key=lambda item: item[1], reverse=True))
#         return list(user_score_ordered)
#     return get_sorted_list
# 
# def check_input(user_ids, preference_matrix):
#     user_limit = 10000
#     if len(user_ids)>user_limit or len(preference_matrix)>user_limit:
#         raise ValueError(f"The user limit is {user_limit}")
#     if len(user_ids) != len(preference_matrix):
#         raise ValueError(f"user list and matrix length do not match")
# 
# def get_preference_lists(user_ids, preference_matrix):
#     check_input(user_ids, preference_matrix)
# 
#     user_preferences = cosine_similarity(preference_matrix)
#     get_sorted_list = get_sorted_list_func(user_ids, user_preferences)
#     return [get_sorted_list(user_index) for user_index in range(len(user_ids))]
# 




## Use the Functions

[Snake, Panda, Tree, Dolphin]


In [14]:
from functions.matching_users import *


# Seven users this time; the order matches the rows of the matrix below.
user_ids = ['Snake', 'Panda', 'Tree', 'Dolphin', 'Water', 'Basel', 'Table 16']

# One row of ratings per user, six rated columns each.
preference_matrix = np.array(
    [
        [1, 2, 3, 3, 1, 2],
        [1, 2, 4, 5, 1, 1],
        [5, 5, 3, 0, 5, 1],
        [0, 1, 1, 0, 0, 1],
        [5, 5, 0, 0, 1, 1],
        [4, 3, 3, 0, 5, 1],
        [1, 1, 1, 0, 1, 1],
    ],
    dtype=np.uint8,
)

# Report how much memory the raw ratings occupy.
size_in_bytes = preference_matrix.nbytes
print(f"Size of the matrix: {size_in_bytes} bytes")

# Hand both inputs to the packaged helper; the bare name on the last line
# makes the notebook display the result.
preference_order = get_preference_lists(user_ids, preference_matrix)
preference_order


ModuleNotFoundError: No module named 'data'

In [26]:
import json
from datetime import date
import random
from faker import Faker


def get_workdays():
    """Return a random selection of weekday abbreviations, in week order.

    Each of the five weekdays is kept or dropped by its own
    ``random.choice([False, True])`` draw, so the result can hold anything
    from zero to five entries.
    """
    weekdays = ['Mon', 'Tue', 'Wed','Thu','Fri']
    # All five flags are drawn up front, before any filtering happens.
    keep_flags = [random.choice([False, True]) for _ in range(5)]

    selected = []
    for day, keep in zip(weekdays, keep_flags):
        if keep:
            selected.append(day)
    return selected

def get_preferences():
    """Return a dict mapping every preference topic to a random score.

    Scores are drawn with ``random.choice`` from a pool holding ten zeros and
    one each of 1 through 5, so 0 is by far the most likely value.
    """
    topics = [
        'Lord of the Rings',
        'Sports',
        'Football',
        'PC Gaming',
        'Console Gaming',
        'Sleeping',
        'Hygiene',
        'Cooking',
        'Books',
        'Anime/Manga',
    ]
    # Ten zeros followed by 1..5: the same 15-element pool, written compactly.
    score_pool = [0] * 10 + [1, 2, 3, 4, 5]

    ratings = {}
    for topic in topics:
        ratings[topic] = random.choice(score_pool)
    return ratings

def create_fake_users(amount):
    """Build ``amount`` ``User`` objects filled with generated data.

    Users get the ids ``0 .. amount - 1``. Each one is constructed from a dict
    with a Faker-generated name (``en_GB`` locale), the result of
    ``get_workdays()`` and the result of ``get_preferences()``.
    """
    fake = Faker(locale = "en_GB")

    users = []
    for user_id in range(amount):
        # Dict entries are evaluated top to bottom: name, then days, then
        # preferences, once per user.
        profile = {
            'name': fake.name(),
            'days': get_workdays(),
            'preferences': get_preferences(),
        }
        users.append(User(user_id, profile))
    return users

def get_funfact(preferences):
    """Return an 'I really like ...!' sentence for the top-scored preference.

    ``preferences`` maps topic names to scores. If several topics share the
    highest score, the one that comes first in iteration order is used.
    """
    favourite = max(preferences, key=preferences.get)
    return f'I really like {favourite}!'

    
    

# Quick check: create_fake_users(1) returns a list holding a single fake user.
user = create_fake_users(1)
# Last expression of the cell, so the notebook displays the fun-fact string
# built from that user's preferences.
get_funfact(user[0].preferences)


'I really like Sports!'

In [25]:
from functions.data import User
# import random
# from faker import Faker
# 
# 
# 
# def get_workdays():
#     random_array = [random.choice([False, True]) for _ in range(5)]
#     return [days for nr, days in enumerate(['Mon', 'Tue', 'Wen','Thu','Fri']) if random_array[nr]]
# 
# def get_preferences():
#     preference_topics = ['Lord of the Rings', 'Hackatons', 'Console Gaming', 'PC Gaming', 'Humans', 'Bouldering', 'Boxing', 'Football']
#     score = [0,0,0,0,0,0,0,0,0,0,1,2,3,4,5]
#     return {preference:random.choice(score) for preference in preference_topics}
# 
# def create_fake_users(amount):
#     fake = Faker(locale = "en_GB")
#     return [User(i, {'name':fake.name(),'days':get_workdays(),'preferences':get_preferences()}) for i in range(amount)]
#         
# 
# fake_users = create_fake_users(10)
# 
# for u in fake_users:
#     print(str(u))




In [12]:
from functions.data import create_fake_users

In [13]:
# Generate ten fake users and print each one's string representation.
fake_users = create_fake_users(10)

for fake_user in fake_users:
    # print() applies str() to its argument itself.
    print(fake_user)

{"name": "Ms Lisa Griffiths", "days": ["Wen", "Fri"], "preferences": {"Lord of the Rings": 3, "Hackatons": 0, "Console Gaming": 0, "PC Gaming": 1, "Humans": 2, "Bouldering": 0, "Boxing": 2, "Football": 0}, "id": 0}
{"name": "Mr Tony Alexander", "days": ["Tue", "Thu"], "preferences": {"Lord of the Rings": 0, "Hackatons": 0, "Console Gaming": 1, "PC Gaming": 0, "Humans": 3, "Bouldering": 3, "Boxing": 0, "Football": 0}, "id": 1}
{"name": "Natalie Davies", "days": ["Mon", "Tue", "Fri"], "preferences": {"Lord of the Rings": 0, "Hackatons": 0, "Console Gaming": 0, "PC Gaming": 1, "Humans": 2, "Bouldering": 0, "Boxing": 3, "Football": 4}, "id": 2}
{"name": "Janice O'Connor", "days": ["Tue", "Wen", "Thu"], "preferences": {"Lord of the Rings": 0, "Hackatons": 2, "Console Gaming": 0, "PC Gaming": 5, "Humans": 0, "Bouldering": 1, "Boxing": 0, "Football": 0}, "id": 3}
{"name": "Carl Richards", "days": ["Thu", "Fri"], "preferences": {"Lord of the Rings": 0, "Hackatons": 0, "Console Gaming": 4, "PC 