In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the dataset from the CSV file (path is relative to the working directory) into a DataFrame.
data = pd.read_csv("synergy.csv")

# Show the column labels; later cells look rows up by 'Name' and by the six trait columns.
data.columns

## Vector Similarity Metrics:

* Data is represented as n-dimensional vectors, and similarity is determined by the distance between them: the two are inversely related, so the smaller the distance, the greater the similarity.

* Most common metrics:

  * Euclidean distance: 
    * measures the straight line distance between 2 points in space.
    * heavily influenced by the magnitude of the vector.
    * calculated using the normal distance formula. 
  
  * Cosine similarity:
    * measures the cosine of the angle between two vectors, so it is concerned only with the orientation of the vectors (1 = perfectly aligned).
    * it ignores the magnitude of the vectors.
    * calculated as the dot product of the vectors divided by the product of their magnitudes.
  
  * Manhattan distance:
    * measures how close two points are when moving only along the co-ordinate axes (like walking city blocks); unlike cosine similarity, it is still influenced by the vector magnitude.
    * less sensitive to outliers
    * calculated by summing the absolute difference of their co-ordinates.
  
  * Minkowski distance:
    * a generalized distance that includes manhattan and euclidean distance and is defined by a parameter p.
    * if p = 2 it is normal euclidean distance, p = 1 manhattan distance.
    * Chebyshev distance is when p = infinity, which defines the max co-ordinate distance

In [None]:
from math import sqrt

def get_cosine_similarity(vector1, vector2):
    """Return the cosine similarity of two trait vectors, rounded to 2 decimals.

    Both arguments must support lookup by the six trait labels below
    (e.g. a DataFrame row or a dict).

    Returns:
        dot(vector1, vector2) / (|vector1| * |vector2|) rounded to 2 decimals,
        or 0 when either vector has zero magnitude (the ratio is undefined).
    """
    column_labels = ['Speed', 'Aggression', 'Adaptability', 'Technical Skill', 'Teamwork', 'Risk-taking']

    # Norms are computed here at full precision. get_magnitude() rounds to
    # 3 decimals, which skews the ratio and makes any non-zero vector with a
    # norm below 0.0005 look like a zero vector.
    vector1_mag = sqrt(sum(vector1[label] ** 2 for label in column_labels))
    vector2_mag = sqrt(sum(vector2[label] ** 2 for label in column_labels))

    # Guard against division by zero for all-zero vectors.
    if vector1_mag == 0 or vector2_mag == 0:
        return 0

    dot_product = sum(vector1[label] * vector2[label] for label in column_labels)

    # Round only once, on the final value.
    return round(dot_product / (vector1_mag * vector2_mag), 2)

def get_magnitude(vector):
    """Return the Euclidean norm of `vector` over the six trait labels, rounded to 3 decimals.

    `vector` must support lookup by each label (e.g. a DataFrame row or a dict).
    """
    trait_names = ('Speed', 'Aggression', 'Adaptability', 'Technical Skill', 'Teamwork', 'Risk-taking')

    # Sum of squared components, accumulated in label order.
    squared_total = sum(vector[name] ** 2 for name in trait_names)

    norm = sqrt(squared_total)
    return round(norm, 3)

# Score every row after the first against row 0, keyed by that row's 'Name'.
results = {}
for idx in range(1, len(data)):
    other = data.loc[idx]
    results[other['Name']] = get_cosine_similarity(data.loc[0], other)

# Highest cosine similarity = closest match.
best_match = max(results, key=results.get)
print(best_match, ": ", results[best_match])

In [None]:
from math import sqrt

def get_euclidean_distance(point1, point2):
    """Return the straight-line distance between two trait vectors, rounded to 2 decimals.

    Both points must support lookup by each of the six trait labels
    (e.g. a DataFrame row or a dict).
    """
    trait_names = ('Speed', 'Aggression', 'Adaptability', 'Technical Skill', 'Teamwork', 'Risk-taking')

    # Sum of squared per-trait differences, accumulated in label order.
    squared_total = sum((point1[name] - point2[name]) ** 2 for name in trait_names)

    return round(sqrt(squared_total), 2)

# Distance from row 0 to each later row, keyed by that row's 'Name'.
results = {}
for idx in range(1, len(data)):
    other = data.loc[idx]
    results[other['Name']] = get_euclidean_distance(data.loc[0], other)

# Smallest distance = closest match.
nearest = min(results, key=results.get)
print(nearest, ": ", results[nearest])

In [None]:
def get_manhattan_distance(point1, point2):
    """Return the sum of absolute per-trait differences between two vectors.

    Both points must support lookup by each of the six trait labels
    (e.g. a DataFrame row or a dict). The result is not rounded.
    """
    trait_names = ('Speed', 'Aggression', 'Adaptability', 'Technical Skill', 'Teamwork', 'Risk-taking')

    # Absolute gap on each trait, added up in label order.
    return sum(abs(point1[name] - point2[name]) for name in trait_names)

# Manhattan distance from row 0 to each later row, keyed by that row's 'Name'.
results = {}
for idx in range(1, len(data)):
    other = data.loc[idx]
    results[other['Name']] = get_manhattan_distance(data.loc[0], other)

# Smallest distance = closest match.
nearest = min(results, key=results.get)
print(nearest, ": ", results[nearest])

In [None]:
def get_minkowski_distance(point1, point2, p):
    """Return the Minkowski distance of order `p` between two trait vectors, rounded to 2 decimals.

    Both points must support lookup by each of the six trait labels
    (e.g. a DataFrame row or a dict).

    Args:
        point1, point2: the two vectors to compare.
        p: order of the distance; must be > 0. p=1 is Manhattan, p=2 is
            Euclidean, and float("inf") gives the Chebyshev (max co-ordinate)
            distance.

    Raises:
        ValueError: if `p` is not positive.
    """
    if p <= 0:
        raise ValueError(f"p must be positive, got {p!r}")

    column_labels = ['Speed', 'Aggression', 'Adaptability', 'Technical Skill', 'Teamwork', 'Risk-taking']

    # Work in floats: raising fixed-width (NumPy) integers to a large power
    # silently overflows, e.g. int64(10) ** 100 wraps around to 0.
    differences = [abs(float(point1[label] - point2[label])) for label in column_labels]

    largest = max(differences)
    if largest == 0.0:
        return 0.0

    # Limit case p -> infinity: only the largest co-ordinate difference counts.
    if p == float("inf"):
        return round(largest, 2)

    # Factor out the largest difference so every term raised to p lies in
    # [0, 1]; this keeps the sum finite however large p is.
    scaled_total = sum((diff / largest) ** p for diff in differences)

    return round(largest * scaled_total ** (1 / p), 2)

# Minkowski distance (p = 100) from row 0 to each later row, keyed by 'Name'.
# NOTE(review): such a large p presumably approximates the p = infinity limit — confirm.
results = {}
for idx in range(1, len(data)):
    other = data.loc[idx]
    results[other['Name']] = get_minkowski_distance(data.loc[0], other, 100)

# Smallest distance = closest match.
nearest = min(results, key=results.get)
print(nearest, ": ", results[nearest])

In [None]:
from math import sqrt

class VectorSimilarity:
    """Find the row of a DataFrame that is closest to a target row under a chosen metric.

    Rows are compared on the frame's integer-typed columns only and are
    reported by their 'Name' column.

    Attributes:
        data: the DataFrame passed to the constructor.
        results: mapping of 'Name' -> metric value from the most recent
            `similarity_metric` call.
    """

    def __init__(self, data):
        self.data = data

        # Feature columns = every integer-typed column of the frame.
        # NOTE(review): float columns are not selected by include=int, and an
        # integer identifier column would be treated as a feature — confirm
        # against the dataset.
        self.__column_labels = self.data.select_dtypes(include=int).columns
        self.results = {}

        # Dispatch table: metric name -> bound method implementing it.
        self.__metric_functions = {
            'euclidean': self.__get_euclidean_distance,
            'manhattan': self.__get_manhattan_distance,
            'minkowski': self.__get_minkowski_distance,
            'cosine': self.__get_cosine_similarity
        }

    def __get_euclidean_distance(self, point1, point2):
        """Return the straight-line distance between two rows, rounded to 3 decimals."""
        squared_total = sum(
            (point1[label] - point2[label]) ** 2 for label in self.__column_labels
        )
        return round(sqrt(squared_total), 3)

    def __get_manhattan_distance(self, point1, point2):
        """Return the sum of absolute per-feature differences (not rounded)."""
        return sum(abs(point1[label] - point2[label]) for label in self.__column_labels)

    def __get_minkowski_distance(self, point1, point2, p):
        """Return the Minkowski distance of order `p`, rounded to 3 decimals.

        `p` must be > 0; float("inf") gives the Chebyshev (max co-ordinate)
        distance. Raises ValueError for a non-positive `p`.
        """
        if p <= 0:
            raise ValueError(f"p must be positive, got {p!r}")

        # Work in floats: raising fixed-width (NumPy) integers to a large power
        # silently overflows, e.g. int64(10) ** 100 wraps around to 0.
        differences = [
            abs(float(point1[label] - point2[label])) for label in self.__column_labels
        ]

        largest = max(differences, default=0.0)
        if largest == 0.0:
            return 0.0

        # Limit case p -> infinity: only the largest difference counts.
        if p == float("inf"):
            return round(largest, 3)

        # Factor out the largest difference so every term raised to p lies in
        # [0, 1]; this keeps the sum finite however large p is.
        scaled_total = sum((diff / largest) ** p for diff in differences)
        return round(largest * scaled_total ** (1 / p), 3)

    def __get_cosine_similarity(self, vector1, vector2):
        """Return the cosine similarity of two rows, rounded to 3 decimals.

        Returns 0 when either row has zero magnitude (the ratio is undefined).
        """
        vector1_mag = self.__get_magnitude(vector1)
        vector2_mag = self.__get_magnitude(vector2)

        # Guard against division by zero for all-zero vectors.
        if vector1_mag == 0 or vector2_mag == 0:
            return 0

        dot_product = sum(
            vector1[label] * vector2[label] for label in self.__column_labels
        )

        # Round only once, on the final value.
        return round(dot_product / (vector1_mag * vector2_mag), 3)

    def __get_magnitude(self, vector):
        """Return the full-precision Euclidean norm of a row over the feature columns.

        Deliberately not rounded: rounding here would skew the cosine ratio
        and make very small non-zero vectors look like zero vectors.
        """
        return sqrt(sum(vector[label] ** 2 for label in self.__column_labels))

    def similarity_metric(self, targetID, funct_type='cosine', p=3):
        """Compare the target row with every other row and print the closest one.

        Args:
            targetID: index label of the target row (looked up with `.loc`).
            funct_type: one of 'cosine', 'euclidean', 'manhattan', 'minkowski'.
            p: order of the Minkowski distance; ignored by the other metrics.

        Side effects:
            Replaces `self.results` with a 'Name' -> value mapping for every
            row except the target, and prints the best match (highest value
            for cosine, lowest for the distance metrics).

        Raises:
            KeyError: if `funct_type` is unknown or `targetID` is not in the index.
            ValueError: if there is no row other than the target to compare with.
        """
        try:
            funct = self.__metric_functions[funct_type]
        except KeyError:
            known = ", ".join(sorted(self.__metric_functions))
            raise KeyError(
                f"unknown funct_type {funct_type!r}; expected one of: {known}"
            ) from None

        # Only the Minkowski metric takes the extra order argument.
        extra_args = (p,) if funct_type == 'minkowski' else ()

        # The target row does not change during the scan, so fetch it once.
        target_row = self.data.loc[targetID]

        # Clear previous results
        self.results = {}

        # Walk the actual index labels (not 0..n-1) so frames with a
        # non-default index work with the label-based .loc lookups.
        for row_label in self.data.index:
            # Skip the comparison of the row to itself
            if row_label == targetID:
                continue

            current_row = self.data.loc[row_label]
            self.results[current_row['Name']] = funct(target_row, current_row, *extra_args)

        if not self.results:
            raise ValueError("no rows other than the target to compare against")

        # Cosine is a similarity (higher is closer); the rest are distances.
        pick = max if funct_type == "cosine" else min
        closest_item = pick(self.results, key=self.results.get)
        value = self.results[closest_item]

        print(f"The closest item using {funct_type} is: {closest_item} with a value of {value}")

In [None]:
metric = VectorSimilarity(data)

# Closest match to row 0 under the default metric (cosine) ...
metric.similarity_metric(0)

# ... and under each distance metric; only minkowski takes the extra order p.
for metric_name, extra_kwargs in (('euclidean', {}), ('manhattan', {}), ('minkowski', {'p': 5})):
    metric.similarity_metric(0, metric_name, **extra_kwargs)