In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Seven-element score vectors for the driver and for each candidate team.
# NOTE(review): the notebook does not say what each position measures —
# presumably the same seven criteria in the same order; confirm.
hassan = np.array([9, 8, 7, 6, 7, 8, 6])
red_bull = np.array([10, 9, 6, 7, 6, 9, 5])
ferrari = np.array([9, 7, 6, 6, 7, 7, 5])
mercedes = np.array([8, 6, 8, 9, 9, 5, 9])

# Team name -> profile vector; insertion order drives every later loop.
teams = dict(Red_bull=red_bull, Ferrari=ferrari, Mercedes=mercedes)

# Echo the inputs so the reader can check them before any scoring happens.
print("Driver and Teams:")
print(
    "\n".join(
        f"{label}: {vector}"
        for label, vector in (
            ("Hassan", hassan),
            ("Red Bull", teams["Red_bull"]),
            ("Ferrari", teams["Ferrari"]),
            ("Mercedes", teams["Mercedes"]),
        )
    )
)

Driver and Teams:
Hassan: [9 8 7 6 7 8 6]
Red Bull: [10  9  6  7  6  9  5]
Ferrari: [9 7 6 6 7 7 5]
Mercedes: [8 6 8 9 9 5 9]


In [5]:
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr

#cosine similarity
def cosine_similarity(v1, v2):
    """
        Cosine similarity of two vectors: their dot product divided by the
        product of their magnitudes. Computed here as one minus SciPy's
        cosine distance.

        How to read the result:
             1 ==> same direction (angle of 0 degrees)
             0 ==> perpendicular vectors (angle of 90 degrees)
            -1 ==> opposite directions (angle of 180 degrees)
    """
    # SciPy returns a distance (0 = same direction), so flip it into a similarity.
    distance = cosine(v1, v2)
    return 1 - distance

# Score Hassan against every team profile, reporting each value as it is computed.
cosine_res = {}
for team_name, team_profile in teams.items():
    cosine_res[team_name] = cosine_similarity(hassan, team_profile)
    print(f"Similarity between Hassan and {team_name}: {cosine_res[team_name]}\n")

# The closest match is the team with the highest cosine similarity.
best_team_cos = max(cosine_res.items(), key=lambda item: item[1])[0]
print(f"Best team for Hasssan: {best_team_cos}")

Similarity between Hassan and Red_bull: 0.9917790279239155

Similarity between Hassan and Ferrari: 0.9972562318087912

Similarity between Hassan and Mercedes: 0.9564218442002067

Best team for Hasssan: Ferrari


In [8]:
from scipy.spatial.distance import euclidean

def euclidean_similarity(v1, v2):
    """
        Turn the Euclidean distance between two vectors into a similarity.

        The raw distance is 0 for identical vectors and grows as they move
        apart, so it is inverted with 1 / (1 + d):

        1 ==> identical vectors; values closer to 0 mean a worse match.
    """
    distance = euclidean(v1, v2)
    # The +1 keeps the denominator non-zero when the vectors are identical.
    return 1 / (1 + distance)


# Euclidean-based similarity between Hassan and each team, printed as it is computed.
euc_res = {}
for team_name, team_profile in teams.items():
    score = euclidean_similarity(hassan, team_profile)
    euc_res[team_name] = score
    print(f"Similarity between Hassan and {team_name}: {score}\n")

# Highest similarity (i.e. smallest distance) wins.
best_team_euc = max(euc_res.items(), key=lambda item: item[1])[0]
print(f"Best team for Hasssan: {best_team_euc}")

Similarity between Hassan and Red_bull: 0.27429188517743175

Similarity between Hassan and Ferrari: 0.3333333333333333

Similarity between Hassan and Mercedes: 0.14118784806383944

Best team for Hasssan: Ferrari


In [9]:
from scipy.spatial.distance import cityblock
# "cityblock" is another name for the Manhattan distance

def manhattan_similarity(v1, v2):
    """
        Turn the Manhattan (city-block) distance between two vectors into a
        similarity.

        The raw distance is 0 for identical vectors and grows as they move
        apart, so it is inverted with 1 / (1 + d):

        1 ==> identical vectors; values closer to 0 mean a worse match.

        Unlike the Euclidean distance, which squares the differences and
        takes the square root of their sum, the Manhattan distance is the
        plain sum of the absolute differences.
    """
    distance = cityblock(v1, v2)
    # The +1 keeps the denominator non-zero when the vectors are identical.
    return 1 / (1 + distance)


# Manhattan-based similarity between Hassan and each team, printed as it is computed.
man_res = {}
for team_name, team_profile in teams.items():
    score = manhattan_similarity(hassan, team_profile)
    man_res[team_name] = score
    print(f"Similarity between Hassan and {team_name}: {score}\n")

# Highest similarity (i.e. smallest distance) wins.
best_team_man = max(man_res.items(), key=lambda item: item[1])[0]
print(f"Best team for Hasssan: {best_team_man}")

Similarity between Hassan and Red_bull: 0.125

Similarity between Hassan and Ferrari: 0.2

Similarity between Hassan and Mercedes: 0.0625

Best team for Hasssan: Ferrari


Now for the final comparison and the best decision for Hassan.

In [18]:
# Divider strings reused throughout the report.
heavy_rule = "=" * 70
light_rule = "-" * 50

print(heavy_rule)
print("SIMILARITY ANALYSIS - HASSAN'S TEAM COMPATIBILITY")
print(heavy_rule)

# Method label -> {team name -> similarity}, taken from the three scoring cells above.
final_results = dict(
    Cosine_sim=cosine_res,
    Euclidean_sim=euc_res,
    Manhattan_sim=man_res,
)

# --- Score table: one row per method, one column per team ---
print("SIMILARITY SCORES:")
print(light_rule)
print(f"{'Method':<20} {'Red Bull':<12} {'Ferrari':<12} {'Mercedes':<12}")
print(light_rule)

for method_name, results in final_results.items():
    red_bull_score = results['Red_bull']
    ferrari_score = results['Ferrari']
    mercedes_score = results['Mercedes']
    print(f"{method_name:<20} {red_bull_score:<12.4f} {ferrari_score:<12.4f} {mercedes_score:<12.4f}")

print(light_rule)

# --- Winner per method: the team with the highest score under that measure ---
print("BEST TEAM USING EVERY METHOD:")
best_team = {}
for method_name, results in final_results.items():
    team, score = max(results.items(), key=lambda item: item[1])
    best_team[method_name] = (team, score)
    print(f"{method_name:<20}: {team} (Score: {score:.4f})")

# --- Unweighted mean of the three scores per team ---
# The three measures are on different scales (cosine lies in [-1, 1], the two
# distance-based scores in (0, 1]), so the mean is a rough aggregate only.
print("AVERAGE SCORES:")
team_avg = {}
for team in teams:
    per_method_scores = [scores[team] for scores in final_results.values()]
    avg_score = np.mean(per_method_scores)
    team_avg[team] = avg_score
    print(f"{team:<12}: {avg_score:.4f}")

# Overall recommendation: highest average across the three measures.
winner, winner_score = max(team_avg.items(), key=lambda item: item[1])

print()
print(heavy_rule)
print("FINAL RECOMMENDATION")
print(heavy_rule)
print(f"Best Team for Hassan: {winner}")
print(f"Average Compatibility Score: {winner_score:.4f}")

SIMILARITY ANALYSIS - HASSAN'S TEAM COMPATIBILITY
SIMILARITY SCORES:
--------------------------------------------------
Method               Red Bull     Ferrari      Mercedes    
--------------------------------------------------
Cosine_sim           0.9918       0.9973       0.9564      
Euclidean_sim        0.2743       0.3333       0.1412      
Manhattan_sim        0.1250       0.2000       0.0625      
--------------------------------------------------
BEST TEAM USING EVERY METHOD:
Cosine_sim          : Ferrari (Score: 0.9973)
Euclidean_sim       : Ferrari (Score: 0.3333)
Manhattan_sim       : Ferrari (Score: 0.2000)
AVERAGE SCORES:
Red_bull    : 0.4637
Ferrari     : 0.5102
Mercedes    : 0.3867

FINAL RECOMMENDATION
Best Team for Hassan: Ferrari
Average Compatibility Score: 0.5102
