# RapidFuzz experiments
## Compare skills similarity

Use the [Lightcast](https://lightcast.io/) skill taxonomy to compare skill similarity.

In [10]:
import numpy as np
import pandas as pd

In [None]:
def create_similarity_matrix(skills):
    """Build a square matrix of pairwise similarity scores between skills.

    Entry ``[i, j]`` holds ``fuzz.WRatio(skills[i], skills[j])`` for
    ``i != j``; every diagonal entry is fixed at ``100.0`` without calling
    the scorer.

    NOTE(review): ``fuzz`` must already be available in the notebook
    namespace (presumably ``from rapidfuzz import fuzz``) -- it is not
    imported in the cells shown alongside this function; confirm.

    Args:
        skills: Sized, iterable collection of skill names (e.g. a list of
            strings).

    Returns:
        A NumPy float array of shape ``(len(skills), len(skills))``.
    """
    size = len(skills)
    scores = np.zeros((size, size))

    for row, left in enumerate(skills):
        for col, right in enumerate(skills):
            # A skill compared with itself is a perfect match by definition.
            scores[row, col] = 100.0 if row == col else fuzz.WRatio(left, right)

    return scores

In [25]:
# Android-related skills whose pairwise similarity is compared.
skills = [
    "Android SDK",
    "Android Studio",
    "Android Jetpack",
    "Jetpack Compose",
    "Kotlin",
    "Gradle",
    "Android Robot RC",
]
similarity_matrix = create_similarity_matrix(skills)

In [26]:
# Label both axes with the skill names so each score can be looked up by pair.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=skills,
    columns=skills,
)
similarity_df

Unnamed: 0,Android SDK,Android Studio,Android Jetpack,Jetpack Compose,Kotlin,Gradle,Android Robot RC
Android SDK,100.0,73.888889,73.888889,14.615385,30.0,30.0,73.888889
Android Studio,73.888889,100.0,63.333333,19.655172,32.727273,30.0,63.333333
Android Jetpack,73.888889,63.333333,100.0,60.454545,30.0,30.0,60.454545
Jetpack Compose,14.615385,19.655172,60.454545,100.0,21.375,25.714286,24.516129
Kotlin,30.0,32.727273,30.0,21.375,100.0,16.666667,42.75
Gradle,30.0,30.0,30.0,25.714286,16.666667,100.0,30.0
Android Robot RC,73.888889,63.333333,60.454545,24.516129,42.75,30.0,100.0


Since the previous skills are international terms, it doesn't make sense to translate them into other languages. To check RapidFuzz performance across different languages, skills that can be translated are used instead: the top 5 waiter skills and the top 5 cook skills.

In [21]:
# Translatable skills for the two occupations.
waiter_skills = [
    "Order Taking",
    "Customer Service",
    "Table Setting",
    "Menu Knowledge",
    "Payment Processing",
]
cook_skills = [
    "Food Preparation",
    "Grilling",
    "Baking",
    "Food Safety",
    "Recipe Knowledge",
]

# One similarity matrix per occupation.
waiter_similarity_matrix = create_similarity_matrix(waiter_skills)
cook_similarity_matrix = create_similarity_matrix(cook_skills)


In [23]:
# Waiter skill scores, with rows and columns labelled by skill name.
waiter_similarity_df = pd.DataFrame(
    data=waiter_similarity_matrix,
    index=waiter_skills,
    columns=waiter_skills,
)
waiter_similarity_df

Unnamed: 0,Order Taking,Customer Service,Table Setting,Menu Knowledge,Payment Processing
Order Taking,100.0,28.571429,40.0,30.769231,42.857143
Customer Service,28.571429,100.0,34.482759,26.666667,35.294118
Table Setting,40.0,34.482759,100.0,29.62963,45.16129
Menu Knowledge,30.769231,26.666667,29.62963,100.0,37.5
Payment Processing,42.857143,35.294118,45.16129,37.5,100.0


In [24]:
# Cook skill scores, with rows and columns labelled by skill name.
cook_similarity_df = pd.DataFrame(
    data=cook_similarity_matrix,
    index=cook_skills,
    columns=cook_skills,
)
cook_similarity_df

Unnamed: 0,Food Preparation,Grilling,Baking,Food Safety,Recipe Knowledge
Food Preparation,100.0,38.571429,49.090909,51.851852,29.6875
Grilling,38.571429,100.0,42.857143,0.0,27.692308
Baking,49.090909,42.857143,100.0,16.363636,30.0
Food Safety,51.851852,0.0,16.363636,100.0,28.148148
Recipe Knowledge,29.6875,27.692308,30.0,28.148148,100.0


In [35]:
# French translations of the English waiter and cook skill lists.
waiter_skills_fr = [
    "Prise de commande",
    "Service à la clientèle",
    "Dressage de la table",
    "Connaissance des menus",
    "Traitement des paiements",
]
# Previously this list repeated the English cook skills verbatim, so the
# "_fr" variable held no French text at all.
cook_skills_fr = [
    "Préparation des aliments",
    "Grillade",
    "Cuisson au four",
    "Sécurité alimentaire",
    "Connaissance des recettes",
]

# Score English and French waiter skills together to see how the scorer
# behaves across languages.
waiter_all_skills = waiter_skills + waiter_skills_fr
waiter_all_similarity_matrix = create_similarity_matrix(waiter_all_skills)
waiter_all_skills

['Order Taking',
 'Customer Service',
 'Table Setting',
 'Menu Knowledge',
 'Payment Processing',
 'Prise de commande',
 'Service à la clientèle',
 'Dressage de la table',
 'Connaissance des menus',
 'Traitement des paiements']

In [34]:
# Combined English + French waiter scores, labelled by skill name on both axes.
waiter_all_similarity_df = pd.DataFrame(
    data=waiter_all_similarity_matrix,
    index=waiter_all_skills,
    columns=waiter_all_skills,
)
waiter_all_similarity_df

Unnamed: 0,Order Taking,Customer Service,Table Setting,Menu Knowledge,Payment Processing,Prise de commande,Service à la clientèle,Dressage de la table,Connaissance des menus,Traitement des paiements
Order Taking,100.0,28.571429,40.0,30.769231,42.857143,41.37931,37.5,37.894737,34.285714,45.0
Customer Service,28.571429,100.0,34.482759,26.666667,35.294118,36.363636,57.826087,27.777778,31.578947,39.375
Table Setting,40.0,34.482759,100.0,29.62963,45.16129,26.666667,41.04,47.5,34.615385,40.909091
Menu Knowledge,30.769231,26.666667,29.62963,100.0,37.5,32.258065,36.0,27.941176,50.666667,36.642857
Payment Processing,42.857143,35.294118,45.16129,37.5,100.0,28.571429,30.0,26.315789,30.0,47.619048
Prise de commande,41.37931,36.363636,26.666667,32.258065,28.571429,100.0,41.025641,48.648649,46.153846,43.902439
Service à la clientèle,37.5,57.826087,41.04,36.0,30.0,41.025641,100.0,47.619048,31.818182,43.478261
Dressage de la table,37.894737,27.777778,47.5,27.941176,26.315789,48.648649,47.619048,100.0,42.857143,40.909091
Connaissance des menus,34.285714,31.578947,34.615385,50.666667,30.0,46.153846,31.818182,42.857143,100.0,52.173913
Traitement des paiements,45.0,39.375,40.909091,36.642857,47.619048,43.902439,43.478261,40.909091,52.173913,100.0
