In [2]:
from rapidfuzz import process, fuzz,utils

In [3]:
# Check the similarity score
name = "Kurtis Pykes"
full_name = "Kurtis K D Pykes"

print(f"Similarity score: {fuzz.ratio(name, full_name)}")

"""
Similarity Score: 86
"""

Similarity score: 85.71428571428572


'\nSimilarity Score: 86\n'

##
 - the partial_ratio() seeks to find how partially similar two strings are. 
 - Two strings are partially similar if they have some of the words in a common order. 
 - The partial_ratio() calculates the similarity by taking the shortest string, which in this scenario is stored in the variable name, then compares it against the sub-strings of the same length in the longer string, which is stored in full_name. 
 - Since order matters in partial ratio, our score dropped in this instance. Therefore, to get a 100% similarity match, you would have to move the "K D" part (signifying my middle name) to the end of the string. For example: 

In [4]:
# Check the similarity score
name = "Kurtis Pykes"
full_name = "Kurtis K D Pykes"

print(f"Similarity score: {fuzz.partial_ratio(name, full_name)}")

"""
Similarity Score: 67
"""

Similarity score: 73.6842105263158


'\nSimilarity Score: 67\n'

In [5]:
# Order matters with partial ratio
# Check the similarity score
name = "Kurtis Pykes"
full_name = "Kurtis Pykes K D"
print(f"Partial ratio similarity score: {fuzz.partial_ratio(name, full_name)}")
# But order will not effect simple ratio if strings do not match
print(f"Simple ratio similarity score: {fuzz.ratio(name, full_name)}")

"""
Partial ratio similarity score: 100
Simple ratio similarity score: 86
"""

Partial ratio similarity score: 100.0
Simple ratio similarity score: 85.71428571428572


'\nPartial ratio similarity score: 100\nSimple ratio similarity score: 86\n'

## Token sort ratio
  -  Token sort doesn’t care about what order words occur in. 
  -  It accounts for similar strings that aren’t in order as expressed above. 

In [6]:
# Check the similarity score
full_name = "Kurtis K D Pykes"
full_name_reordered = "Kurtis Pykes K D"

# Order does not matter for token sort ratio
print(f"Token sort ratio similarity score: {fuzz.token_sort_ratio(full_name_reordered, full_name)}")

# Order matters for partial ratio
print(f"Partial ratio similarity score: {fuzz.partial_ratio(full_name, full_name_reordered)}")

# Order will not effect simple ratio if strings do not match
print(f"Simple ratio similarity score: {fuzz.ratio(name, full_name)}")

"""
Token sort ratio similarity score: 100
Partial ratio similarity score: 75
Simple ratio similarity score: 86
"""

Token sort ratio similarity score: 100.0
Partial ratio similarity score: 85.71428571428572
Simple ratio similarity score: 85.71428571428572


'\nToken sort ratio similarity score: 100\nPartial ratio similarity score: 75\nSimple ratio similarity score: 86\n'

In [7]:
# Check the similarity score
name = "Kurtis Pykes"
full_name = "Kurtis K D Pykes"

print(f"Token sort ratio similarity score: {fuzz.token_sort_ratio(name, full_name)}")

"""
Token sort ratio similarity score: 86
"""

Token sort ratio similarity score: 85.71428571428572


'\nToken sort ratio similarity score: 86\n'

##Token set ratio
 - The token_set_ratio() method is pretty similar to the token_sort_ratio(), except it takes out common tokens before calculating how similar the strings are: this is extremely helpful when the strings are significantly different in length. 

In [8]:
# Check the similarity score
name = "Kurtis Pykes"
full_name = "Kurtis K D Pykes"

print(f"Token sort ratio similarity score: {fuzz.token_set_ratio(name, full_name)}")

"""
Token sort ratio similarity score: 100
"""

Token sort ratio similarity score: 100.0


'\nToken sort ratio similarity score: 100\n'

## The process module
 - The process module enables users to extract text from a collection using fuzzy string matching. Calling the extract() method on the process module returns the strings with a similarity score in a vector. For example: 

In [10]:
#from thefuzz import process

collection = ["AFC Barcelona", "Barcelona AFC", "barcelona fc", "afc barcalona"]
print(process.extract("barcelona", collection, scorer=fuzz.ratio))

"""
[('barcelona fc', 86), ('AFC Barcelona', 82), ('Barcelona AFC', 82), ('afc barcalona', 73)]
"""

[('barcelona fc', 85.71428571428572, 2), ('AFC Barcelona', 72.72727272727273, 0), ('Barcelona AFC', 72.72727272727273, 1), ('afc barcalona', 72.72727272727273, 3)]


"\n[('barcelona fc', 86), ('AFC Barcelona', 82), ('Barcelona AFC', 82), ('afc barcalona', 73)]\n"

In [2]:
from thefuzz import process # pip install thefuzz

collection = ["AFC Barcelona", "Barcelona AFC", "barcelona fc", "afc barcalona"]
print(process.extract("barcelona", collection, scorer=fuzz.ratio))

"""
[('barcelona fc', 86), ('AFC Barcelona', 82), ('Barcelona AFC', 82), ('afc barcalona', 73)]
"""

NameError: name 'fuzz' is not defined

In [None]:
collection = ["AFC Barcelona", "Barcelona AFC", "barcelona fc", "afc barcalona"]
print(process.extract("barcelona", collection, scorer=fuzz.ratio))

"""
[('barcelona fc', 86), ('AFC Barcelona', 82), ('Barcelona AFC', 82), ('afc barcalona', 73)]
"""

In [None]:
##
 - pip install fuzzywuzzy
 - https://www.geeksforgeeks.org/fuzzywuzzy-python-library/   
 - https://stackoverflow.com/questions/63700620/pass-more-than-one-parameter-in-fuzzywuzzy   

In [6]:
from fuzzywuzzy import fuzz 
from fuzzywuzzy import process 
  
s1 = "I love GeeksforGeeks"
s2 = "I am loving GeeksforGeeks"
print ("FuzzyWuzzy Ratio: ", fuzz.ratio(s1, s2)) 
print ("FuzzyWuzzy PartialRatio: ", fuzz.partial_ratio(s1, s2))
print ("FuzzyWuzzy TokenSortRatio: ", fuzz.token_sort_ratio(s1, s2)) 
print ("FuzzyWuzzy TokenSetRatio: ", fuzz.token_set_ratio(s1, s2)) 
print ("FuzzyWuzzy WRatio: ", fuzz.WRatio(s1, s2),'\n\n')
  
# for process library, 
query = 'geeks for geeks'
choices = ['geek for geek', 'geek geek', 'g. for geeks']  
print ("List of ratios: ")
print (process.extract(query, choices), '\n')
print ("Best among the above list: ",process.extractOne(query, choices)) 

FuzzyWuzzy Ratio:  84
FuzzyWuzzy PartialRatio:  85
FuzzyWuzzy TokenSortRatio:  84
FuzzyWuzzy TokenSetRatio:  86
FuzzyWuzzy WRatio:  84 


List of ratios: 
[('g. for geeks', 95), ('geek for geek', 93), ('geek geek', 86)] 

Best among the above list:  ('g. for geeks', 95)




In [7]:
print (process.extract(query, choices, scorer = fuzz.WRatio), '\n')

[('g. for geeks', 95), ('geek for geek', 93), ('geek geek', 86)] 



In [1]:
from fuzzywuzzy import fuzz 
from fuzzywuzzy import process 
  
s1 = "I love GeeksforGeeks"
s2 = "I am loving GeeksforGeeks"
print ("FuzzyWuzzy Ratio: ", fuzz.ratio(s1, s2)) 
print ("FuzzyWuzzy PartialRatio: ", fuzz.partial_ratio(s1, s2))
print ("FuzzyWuzzy TokenSortRatio: ", fuzz.token_sort_ratio(s1, s2)) 
print ("FuzzyWuzzy TokenSetRatio: ", fuzz.token_set_ratio(s1, s2)) 
print ("FuzzyWuzzy WRatio: ", fuzz.WRatio(s1, s2),'\n\n')
  
# for process library, 
query = 'geeks for geeks'
#choices = ['geek for geek', 'geek geek', 'g. for geeks']  
choices = ['geek for geek']  
print ("List of ratios: ")
print (process.extract(query, choices), '\n')
print ("Best among the above list: ",process.extractOne(query, choices)) 

FuzzyWuzzy Ratio:  84
FuzzyWuzzy PartialRatio:  85
FuzzyWuzzy TokenSortRatio:  84
FuzzyWuzzy TokenSetRatio:  86
FuzzyWuzzy WRatio:  84 


List of ratios: 
[('geek for geek', 93)] 

Best among the above list:  ('geek for geek', 93)




In [None]:
   result = process.extractBests(choice,
                                  strings,
                                  scorer=scorer,
                                  processor=processor,
                                  score_cutoff=100,
                                  limit=None)

In [1]:
from rapidfuzz import process, fuzz
choices = ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"]
process.extract("new york jets", choices, scorer=fuzz.WRatio, limit=2)
#[('New York Jets', 100.0, 1), ('New York Giants', 78.57142857142857, 2)]


[('New York Jets', 76.92307692307692, 1),
 ('New York Giants', 64.28571428571428, 2)]

In [2]:
process.extractOne("cowboys", choices, scorer=fuzz.WRatio)
#('Dallas Cowboys', 90.0, 3)

('Dallas Cowboys', 83.07692307692308, 3)

In [3]:
process.extractOne("new york jets", choices, scorer=fuzz.WRatio)

('New York Jets', 76.92307692307692, 1)

In [14]:
import pandas as pd
pd.options.display.float_format = '{:20,.2f}'.format
from rapidfuzz import process, fuzz,utils
choice = ["AFC Barcelona", "Barcelona AFC", "barcelona fc", "afc barcalona"]
print(f' process.extract("barcelona",choice, scorer=fuzz.token_set_ratio) \n  \
      result {process.extract("barcelona",choice, scorer=fuzz.token_set_ratio)}')
print(f' process.extract("barcelona",choice, scorer=fuzz.token_set_ratio,  processor=utils.default_process) \n  \
      result {process.extract("barcelona",choice, scorer=fuzz.token_set_ratio,  processor=utils.default_process)}')



 process.extract("barcelona",choice, scorer=fuzz.token_set_ratio) 
        result [('barcelona fc', 100.0, 2), ('AFC Barcelona', 72.72727272727272, 0), ('Barcelona AFC', 72.72727272727272, 1), ('afc barcalona', 72.72727272727272, 3)]
 process.extract("barcelona",choice, scorer=fuzz.token_set_ratio,  processor=utils.default_process) 
        result [('AFC Barcelona', 100.0, 0), ('Barcelona AFC', 100.0, 1), ('barcelona fc', 100.0, 2), ('afc barcalona', 72.72727272727272, 3)]
