# String Similarity using spaCy

#### spaCy does not provide a built-in Levenshtein distance, so I am using the implementation from the Python `Levenshtein` package.

It computes the minimum edit distance between two strings, i.e. the smallest number of single-character edits needed to turn one string into the other, using three operations:
1. Substitution
2. Insertion
3. Deletion

# Textacy

Higher-level NLP built on spaCy

#### N-grams are computed using Textacy


In [80]:
import textacy
import collections
import re
import warnings
from cytoolz import itertoolz
from Levenshtein import (distance as edit_distance,
                         hamming as _hamming,
                         jaro_winkler as _jaro_winkler,
                         ratio as _ratio)
from spacy.en import English
from __future__ import unicode_literals 

In [53]:
# Create the spaCy English parser once; it is passed to string_similarity
# and used there to process raw text before n-gram extraction.
parser = English()

In [89]:
class string_similarity:
    """String-similarity helpers: n-gram extraction and Levenshtein distance.

    The instance keeps a reference to a callable ``parser``; ``find_ngrams``
    applies it to the input text and hands the result to textacy.
    """

    def __init__(self, parser):
        """Store the parser that ``find_ngrams`` will call on its input text."""
        self.parser = parser

    def find_ngrams(self, text, n):
        """Return the n-grams of ``text`` as a list.

        ``n`` is the number of tokens per gram: ``n = 1`` means unigrams,
        ``n = 2`` means bigrams, and so on.

        ``text`` is first run through ``self.parser``; the parsed result is
        passed to ``textacy.extract.ngrams`` with ``filter_stops=False``,
        ``filter_punct=True`` and ``filter_nums=False``.
        """
        parsed = self.parser(text)
        grams = textacy.extract.ngrams(
            parsed,
            n,
            filter_stops=False,
            filter_punct=True,
            filter_nums=False,
        )
        # textacy's result is materialised so callers always get a list.
        return list(grams)

    def compute_levenshtein(self, str1, str2):
        """Return the minimum edit distance between ``str1`` and ``str2``.

        The operations counted are deletion, insertion and substitution.
        The work is delegated to ``edit_distance`` (``Levenshtein.distance``).
        """
        distance = edit_distance(str1, str2)
        return distance

In [91]:
# Build the helper around the parser instance created earlier.
obj = string_similarity(parser)

# Levenshtein distance


In [92]:
# Edit distance between two short words; the value is echoed as the cell output.
obj.compute_levenshtein("hello","hey")

3

# N-Grams

In [88]:
# Extract bigrams (n=2) from a sample sentence; the resulting list is echoed as the cell output.
sentence = "I am trying ngram using spacy"
obj.find_ngrams(sentence,2)

[I am, am trying, trying ngram, ngram using, using spacy]