### Imports

In [1]:
import re
import spacy
from spacy.tokenizer import Tokenizer
# Dedicated pipeline instance; its tokenizer is replaced with a custom one
# further down, so it is kept separate from any stock pipeline.
custom_nlp = spacy.load("en_core_web_sm")

In [2]:
# Sample input: note the '+' and '$' embedded inside "Python+developer" and
# "London$based" -- these are the characters the custom infix rule targets.
text = (
    'Gus Proto is a Python+developer currently working for a London$based Fintech'
    ' company. He is interested in learning Natural Language Processing.'
)
text

'Gus Proto is a Python+developer currently working for a London$based Fintech company. He is interested in learning Natural Language Processing.'

In [3]:
# Prefix and suffix patterns are compiled from the loaded pipeline's defaults.
prefix_re = spacy.util.compile_prefix_regex(
    custom_nlp.Defaults.prefixes
)
suffix_re = spacy.util.compile_suffix_regex(
    custom_nlp.Defaults.suffixes
)

# Custom infix pattern: a character class matching a literal '$' or '+'.
infix_re = re.compile(r'''[$\+]''')

In [4]:
def customize_tokenizer(nlp):
    """Build a tokenizer for ``nlp`` that uses the custom infix pattern.

    The infix pattern is the notebook-level ``infix_re``, which matches
    ``'$'`` and ``'+'``. Prefix and suffix patterns are compiled from
    ``nlp.Defaults`` so they always belong to the pipeline that was passed
    in, instead of depending on module-level regexes compiled from some
    other pipeline object.

    Args:
        nlp: A loaded spaCy pipeline. Its ``vocab`` and its
            ``Defaults.prefixes`` / ``Defaults.suffixes`` are used.

    Returns:
        A ``Tokenizer`` bound to ``nlp.vocab`` with the custom infix rule.
    """
    prefix_pattern = spacy.util.compile_prefix_regex(nlp.Defaults.prefixes)
    suffix_pattern = spacy.util.compile_suffix_regex(nlp.Defaults.suffixes)

    # NOTE(review): no `rules` argument is passed here -- confirm whether the
    # pipeline's default tokenizer exceptions should be carried over as well.
    return Tokenizer(nlp.vocab, prefix_search=prefix_pattern.search,
                     suffix_search=suffix_pattern.search,
                     infix_finditer=infix_re.finditer,
                     token_match=None)

In [22]:
# Install the custom tokenizer on the pipeline, then run the sample text
# through it and show the resulting token strings.
custom_nlp.tokenizer = customize_tokenizer(custom_nlp)
custom_tokenizer_doc = custom_nlp(text)
print([tok.text for tok in custom_tokenizer_doc])

['Gus', 'Proto', 'is', 'a', 'Python', '+', 'developer', 'currently', 'working', 'for', 'a', 'London', '$', 'based', 'Fintech', 'company', '.', 'He', 'is', 'interested', 'in', 'learning', 'Natural', 'Language', 'Processing', '.']


In [5]:
# Baseline for comparison: a freshly loaded pipeline with its stock tokenizer,
# run on the same sample text.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
print([tok.text for tok in doc])

['Gus', 'Proto', 'is', 'a', 'Python+developer', 'currently', 'working', 'for', 'a', 'London$based', 'Fintech', 'company', '.', 'He', 'is', 'interested', 'in', 'learning', 'Natural', 'Language', 'Processing', '.']
