In [None]:
# Install Spacy the first time you run this notebook
!pip install -U pip setuptools wheel
!pip install -U spacy
!python -m spacy download en_core_web_sm

In [2]:
import spacy
from mtasklite import delayed_init

# Names of spaCy pipeline components; SPACY_NER and SPACY_PARSER are passed
# to spacy.load(..., disable=[...]) in SpacyTextParser.
# NOTE(review): SPACY_POS is not referenced in the cells shown here.
SPACY_POS = 'tagger'
SPACY_NER = 'ner'
SPACY_PARSER = 'parser'

# Name of the spaCy model handed to SpacyTextParser
SPACY_MODEL = 'en_core_web_sm'

@delayed_init
class SpacyTextParser:
    """Callable that splits a text into token strings using a spaCy pipeline.

    The class is wrapped with mtasklite's ``delayed_init``.
    NOTE(review): presumably this postpones the actual construction until a
    worker needs the instance -- confirm against the mtasklite documentation.
    """

    def __init__(self, model_name):
        """Load the spaCy model ``model_name`` with the NER and parser components disabled."""
        disabled_components = [SPACY_NER, SPACY_PARSER]
        self._nlp = spacy.load(model_name, disable=disabled_components)

    def __call__(self, text):
        """Run the pipeline on ``text`` and return the text of each resulting token as a list."""
        doc = self._nlp(text)
        return [token.text for token in doc]

In [3]:
# Quick check without pqdm: build one parser and tokenize a single sentence.
parser = SpacyTextParser(SPACY_MODEL)
parser('This is a simple text!')

['This', 'is', 'a', 'simple', 'text', '!']

In [4]:
from mtasklite.processes import pqdm

# Sample sentences to tokenize; each list entry is one input item for pqdm.
input_arr = [
    'Accelerate is a library that enables the same PyTorch code to be run across any distributed configuration by adding just four lines of code!',
    'In short, training and inference at scale made simple, efficient and adaptable.',
    'Built on torch_xla and torch.distributed, Accelerate takes care of the heavy lifting, so you don’t have to write any custom code to adapt to these platforms.',
    'Convert existing codebases to utilize DeepSpeed, perform fully sharded data parallelism, and have automatic support for mixed-precision training!',
    'Welcome to the Accelerate tutorials!',
    'These introductory guides will help catch you up to speed on working with Accelerate.',
    'You’ll learn how to modify your code to have it work with the API seamlessly, how to launch your script properly, and more!',
    'These tutorials assume some basic knowledge of Python and familiarity with the PyTorch framework.'
]

# Use exactly four workers, each described by a parser created with the same model name.
N_JOBS = 4
worker_specs = [SpacyTextParser(SPACY_MODEL)] * N_JOBS
result = pqdm(input_arr, worker_specs)

# Materialize the results so the notebook displays the token lists.
list(result)

  0%|          | 0/8 [00:00<?, ?it/s]

[['Accelerate',
  'is',
  'a',
  'library',
  'that',
  'enables',
  'the',
  'same',
  'PyTorch',
  'code',
  'to',
  'be',
  'run',
  'across',
  'any',
  'distributed',
  'configuration',
  'by',
  'adding',
  'just',
  'four',
  'lines',
  'of',
  'code',
  '!'],
 ['In',
  'short',
  ',',
  'training',
  'and',
  'inference',
  'at',
  'scale',
  'made',
  'simple',
  ',',
  'efficient',
  'and',
  'adaptable',
  '.'],
 ['Built',
  'on',
  'torch_xla',
  'and',
  'torch.distributed',
  ',',
  'Accelerate',
  'takes',
  'care',
  'of',
  'the',
  'heavy',
  'lifting',
  ',',
  'so',
  'you',
  'do',
  'n’t',
  'have',
  'to',
  'write',
  'any',
  'custom',
  'code',
  'to',
  'adapt',
  'to',
  'these',
  'platforms',
  '.'],
 ['Convert',
  'existing',
  'codebases',
  'to',
  'utilize',
  'DeepSpeed',
  ',',
  'perform',
  'fully',
  'sharded',
  'data',
  'parallelism',
  ',',
  'and',
  'have',
  'automatic',
  'support',
  'for',
  'mixed',
  '-',
  'precision',
  'training',
 