In [None]:
#| default_exp helpers.spacy

# spaCy Model Helper

> Helper functions for managing spaCy models across different platforms

This module provides utilities for downloading and loading spaCy models in a platform-agnostic way. It handles the common pattern of checking if a model exists before attempting to download it, which is especially useful in notebook environments.

In [None]:
#| export
import subprocess
import sys

In [None]:
#| export
def ensure_spacy_model(model_name: str = "en_core_web_sm", verbose: bool = True):
    """
    Ensure a spaCy model is installed, downloading it if necessary.

    The model is first loaded with ``spacy.load``. If that raises ``OSError``,
    the model is installed by running ``python -m spacy download <model_name>``
    with the current interpreter, and then loaded again.

    Parameters
    ----------
    model_name : str, optional
        Name of the spaCy model to ensure is installed (default: "en_core_web_sm")
    verbose : bool, optional
        Whether to print status messages (default: True). When False, the
        standard output of the download command is discarded as well.

    Returns
    -------
    spacy.Language
        The loaded spaCy model

    Raises
    ------
    subprocess.CalledProcessError
        If the download command exits with a non-zero status.
    OSError
        If the model still cannot be loaded after the download.

    Examples
    --------
    >>> nlp = ensure_spacy_model("en_core_web_sm")
    >>> doc = nlp("This is a test sentence.")
    >>> print([token.text for token in doc])
    ['This', 'is', 'a', 'test', 'sentence', '.']
    """
    import importlib

    import spacy

    try:
        # Try to load the model
        nlp = spacy.load(model_name)
    except OSError:
        # Model could not be loaded, so download it
        if verbose:
            print(f"📥 Downloading spaCy model '{model_name}'...")

        # Use the running interpreter so the package is installed into the
        # same environment this process imports from.
        subprocess.check_call(
            [sys.executable, "-m", "spacy", "download", model_name],
            stdout=None if verbose else subprocess.DEVNULL,
        )

        # The package was installed by another process after this interpreter
        # started; drop the import system's cached directory listings so the
        # new package can be found without restarting the kernel.
        importlib.invalidate_caches()

        # Load the newly downloaded model
        nlp = spacy.load(model_name)
        if verbose:
            print(f"✅ spaCy model '{model_name}' downloaded and loaded successfully")
        return nlp
    else:
        # Kept out of the try body so that only spacy.load can trigger a download
        if verbose:
            print(f"✅ spaCy model '{model_name}' loaded successfully")
        return nlp

In [None]:
#| export
def list_installed_models():
    """
    Return the names of the spaCy models that are currently installed.

    Returns
    -------
    list of str
        The names reported by ``spacy.util.get_installed_models()``,
        copied into a new list
    """
    import spacy

    installed = spacy.util.get_installed_models()
    return [*installed]

## Usage Example

In your notebooks, use this instead of manually checking for models:

In [None]:
#| eval: false
from data401_nlp.helpers.spacy import ensure_spacy_model

# Load the model; it is downloaded first if it is not installed yet
nlp = ensure_spacy_model("en_core_web_sm")

# Process a sample sentence and show the text of each token
doc = nlp("This is a test.")
print([tok.text for tok in doc])