## Loading libraries

[Link](https://github.com/davidberenstein1957/classy-classification)

In [1]:
import spacy
# or import standalone
# from classy_classification import ClassyClassifier

## SpaCy embeddings

In [2]:
data = {
    "furniture": ["This text is about chairs.",
               "Couches, benches and televisions.",
               "I really need to get a new sofa."],
    "kitchen": ["There also exist things like fridges.",
                "I hope to be getting a new stove today.",
                "Do you also have some ovens."]
}

In [3]:
# python -m spacy download en_core_web_trf

In [4]:
from spacy_curated_transformers.pipeline.transformer import DEFAULT_CONFIG

#@Language.component #` (for function components) or `@Language.factory`
#nlp.add_pipe("curated_transformer", config=DEFAULT_CONFIG)

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


In [5]:
nlp = spacy.load("en_core_web_trf")
nlp.add_pipe(
    "classy_classification",
    config={
        "data": data,
        "model": "spacy"
    }
)

  _torch_pytree._register_pytree_node(


ValueError: [E002] Can't find factory for 'classy_classification' for language English (en). This usually happens when spaCy calls `nlp.create_pipe` with a custom component name that's not registered on the current language class. If you're using a custom component, make sure you've added the decorator `@Language.component` (for function components) or `@Language.factory` (for class components).

Available factories: attribute_ruler, tok2vec, merge_noun_chunks, merge_entities, merge_subtokens, token_splitter, doc_cleaner, parser, beam_parser, lemmatizer, trainable_lemmatizer, entity_linker, entity_ruler, tagger, morphologizer, ner, beam_ner, senter, sentencizer, spancat, spancat_singlelabel, span_finder, future_entity_ruler, span_ruler, textcat, textcat_multilabel, ja.morphologizer, curated_transformer, en.lemmatizer

In [None]:
print(nlp("I am looking for kitchen appliances.")._.cats)

## Sentence level classification

In [None]:
nlp.add_pipe(
    "classy_classification",
    config={
        "data": data,
        "model": "spacy",
        "include_sent": True
    }
)

print(nlp("I am looking for kitchen appliances. And I love doing so.").sents[0]._.cats)

## Multi-label classification

In [None]:
data = {
    "furniture": ["This text is about chairs.",
               "Couches, benches and televisions.",
               "I really need to get a new sofa.",
               "We have a new dinner table.",
               "There also exist things like fridges.",
                "I hope to be getting a new stove today.",
                "Do you also have some ovens.",
                "We have a new dinner table."],
    "kitchen": ["There also exist things like fridges.",
                "I hope to be getting a new stove today.",
                "Do you also have some ovens.",
                "We have a new dinner table.",
                "There also exist things like fridges.",
                "I hope to be getting a new stove today.",
                "Do you also have some ovens.",
                "We have a new dinner table."]
}

In [None]:
nlp = spacy.load("en_core_web_md")
nlp.add_pipe(
    "classy_classification",
    config={
        "data": data,
        "model": "spacy",
        "multi_label": True,
    }
)

print(nlp("I am looking for furniture and kitchen equipment.")._.cats)

# Output:
#
# [{"furniture": 0.92}, {"kitchen": 0.91}]

## Sentence-transfomer embeddings

In [None]:
data = {
    "furniture": ["This text is about chairs.",
               "Couches, benches and televisions.",
               "I really need to get a new sofa."],
    "kitchen": ["There also exist things like fridges.",
                "I hope to be getting a new stove today.",
                "Do you also have some ovens."]
}

In [None]:
nlp = spacy.blank("en")
nlp.add_pipe(
    "classy_classification",
    config={
        "data": data,
        "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        "device": "gpu"
    }
)

print(nlp("I am looking for kitchen appliances.")._.cats)

# Output:
#
# [{"furniture": 0.21}, {"kitchen": 0.79}]

## Saving model

In [None]:
from classy_classification import ClassyClassifier

In [None]:
classifier = classyClassifier(data=data)

with open("./classifier.pkl", "wb") as f:
    pickle.dump(classifier, f)

f = open("./classifier.pkl", "rb")
classifier = pickle.load(f)
classifier("I am looking for kitchen appliances.")