In [1]:
import spacy
from spacy.training.example import Example
from spacy.util import minibatch, compounding
import random

# Steps 1-3: Build the pipeline.
# Start from an empty English pipeline (tokenizer only, no trained components).
nlp = spacy.blank("en")

# Reuse the text classifier if the pipeline already has one; otherwise
# append a new "textcat" component at the end of the pipeline.
textcat = (
    nlp.get_pipe("textcat")
    if "textcat" in nlp.pipe_names
    else nlp.add_pipe("textcat", last=True)
)

# Register the two sentiment categories the classifier will score.
for label in ("POSITIVE", "NEGATIVE"):
    textcat.add_label(label)

# Step 4: Labeled training dataset.
# Each item is a (text, annotations) tuple where annotations has the form
# {'cats': {'POSITIVE': value, 'NEGATIVE': value}} with values 1 or 0.
# The raw samples below pair each text with a flag saying whether it is
# positive; the comprehension expands that flag into the 'cats' dictionary.
train_data = [
    (
        sentence,
        {"cats": {"POSITIVE": int(is_positive), "NEGATIVE": int(not is_positive)}},
    )
    for sentence, is_positive in (
        ("I love this product, it is great!", True),
        ("This is the worst thing I have ever bought.", False),
        ("I am extremely happy with my purchase!", True),
        ("I hate this so much, it was a waste of money.", False),
        # Add more labeled examples here
    )
]

# Step 5: Train the model
# Restrict the pipeline to the textcat component for the duration of training.
# `select_pipes` is the spaCy v3 replacement for the deprecated `disable_pipes`.
with nlp.select_pipes(enable="textcat"):
    # `initialize` is the spaCy v3 replacement for the deprecated
    # `begin_training`; it sets up the model weights and returns the optimizer.
    optimizer = nlp.initialize()
    for epoch in range(10):  # Train for 10 epochs
        # Reorder the examples in place so each epoch sees a different order.
        random.shuffle(train_data)
        losses = {}  # filled in by nlp.update, keyed by component name
        # Minibatch training; the batch size comes from a compounding
        # schedule (start 4.0, upper bound 32.0, factor 1.001).
        batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            # Pair an unannotated Doc for each text with its gold annotations.
            examples = [
                Example.from_dict(nlp.make_doc(text), annotations)
                for text, annotations in batch
            ]
            # Pass the optimizer explicitly so the one created above is
            # visibly the one applying the updates.
            nlp.update(examples, sgd=optimizer, drop=0.2, losses=losses)
        print(f"Epoch {epoch + 1} Loss: {losses['textcat']}")

# Step 6: Try the trained pipeline on a few unseen sentences
test_texts = [
    "I absolutely love this!",
    "This is terrible and I hate it.",
    "I am not sure how I feel about this.",
]

for sample in test_texts:
    # Running the pipeline on a text yields a Doc; its `cats` attribute is a
    # dictionary holding the POSITIVE/NEGATIVE scores.
    scores = nlp(sample).cats
    print(sample, scores)

# Step 7: Persist the trained pipeline to a directory on disk
model_dir = "sentiment_model"
nlp.to_disk(model_dir)

# The saved pipeline can be loaded again later with:
# nlp2 = spacy.load("sentiment_model")



Epoch 1 Loss: 0.25
Epoch 2 Loss: 0.24965250492095947
Epoch 3 Loss: 0.24684181809425354
Epoch 4 Loss: 0.24443937838077545
Epoch 5 Loss: 0.23759663105010986
Epoch 6 Loss: 0.2380262315273285
Epoch 7 Loss: 0.21276113390922546
Epoch 8 Loss: 0.20081663131713867
Epoch 9 Loss: 0.20248420536518097
Epoch 10 Loss: 0.17722563445568085
I absolutely love this! {'POSITIVE': 0.5733842253684998, 'NEGATIVE': 0.42661580443382263}
This is terrible and I hate it. {'POSITIVE': 0.46691903471946716, 'NEGATIVE': 0.5330809950828552}
I am not sure how I feel about this. {'POSITIVE': 0.47172972559928894, 'NEGATIVE': 0.5282702445983887}
