In [5]:
# Install the required package
!pip install spacy

# Download the spaCy language model
!python -m spacy download en_core_web_sm




Collecting en-core-web-sm==3.7.1
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [6]:
# Import necessary libraries
import spacy
from spacy.tokens import DocBin
from spacy.util import minibatch, compounding
from sklearn.model_selection import train_test_split
import random

# Load spaCy's pre-trained English model
nlp = spacy.blank("en")

# Add the TextCategorizer to the pipeline
textcat = nlp.add_pipe("textcat", config={"exclusive_classes": True, "architecture": "simple_cnn"})

# Add labels to the TextCategorizer
textcat.add_label("POSITIVE")
textcat.add_label("NEGATIVE")
textcat.add_label("NEUTRAL")

ConfigValidationError: 

Config validation error
textcat -> architecture	extra fields not permitted
textcat -> exclusive_classes	extra fields not permitted
{'nlp': <spacy.lang.en.English object at 0x000001599E6ABCE0>, 'name': 'textcat', 'architecture': 'simple_cnn', 'exclusive_classes': True, 'model': {'@architectures': 'spacy.TextCatEnsemble.v2', 'linear_model': {'@architectures': 'spacy.TextCatBOW.v3', 'exclusive_classes': True, 'length': 262144, 'ngram_size': 1, 'no_output_layer': False}, 'tok2vec': {'@architectures': 'spacy.Tok2Vec.v2', 'embed': {'@architectures': 'spacy.MultiHashEmbed.v2', 'width': 64, 'rows': [2000, 2000, 500, 1000, 500], 'attrs': ['NORM', 'LOWER', 'PREFIX', 'SUFFIX', 'SHAPE'], 'include_static_vectors': False}, 'encode': {'@architectures': 'spacy.MaxoutWindowEncoder.v2', 'width': 64, 'window_size': 1, 'maxout_pieces': 3, 'depth': 2}}}, 'scorer': {'@scorers': 'spacy.textcat_scorer.v2'}, 'threshold': 0.0, '@factories': 'textcat'}

In [None]:
# Sample training data: list of tuples with text and sentiment label
data = [
    ("I love this!", {"cats": {"POSITIVE": 1.0}}),
    ("This is terrible.", {"cats": {"NEGATIVE": 1.0}}),
    ("I am indifferent about this.", {"cats": {"NEUTRAL": 1.0}}),
    ("I hate it so much!", {"cats": {"NEGATIVE": 1.0}}),
    ("This is the best thing ever!", {"cats": {"POSITIVE": 1.0}}),
    ("Not sure how I feel about this.", {"cats": {"NEUTRAL": 1.0}})
]

# Split the dataset into training and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [None]:

# Function to convert data to spaCy's DocBin format
def create_doc_bin(data):
    doc_bin = DocBin()
    for text, annotations in data:
        doc = nlp.make_doc(text)
        doc.cats = annotations["cats"]
        doc_bin.add(doc)
    return doc_bin

# Convert training and test data into DocBin format
train_docs = create_doc_bin(train_data)
test_docs = create_doc_bin(test_data)

# Training the model
n_iter = 10
optimizer = nlp.initialize()

for epoch in range(n_iter):
    random.shuffle(train_data)
    batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
    
    for batch in batches:
        texts, annotations = zip(*batch)
        nlp.update(texts, annotations, sgd=optimizer)

# Save the model
nlp.to_disk("sentiment_model")

# Load the saved model for inference
nlp = spacy.load("sentiment_model")

# Test the model on new examples
texts = ["I feel amazing today!", "I am not sure about this.", "This is terrible."]

for text in texts:
    doc = nlp(text)
    print(f"Text: {text} -> Sentiment: {doc.cats}")

# Evaluate model performance on test data
correct = 0
for text, annotations in test_data:
    doc = nlp(text)
    predicted = max(doc.cats, key=doc.cats.get)
    actual = max(annotations['cats'], key=annotations['cats'].get)
    print(f"Text: {text}, Predicted: {predicted}, Actual: {actual}")
    if predicted == actual:
        correct += 1

accuracy = correct / len(test_data)
print(f"Test Accuracy: {accuracy * 100:.2f}%")