In [1]:
import spacy

# Load the SpaCy model
nlp = spacy.load('en_core_web_sm')

# Process the text
text = "The cat sat on the mat."
doc = nlp(text)

# Print the dependency parse
for token in doc:
    print(f'{token.text:10} {token.dep_:10} {token.head.text:10}')

# Output structure (word, dependency relation, head word)

The        det        cat       
cat        nsubj      sat       
sat        ROOT       sat       
on         prep       sat       
the        det        mat       
mat        pobj       on        
.          punct      sat       


In [2]:
# 2. Semantics: Word Embeddings with Gensim

from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

# Sample sentences
sentences = [
    "The cat sat on the mat.",
    "Dogs are great pets.",
    "I love programming in Python."
]

# Tokenize the sentences
tokenized_sentences = [simple_preprocess(sentence) for sentence in sentences]

# Train a Word2Vec model
model = Word2Vec(sentences=tokenized_sentences, vector_size=50, window=3, min_count=1, sg=1)

# Find similar words
similar_words = model.wv.most_similar('cat')
print(similar_words)


[('programming', 0.1901046484708786), ('sat', 0.17440013587474823), ('are', 0.11519220471382141), ('pets', 0.10159842669963837), ('python', 0.08061393350362778), ('the', 0.04067763686180115), ('love', -0.02331056445837021), ('great', -0.029589535668492317), ('in', -0.03339873254299164), ('dogs', -0.06483341753482819)]


In [3]:
# 3. Pragmatics: Sentiment Analysis with TextBlob

from textblob import TextBlob

# Sample text
text = "I love this movie! It's fantastic."
blob = TextBlob(text)

# Get the sentiment
sentiment = blob.sentiment
print(f'Sentiment polarity: {sentiment.polarity}')
print(f'Sentiment subjectivity: {sentiment.subjectivity}')

Sentiment polarity: 0.5125
Sentiment subjectivity: 0.75


In [5]:
# 4. Discourse: Coreference Resolution with SpaCy

import spacy
import neuralcoref

# Load the SpaCy model
nlp = spacy.load('en_core_web_md')

# Add neuralcoref to the SpaCy pipeline
neural_coref = neuralcoref.NeuralCoref(nlp.vocab)
nlp.add_pipe(neural_coref, name='neural_coref', last=True)

# Process the text
text = "John went to the store. He bought some apples."
doc = nlp(text)

# Print coreferences
if doc._.has_coref:
    for cluster in doc._.coref_clusters:
        print(f"Cluster: {[mention.text for mention in cluster.mentions]}")
else:
    print("No coreferences found.")


AttributeError: [E046] Can't retrieve unregistered extension attribute 'coref_clusters'. Did you forget to call the `set_extension` method?