In [2]:
import spacy

In [3]:
nlp = spacy.load('en_core_web_sm')

In [4]:
doc = nlp(u"I will google about facebook")

In [5]:
doc[-1]

facebook

In [6]:
doc[2].pos_

'VERB'

In [7]:
doc[2].tag_

'VB'

In [8]:
spacy.explain('VB')

'verb, base form'

In [9]:
for word in doc:
 print(word.text,"------>", word.pos_,word.tag_,spacy.explain(word.tag))

I ------> PRON PRP None
will ------> AUX MD None
google ------> VERB VB None
about ------> ADP IN None
facebook ------> NOUN NN None




In [10]:
doc2 = nlp(u"I left the room")
for word in doc2:
 print(word.text,"------>", word.pos_,word.tag_,spacy.explain(word.tag_))

I ------> PRON PRP pronoun, personal
left ------> VERB VBD verb, past tense
the ------> DET DT determiner
room ------> NOUN NN noun, singular or mass


In [11]:
doc3 = nlp(u"to the left of the room")
for word in doc3:
 print(word.text,"------>", word.pos_,word.tag_,spacy.explain(word.tag_))


to ------> ADP IN conjunction, subordinating or preposition
the ------> DET DT determiner
left ------> NOUN NN noun, singular or mass
of ------> ADP IN conjunction, subordinating or preposition
the ------> DET DT determiner
room ------> NOUN NN noun, singular or mass


In [12]:
doc4 = nlp(u"I read books on history")
for word in doc4:
 print(word.text,"------>", word.pos_,word.tag_,spacy.explain(word.tag))

I ------> PRON PRP None
read ------> VERB VBP None
books ------> NOUN NNS None
on ------> ADP IN None
history ------> NOUN NN None




In [13]:
doc5 = nlp(u"I have read a book on history")
for word in doc5:
 print(word.text,"------>", word.pos_,word.tag_,spacy.explain(word.tag))

I ------> PRON PRP None
have ------> AUX VBP None
read ------> VERB VBN None
a ------> DET DT None
book ------> NOUN NN None
on ------> ADP IN None
history ------> NOUN NN None




In [14]:
doc6 = nlp(u"The quick brown fox jumped over the lazy dog")

In [15]:
from spacy import displacy

In [17]:
displacy.render(doc6,style='dep',jupyter=True)

In [18]:
options={
 'distance':80,
 'compact':True,
 'color':'#fff',
 'bg':'#00a65a'
}

In [19]:
displacy.render(doc6,style='dep',jupyter=True,options=options)

In [20]:
#exp 6: Parts of speech tagging using Sequence to Sequence architecture
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
sentences = [
    'I love natural language processing',
    'Sequence to sequence models are powerful',
    'POS tagging helps understand sentence structure',]
tokenized_sentences = [sentence.split() for sentence in sentences]
pos_tags = [
    ['PRP', 'VBP', 'JJ', 'NN', 'NN', 'NN'],
    ['NN', 'TO', 'NN', 'NNS', 'VBP', 'JJ'],
    ['NNP', 'VBG', 'VBZ', 'VB', 'NN', 'NN'],]
word_vocab = set(word for sentence in tokenized_sentences for word in sentence)
pos_tag_vocab = set(tag for tags in pos_tags for tag in tags)
word2index = {word: idx + 1 for idx, word in enumerate(word_vocab)}
pos2index = {tag: idx + 1 for idx, tag in enumerate(pos_tag_vocab)}
max_sequence_length = max(len(sentence) for sentence in tokenized_sentences)
num_words = len(word_vocab) + 1
num_pos_tags = len(pos_tag_vocab) + 1
X = np.array([[word2index[word] for word in sentence] for sentence in tokenized_sentences])
y = np.array([[pos2index[tag] for tag in tags] for tags in pos_tags])
X_padded = pad_sequences(X, maxlen=max_sequence_length, padding='post')
y_padded = pad_sequences(y, maxlen=max_sequence_length, padding='post')
X_decoder_input = y_padded[:, :-1]
y_decoder_output = y_padded[:, 1:]
embedding_dim = 50
latent_dim = 100
encoder_inputs = Input(shape=(max_sequence_length,))
encoder_embedding = Embedding(num_words, embedding_dim, input_length=max_sequence_length)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]
decoder_inputs = Input(shape=(max_sequence_length-1,))
decoder_embedding = Embedding(num_pos_tags, embedding_dim, input_length=max_sequence_length-1)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(num_pos_tags, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit([X_padded, X_decoder_input], np.expand_dims(y_decoder_output, -1), epochs=50, verbose=1)
pos2index['PAD'] = 0
def predict_pos_tags(input_sentence):
    input_sequence = [word2index.get(word, 0) for word in input_sentence.split()]
    input_padded = pad_sequences([input_sequence], maxlen=max_sequence_length, padding='post')
    decoder_input = np.zeros((1, max_sequence_length-1))
    decoder_input[0, :-1] = [pos2index['PAD']] * (max_sequence_length-2)
    decoder_input[0, -1] = pos2index['PAD']
    predicted_output = model.predict([input_padded, decoder_input])
    predicted_tags = [np.argmax(tag) for tag in predicted_output[0]]
    pos_tags_inverse = {v: k for k, v in pos2index.items()}
    predicted_tags = [pos_tags_inverse[tag] for tag in predicted_tags]
    return list(zip(input_sentence.split(), predicted_tags))
test_sentence = "POS tagging helps understand sentence structure"
predicted_tags = predict_pos_tags(test_sentence)
print("Input Sentence:", test_sentence)
print("Predicted POS Tags:")
for word, pos_tag in predicted_tags:
    print(f"{word} - {pos_tag}")


  X = np.array([[word2index[word] for word in sentence] for sentence in tokenized_sentences])


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 6)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 5)]                  0         []                            
                                                                                                  
 embedding (Embedding)       (None, 6, 50)                900       ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, 5, 50)                550       ['input_2[0][0]']             
                                                                                              