An LSTM layer consists of a set of recurrently connected blocks, known as memory blocks. These blocks can be thought of as a differentiable version of the memory chips in a digital computer. Each one contains one or more recurrently connected memory cells and three multiplicative units — the input, output and forget gates — that provide continuous analogues of write, read and reset operations for the cells. … The net can only interact with the cells via the gates.”

In [2]:
!pip install livelossplot

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting livelossplot
  Downloading livelossplot-0.5.5-py3-none-any.whl (22 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.1-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 20.2 MB/s 
Installing collected packages: jedi, livelossplot
Successfully installed jedi-0.18.1 livelossplot-0.5.5


In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(0)
plt.style.use("ggplot")
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import LSTM, Embedding, Dense
from tensorflow.keras.layers import TimeDistributed, SpatialDropout1D, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from livelossplot.tf_keras import PlotLossesCallback

### Load and Explore the Data

In [4]:
data = pd.read_csv('https://raw.githubusercontent.com/thunderstroke325/60-Days-of-Data-Science-and-ML/main/datasets/data44.csv',encoding ='latin1')
data = data.fillna(method = 'ffill')
# Unique words
print(data['Word'].nunique())
print(data['Tag'].nunique())
words = list(set(data['Word'].values))
words.append('ENDPAD')
num_words = len(words)
tags= list(set(data['Tag'].values))
num_tags = len(tags)
num_words, num_tags

35178
17


(35179, 17)

### Retrieve Sentences and Tags

In [5]:
class sg(object):
    def __init__(self,data):
        self.n_sent = 1
        self.data = data
        af = lambda s: [(w,p,t) for w,p,t in zip(s['Word'].values.tolist(),
                                                s['POS'].values.tolist(),
                                                s['Tag'].values.tolist())]
        self.grouped = self.data.groupby('Sentence #').apply(af)
        self.sentences = [s for s in self.grouped]
g = sg(data)
s = g.sentences
s[2]

[('Helicopter', 'NN', 'O'),
 ('gunships', 'NNS', 'O'),
 ('Saturday', 'NNP', 'B-tim'),
 ('pounded', 'VBD', 'O'),
 ('militant', 'JJ', 'O'),
 ('hideouts', 'NNS', 'O'),
 ('in', 'IN', 'O'),
 ('the', 'DT', 'O'),
 ('Orakzai', 'NNP', 'B-geo'),
 ('tribal', 'JJ', 'O'),
 ('region', 'NN', 'O'),
 (',', ',', 'O'),
 ('where', 'WRB', 'O'),
 ('many', 'JJ', 'O'),
 ('Taliban', 'NNP', 'B-org'),
 ('militants', 'NNS', 'O'),
 ('are', 'VBP', 'O'),
 ('believed', 'VBN', 'O'),
 ('to', 'TO', 'O'),
 ('have', 'VB', 'O'),
 ('fled', 'VBN', 'O'),
 ('to', 'TO', 'O'),
 ('avoid', 'VB', 'O'),
 ('an', 'DT', 'O'),
 ('earlier', 'JJR', 'O'),
 ('military', 'JJ', 'O'),
 ('offensive', 'NN', 'O'),
 ('in', 'IN', 'O'),
 ('nearby', 'JJ', 'O'),
 ('South', 'NNP', 'B-geo'),
 ('Waziristan', 'NNP', 'I-geo'),
 ('.', '.', 'O')]

### Mappings

In [6]:
wi = {w: i+1 for i,w in enumerate(words)}
ti = {t: i for i,t, in enumerate(tags)}
wi

{'transitional': 1,
 'sculptures': 2,
 'Alasay': 3,
 'Ireland': 4,
 'half-a-degree': 5,
 'staving': 6,
 'Aridi': 7,
 'appeals': 8,
 'coltan': 9,
 'Herceptin': 10,
 'Katarina': 11,
 'Akash': 12,
 'forensics': 13,
 'recipients': 14,
 'flour': 15,
 'Clarkson': 16,
 'Secretariat': 17,
 'vanilla': 18,
 'Maulana': 19,
 'Ravi': 20,
 'tarnishing': 21,
 'tire': 22,
 'circumventing': 23,
 'Dagger': 24,
 'SHEVARDNADZE': 25,
 'YouTube': 26,
 'Abd': 27,
 'Brigades': 28,
 'Sum-41': 29,
 'Vejjajiva': 30,
 'Community': 31,
 'Flavia': 32,
 'technical': 33,
 'nullified': 34,
 'barricade': 35,
 'charming': 36,
 'passwords': 37,
 'Madagonians': 38,
 'Hen': 39,
 'geography': 40,
 'Gilgit': 41,
 '14-year-old': 42,
 'licenses': 43,
 'six-thousand': 44,
 'Harvard': 45,
 '111th': 46,
 'draining': 47,
 'Pulwama': 48,
 'Naxalites': 49,
 'cross-strait': 50,
 '180,000-employee': 51,
 'Knee': 52,
 'Protest': 53,
 'absorbed': 54,
 'addressing': 55,
 'Guillaume': 56,
 'Olympio': 57,
 'pro-Democracy': 58,
 'revered': 

### Padding and train test split

In [7]:
ml = 50
X= [[wi[w[0]] for w in s ] for s in s]
X = pad_sequences(maxlen=ml,sequences = X,padding ='post',value = num_words-1)
y= [[ti[w[2]] for w in s ] for s in s]
y = pad_sequences(maxlen=ml,sequences = y,padding ='post',value = ti["O"])
y = [to_categorical(i,num_classes = num_tags) for i in y]
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=1)

### Build and Compile Bidirectional LSTM Model

In [14]:
input_word = Input(shape=(ml,))
m = Embedding(input_dim = num_words, output_dim = ml,input_length = ml)(input_word)
m= SpatialDropout1D(0.1)(m)
m = Bidirectional(LSTM(units=100, return_sequences = True, recurrent_dropout =0.1))(m)
out= TimeDistributed(Dense(num_tags,activation = 'softmax'))(m)
m = Model(input_word,out)
m.compile(loss ='mse', optimizer ='adam')
m.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 50)]              0         
                                                                 
 embedding_1 (Embedding)     (None, 50, 50)            1758950   
                                                                 
 spatial_dropout1d_1 (Spatia  (None, 50, 50)           0         
 lDropout1D)                                                     
                                                                 
 bidirectional_1 (Bidirectio  (None, 50, 200)          120800    
 nal)                                                            
                                                                 
 time_distributed_1 (TimeDis  (None, 50, 17)           3417      
 tributed)                                                       
                                                           

### Train the Model

In [15]:
es= EarlyStopping(monitor='val_accuracy',patience=1,verbose=0,mode='max',restore_best_weights=False)
cb = [PlotLossesCallback(),es]
h = m.fit(
    x_train, np.array(y_train),
    validation_split = 0.2,
    batch_size=32,
    epochs=3,
    verbose = 1
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Evaluate

In [16]:
m.evaluate(x_test,np.array(y_test))



0.0013421425828710198