# In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, TimeDistributed, Bidirectional
from tensorflow.keras.utils import plot_model
from numpy.random import seed
import tensorflow

# Set seeds for reproducibility: NumPy's global RNG and TensorFlow's global
# RNG are seeded independently.
seed(1)
tensorflow.random.set_seed(13)

# Load the data
data = pd.read_csv(r'C:\Users\STSC\Downloads\ner_dataset.csv', encoding='unicode_escape')

# Verify that every column used further down exists. An explicit raise is used
# instead of `assert` because assertions are stripped when Python runs with -O.
_missing_columns = [col for col in ('Sentence #', 'Word', 'Tag') if col not in data.columns]
if _missing_columns:
    raise KeyError(f"Column(s) {_missing_columns} not found in the dataset")

# Forward-fill NaN cells with the last valid value above them in the same
# column. `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`.
# NOTE(review): presumably 'Sentence #' is only populated on the first token of
# each sentence, which is why a forward fill is needed - confirm against the CSV.
# A NaN in the very first row has nothing above it and is left unchanged.
data = data.ffill()

# Function to map tokens/tags to indices
def get_dict_map(data, token_or_tag):
    """Build index lookup tables for the distinct values of one column.

    Indices are assigned in order of first appearance in the column, so the
    mapping is identical on every run for the same input. (Ordering by ``set``
    iteration is not stable for strings across interpreter runs because of
    hash randomization, which would make the indices irreproducible.)

    Args:
        data: Table-like object where ``data[token_or_tag]`` has a
            ``to_list()`` method (e.g. a pandas DataFrame).
        token_or_tag: Name of the column whose distinct values are indexed.

    Returns:
        A ``(tok2idx, idx2tok)`` tuple: ``tok2idx`` maps each distinct value
        to a 0-based integer index and ``idx2tok`` is the inverse mapping.
    """
    # dict.fromkeys de-duplicates while preserving insertion order.
    vocab = list(dict.fromkeys(data[token_or_tag].to_list()))
    tok2idx = {tok: idx for idx, tok in enumerate(vocab)}
    idx2tok = dict(enumerate(vocab))
    return tok2idx, idx2tok

# Build forward (value -> index) and reverse (index -> value) lookups
# for the 'Word' and 'Tag' columns.
token2idx, idx2token = get_dict_map(data, 'Word')
tag2idx, idx2tag = get_dict_map(data, 'Tag')

# Add integer-encoded companions of the two string columns.
data['Word_idx'] = data['Word'].map(token2idx)
data['Tag_idx'] = data['Tag'].map(tag2idx)

# Sanity check: both encoded columns must now be present.
assert 'Word_idx' in data.columns, "Column 'Word_idx' not created"
assert 'Tag_idx' in data.columns, "Column 'Tag_idx' not created"

# Collapse the row-per-token table into one row per sentence, where each
# encoded column holds the list of that sentence's indices.
data_group = (
    data
    .groupby(['Sentence #'], as_index=False)[['Word_idx', 'Tag_idx']]
    .agg(list)
)

# Function to pad and split data
def get_pad_train_test_val(data_group):
    """Pad the per-sentence index lists and split them into train/val/test.

    Reads the module-level ``token2idx`` and ``tag2idx`` mappings for the
    vocabulary and tag-set sizes.

    Token sequences are post-padded to the longest sentence with the index
    ``len(token2idx)``. Vocabulary indices run from 0 to ``len(token2idx) - 1``,
    so this is the first index that no real word uses; padding with
    ``len(token2idx) - 1`` would make padding indistinguishable from the last
    vocabulary word. Tag sequences are post-padded with the index of the
    ``"O"`` tag and then one-hot encoded per sentence.

    Args:
        data_group: Table with one row per sentence and the list-valued
            columns ``'Word_idx'`` and ``'Tag_idx'``.

    Returns:
        ``(train_tokens, val_tokens, test_tokens, train_tags, val_tags,
        test_tags)``. 20% of the sentences go to the test split; the remaining
        80% is split 80/20 again into train and validation (64% / 16% of the
        total). Both splits use ``random_state=42``.

    Raises:
        ValueError: If ``data_group`` contains no sentences.
        KeyError: If the tag set has no ``"O"`` tag.
    """
    n_token = len(token2idx)
    n_tag = len(tag2idx)

    tokens = data_group['Word_idx'].tolist()
    if not tokens:
        raise ValueError("data_group contains no sentences to pad and split")
    maxlen = max(len(s) for s in tokens)

    # n_token is the dedicated padding index (one past the last real word).
    pad_tokens = pad_sequences(tokens, maxlen=maxlen, dtype='int32', padding='post', value=n_token)

    tags = data_group['Tag_idx'].tolist()
    pad_tags = pad_sequences(tags, maxlen=maxlen, dtype='int32', padding='post', value=tag2idx["O"])
    # One (maxlen, n_tag) one-hot matrix per sentence, kept as a list.
    pad_tags = [to_categorical(i, num_classes=n_tag) for i in pad_tags]

    # Split the data into train, validation, and test sets
    tokens_, test_tokens, tags_, test_tags = train_test_split(pad_tokens, pad_tags, train_size=0.8, random_state=42)
    train_tokens, val_tokens, train_tags, val_tags = train_test_split(tokens_, tags_, train_size=0.8, random_state=42)

    return train_tokens, val_tokens, test_tokens, train_tags, val_tags, test_tags

# Materialise the six padded splits (tokens and one-hot tags for
# train / validation / test).
(
    train_tokens, val_tokens, test_tokens,
    train_tags, val_tags, test_tags,
) = get_pad_train_test_val(data_group)

# Hyper-parameters read by the model factory below.
n_tags = len(tag2idx)
output_dim = 64
input_dim = len(token2idx) + 1  # vocabulary size plus one extra row, intended for a padding index
maxlen = max(map(len, data_group['Word_idx'].tolist()))

def get_bilstm_lstm_model():
    """Assemble and compile the Embedding -> BiLSTM -> LSTM -> softmax tagger.

    Reads the module-level ``input_dim``, ``output_dim``, ``maxlen`` and
    ``n_tags`` values for the layer sizes.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    stack = [
        # Index -> dense vector lookup for each token position.
        Embedding(input_dim=input_dim, output_dim=output_dim, input_length=maxlen),
        # Recurrent layer run in both directions, keeping one output per step.
        Bidirectional(
            LSTM(units=output_dim, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)
        ),
        # Second, unidirectional recurrent layer with heavier dropout.
        LSTM(units=output_dim, return_sequences=True, dropout=0.5, recurrent_dropout=0.5),
        # Per-timestep softmax over the tag set.
        TimeDistributed(Dense(n_tags, activation="softmax")),
    ]
    network = Sequential(stack)
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Create the tagger and build it for inputs of shape (None, maxlen),
# i.e. an unspecified batch size and maxlen positions per sequence.
model_bilstm_lstm = get_bilstm_lstm_model()
model_bilstm_lstm.build((None, maxlen))

# Show the layer-by-layer summary of the built model.
model_bilstm_lstm.summary()

# --- spaCy named-entity demo -------------------------------------------------
# This section uses a spaCy pipeline loaded by name; it does not reference the
# Keras model defined above.
import spacy
from spacy import displacy
# Load the 'en_core_web_sm' pipeline (it must already be installed).
nlp = spacy.load('en_core_web_sm')
# Run the pipeline over a sample passage. Despite its name, `text` holds the
# object returned by nlp(...), not the raw string.
text = nlp("""AI-based machine nanda learnle – as well as valuable, he said.Today,  IoT and edge processing needs have attracted AI chip start-ups that include EdgeQ,  Graphcore, Hailo, Mythic and others. Processing on the edge is constrained. Barriers include memory available, energy consumed and cost, emphasizes Hyperion’s Steve Conway.

“The embedded processors are very important, as energy use is very important,” Conway said. “The GPUs and CPUs are not tiny dies, and GPUs, particularly, use a ton of energy,” he said, referring to the relatively large silicon form factors GPUs and CPUs can take on.

Making Neurals Fit the Part

Data movement is a factor in energy consumption on the edge, advises Kris Ardis, executive director of Maxim Integrated’s microcontroller and software algorithm businesses.""")

# Render named entities with displaCy's entity ('ent') visualizer.
# NOTE(review): page=True presumably wraps the markup in a full HTML page -
# confirm against the displaCy docs for the installed spaCy version.
displacy.render(text, style='ent', page=True)

