# Creating an Application for Metaphor Detection

In [21]:
import nltk
from nltk import *
from nltk.tag.stanford import StanfordPOSTagger
from nltk.tokenize import word_tokenize

In [22]:
# No explicit `import os` is visible in this notebook; the name was presumably
# only leaking in through `from nltk import *`. Import it here so this cell
# does not depend on that wildcard import.
import os

# Path to the Java executable, exported through the JAVAHOME environment
# variable (NLTK's Stanford wrappers look this variable up to find Java).
java_path = "C:/Program Files/Java/jdk-18.0.1.1/bin/java.exe"
os.environ["JAVAHOME"] = java_path

# Stanford POS tagger distribution: the tagger jar and the English model file.
jar = "C:/Users/Public/utility/stanford-postagger-full-2020-11-17/stanford-postagger.jar"
model = "C:/Users/Public/utility/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger"

In [23]:
# Build the Stanford POS tagger from the model file and jar configured above.
pos_tagger = StanfordPOSTagger(
    model_filename=model,
    path_to_jar=jar,
    encoding="utf-8",
)

In [44]:
# Input sentence to analyse; the cells below lower-case, tokenize and tag it.
sentence = "The sky is ocean"

In [45]:
# Tokenization: lower-case the sentence in place, then split it into word tokens.
sentence = sentence.lower()
words = word_tokenize(sentence)
print(words)

['the', 'sky', 'is', 'ocean']


In [46]:
# POS-tag the tokens with the Stanford tagger; the result is a list of
# (token, tag) pairs, as shown in the printed output.
tagged_words = pos_tagger.tag(words)
print(tagged_words)

[('the', 'DT'), ('sky', 'NN'), ('is', 'VBZ'), ('ocean', 'NN')]


In [47]:
# POS labels grouped by word class (nouns, pronouns, adjectives, verbs).
# Each list is spelled as a space-separated string and split into its tags.
nouns = "NN NNP NNS NNPS".split()
pronouns = "PRP PRP$".split()
adjectives = "JJ JJR JJS".split()
verbs = "VB VBD VBG VBN VBP VBZ".split()

In [48]:
import spacy

In [49]:
# Load the spaCy pipeline "en_core_web_sm"; it is used below for the stop-word
# list and for noun-chunk extraction.
nlp = spacy.load("en_core_web_sm")

In [50]:
# Stop words taken from the pipeline's language defaults; used later to strip
# stop words out of the extracted noun phrases.
all_stopwords = nlp.Defaults.stop_words
print(all_stopwords)

{'elsewhere', 'through', 'n’t', 'nobody', 'towards', 'you', 'while', 'many', 'will', 'do', 'seemed', 'herein', 'nevertheless', 'or', 'become', 'and', 'hereupon', 'your', 'namely', 'own', 'make', 'its', 'yet', 'would', 'nor', 'further', 'ca', "'ve", 'those', 'call', 'top', 'whom', "'s", 'whereafter', 'after', '’s', 'latterly', 'who', 'within', 'has', "'ll", 'he', 'hereby', 'seems', 'as', 'his', 'sometimes', 'used', 'may', 'done', 'else', 'an', 'when', 'six', 'part', 'for', 'doing', 'yourselves', 'each', 'upon', 'ten', 'among', 'more', 'fifty', 'their', 'below', 'cannot', 'to', 'thereby', 'up', 'by', 'yours', 'something', 'herself', 'alone', 'several', '’re', 'name', 'all', 'throughout', 'becoming', 'beforehand', '’ll', 'thereafter', 'such', 'have', "'d", 'thence', 'often', 'did', 'nothing', 'serious', 'amongst', 'anyone', 'whither', 'therein', 'amount', 'get', 'at', 'four', 'here', 'some', 'had', 'less', 'about', 'back', 'empty', 'why', 'never', 'our', 'himself', 'together', 'eleven', '

In [51]:
def get_noun_phrase(sentence):
    """Return the noun chunks of *sentence* as a list.

    The text is parsed with the module-level ``nlp`` pipeline and the
    document's ``noun_chunks`` are collected into a list.
    """
    return list(nlp(sentence).noun_chunks)

import re

# The first token tagged as a verb is the pivot of the sentence: the text
# before it yields the target phrase, the text after it the source phrase.
verb = next((token for token, tag in tagged_words if tag in verbs), None)
if verb is None:
    raise ValueError(f"no verb found in sentence: {sentence!r}")

# Locate the verb as a whole word. A plain substring search would, for
# example, find "is" inside "this" and split the sentence in the wrong place.
# Fall back to a substring search if the word-boundary pattern cannot match.
verb_match = re.search(r"\b" + re.escape(verb) + r"\b", sentence)
verb_ind = verb_match.start() if verb_match else sentence.find(verb)
if verb_ind < 0:
    raise ValueError(f"verb {verb!r} not found in sentence: {sentence!r}")

# Noun chunks on each side of the verb; the +1 skips the single separator
# character that follows the verb.
target_noun_phrases_lst = get_noun_phrase(sentence[:verb_ind])
source_noun_phrases_lst = get_noun_phrase(sentence[verb_ind + len(verb) + 1:])
if not target_noun_phrases_lst or not source_noun_phrases_lst:
    raise ValueError(
        f"need a noun phrase on both sides of {verb!r} in sentence: {sentence!r}"
    )

# Keep only the first noun chunk on each side, split into its words.
target_noun_phrases_lst = str(target_noun_phrases_lst[0]).split()
source_noun_phrases_lst = str(source_noun_phrases_lst[0]).split()

# Drop stop words from both phrases.
target_noun_phrases_lst_without_sw = [
    word for word in target_noun_phrases_lst if word not in all_stopwords
]
source_noun_phrases_lst_without_sw = [
    word for word in source_noun_phrases_lst if word not in all_stopwords
]

# Final phrases handed to the prediction model.
target = ' '.join(target_noun_phrases_lst_without_sw)
source = ' '.join(source_noun_phrases_lst_without_sw)

## Load the model for prediction

In [32]:
# load model
# NOTE: this rebinds `model`, which earlier held the path to the Stanford
# tagger model file. Re-running only the `pos_tagger = ...` cell after this
# one would pass the loaded Keras model to StanfordPOSTagger instead of a path.
from keras import models
model = models.load_model('../model_keras/model.h5')

#### Prerequisites

In [33]:
from keras.utils import pad_sequences
import numpy as np

In [60]:
# Model inputs built from the phrases extracted above.
# Hard-coded examples of the expected shape:
#   kg = [('sky', 'ocean')]              ca = [('sky',)]
#   kg = [('Book', 'Creative Writing')]  ca = [('Book',)]

# `kg` holds one (target, source) pair; `ca` holds the target alone as a 1-tuple.
kg = [(target, source)]
ca = [(target,)]

In [35]:
# IPython magic (not plain Python): restore the `vocab` variable that was
# saved earlier with `%store vocab`, presumably by the training notebook.
%store -r vocab

In [36]:
# Assign unique IDs to words in vocabulary. IDs start at 0 and follow the
# iteration order of `vocab`.
# NOTE(review): if `vocab` is an unordered collection (e.g. a set), this order
# may differ from the one used when the model was trained - confirm.
word_to_id = {word: i for i, word in enumerate(vocab)}

In [61]:
# Encode triples and concepts with unique IDs.
# Check the whole input first so that every phrase missing from the
# vocabulary is reported at once, instead of a bare KeyError on the first one.
unknown_words = sorted(
    {word for each in kg + ca for word in each if word not in word_to_id}
)
if unknown_words:
    raise KeyError(f"words not in vocabulary: {unknown_words}")

encoded_triples = [[word_to_id[word] for word in each] for each in kg]
encoded_concepts = [[word_to_id[word] for word in each] for each in ca]

# Pad encoded sequences to the length of the longest one.
# NOTE(review): pad_sequences pads with 0 by default, which is also the ID of
# the first vocabulary word - confirm this matches how the model was trained.
max_length = max(len(seq) for seq in encoded_triples + encoded_concepts)
padded_triples = pad_sequences(encoded_triples, maxlen=max_length, padding='post')
padded_concepts = pad_sequences(encoded_concepts, maxlen=max_length, padding='post')

In [62]:
# Run the loaded Keras model on the two padded inputs (triples, concepts).
y_pred = model.predict([padded_triples, padded_concepts])



In [63]:
# Round each prediction to the nearest integer and flatten to a 1-D int array
# (the recorded output for the example sentence is array([1])).
np.round(y_pred).astype(int).flatten()

array([1])