In [26]:
import nltk
import codecs
from nltk.tokenize import PunktSentenceTokenizer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.corpus import wordnet
import spacy
from nltk.corpus import wordnet as wn
from scipy.spatial.distance import cosine
import gensim.downloader as api

In [27]:
# Load the pre-trained GloVe word embeddings.
model = api.load("glove-wiki-gigaword-200")

In [28]:
# Remove Stop Words . Word Stemming . Return new tokenised list.
def filter_sentence(sentence):
    filtered_sent = []
    filtered_dict = set()
    lemmatizer = WordNetLemmatizer()
    ps = PorterStemmer()

    stop_words = set(stopwords.words("english"))
    words = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(words)

    for w, pos in tagged_words:
        if pos.startswith('NN') and w not in stop_words:
            new = lemmatizer.lemmatize(ps.stem(w))
            if new not in filtered_dict:
                word_vec = get_vector_or_none(new)
                if word_vec is not None:
                    filtered_sent.append(word_vec)
                filtered_dict.add(new)
                for i in create_sym(w):
                    if i not in filtered_dict:
                        sym_vec = get_vector_or_none(i)
                        if sym_vec is not None:
                            filtered_sent.append(sym_vec)
                        filtered_dict.add(i)

    return filtered_sent

In [29]:
# Add synonyms to match list
def create_sym(word):
    synonyms = []

    for syn in wordnet.synsets(word):
        for i in syn.lemmas():
            synonyms.append(i.name())

    return synonyms

In [30]:
def get_vector_or_none(word):
    try:
        word_vec = model[word]
        return word_vec
    except:
        return None

In [31]:
def check_sim(w1_vec, w2_vec):
    # Get the vector representations of the two words.
    return 1 - cosine(w1_vec, w2_vec)

In [32]:
def WSD_Test_Tissue(sentence):
    target_sent = sentence
    file1 = codecs.open("tissue_organ.txt", 'r', 'utf-8')
    sent1 = file1.read().lower()
    file2 = codecs.open("tissue_paper.txt", 'r', "utf-8")
    sent2 = file2.read().lower()

    filtered_sent1 = filter_sentence(sent1)
    filtered_sent2 = filter_sentence(sent2)
    filtered_target = filter_sentence(target_sent)

    target_1_similarity = 0
    target_2_similarity = 0
    
    for i in filtered_target:
        for j in filtered_sent1:
            target_1_similarity = target_1_similarity + check_sim(i, j)

        for j in filtered_sent2:
            target_2_similarity = target_2_similarity + check_sim(i, j)
    
    if target_1_similarity > target_2_similarity:
        return 1
    else:
        return 2

In [39]:
def WSD_Test_Rubbish(sentence):
    target_sent = sentence
    file1 = codecs.open("rubbish_trash.txt", 'r', 'utf-8')
    sent1 = file1.read().lower()
    file2 = codecs.open("rubbish_bull.txt", 'r', "utf-8")
    sent2 = file2.read().lower()

    filtered_sent1 = filter_sentence(sent1)
    filtered_sent2 = filter_sentence(sent2)
    filtered_target = filter_sentence(target_sent)

    target_1_similarity = 0
    target_2_similarity = 0
    
    for i in filtered_target:
        for j in filtered_sent1:
            target_1_similarity = target_1_similarity + check_sim(i, j)

        for j in filtered_sent2:
            target_2_similarity = target_2_similarity + check_sim(i, j)
    
    if target_1_similarity > target_2_similarity:
        return 1
    else:
        return 2

In [43]:
def WSD_Test_Yarn(sentence):
    target_sent = sentence
    file1 = codecs.open("yarn_recital.txt", 'r', 'utf-8')
    sent1 = file1.read().lower()
    file2 = codecs.open("yarn_thread.txt", 'r', "utf-8")
    sent2 = file2.read().lower()

    filtered_sent1 = filter_sentence(sent1)
    filtered_sent2 = filter_sentence(sent2)
    filtered_target = filter_sentence(target_sent)

    target_1_similarity = 0
    target_2_similarity = 0
    
    for i in filtered_target:
        for j in filtered_sent1:
            target_1_similarity = target_1_similarity + check_sim(i, j)

        for j in filtered_sent2:
            target_2_similarity = target_2_similarity + check_sim(i, j)
    
    if target_1_similarity > target_2_similarity:
        return 1
    else:
        return 2

In [36]:
sense1 = [
    "The tissue in his injured leg was slowly healing over time.",
    "The doctor examined the tissue under a microscope to look for abnormalities.",
    "She donated tissue for research purposes to help find a cure for the disease.",
    "The plant's tissue was affected by the harsh weather conditions and began to wilt.",
    "The tumor was growing in the brain tissue and causing severe symptoms.",
#     "He studied the tissue samples to learn more about the genetic makeup of the organism.",
#     "The tissue in the heart is responsible for pumping blood throughout the body.",
#     "She suffered a tear in the muscle tissue during the workout.",
#     "The biopsy revealed abnormal tissue growth that required further examination.",
#     "The tissue lining the stomach protects it from the acidic environment.",
#     "The tissue around the wound was swollen and inflamed.",
#     "The disease attacks the nervous tissue in the brain and spinal cord.",
#     "The tissue in the bone marrow produces new blood cells.",
#     "The bird's beak is made of a tough tissue that can break through hard shells.",
#     "The fruit's tissue contains a high amount of water and nutrients.",
#     "The tumor was successfully removed without damaging surrounding tissue.",
#     "He needed surgery to repair the damaged tissue in his knee.",
#     "The tissue in the liver filters and processes toxins from the body.",
#     "The flower's tissue is delicate and easily damaged by strong winds.",
#     "The organism's tissue was able to regenerate after being injured.",
#     "The biopsy showed that the tissue was cancerous and required immediate treatment.",
#     "The tissue in the lungs is responsible for exchanging oxygen and carbon dioxide.",
#     "The tissue in the skin helps to regulate body temperature and protect against pathogens.",
#     "The tissue in the eyes allows us to see by transmitting visual information to the brain.",
#     "The athlete strained the soft tissue in his ankle and had to sit out the game.",
#     "The tumor was located in the connective tissue and required specialized treatment.",
#     "The tissue in the blood vessels is responsible for regulating blood pressure and flow.",
#     "The tissue in the intestines absorbs nutrients from food and eliminates waste.",
#     "The tissue in the kidneys filters waste and excess water from the blood.",
#     "The organism's tissue was damaged by exposure to radiation.",
]

# sense2 = [
#     "I used tissue paper to wrap the gift.",
#  "She blew her nose with a tissue.",
#  "He wiped his tears away with a tissue.",
#  "The tissue was soft and absorbent.",
#  "She dabbed some perfume on her wrist with a tissue.",
#  "I always keep a pack of tissue paper in my bag.",
#  "He carefully cleaned the wound with a tissue.",
#  "She wiped off her lipstick with a tissue.",
#  "The tissue stuck to his sweaty forehead.",
#  "She folded the tissue neatly and put it away.",
#  "I threw the used tissue into the trash.",
#  "He sneezed into a tissue and threw it away.",
#  "She used a tissue to clean the smudges off her glasses.",
#  "I unfolded the tissue and saw a message written on it.",
#  "The tissue was too thin and tore easily.",
#  "She crumpled the tissue in her hand and threw it at him.",
#  "The tissue floated away in the wind.",
#  "He used a tissue to blot the excess oil from his face.",
#  "She carefully wrapped the delicate object with tissue paper.",
#  "I wiped the spilled coffee off the table with a tissue.",
#  "He pulled out a tissue and blew his nose loudly.",
#  "She used a tissue to wipe the sweat from her forehead.",
#  "The tissue box was empty and needed to be refilled.",
#  "I tore off a piece of tissue and used it to clean the spilled sauce.",
#  "He nervously wiped his palms with a tissue.",
#  "She held the tissue to her mouth to stifle her sobs.",
#  "The tissue was scented with lavender.",
#  "I balled up the tissue and threw it in the trash can.",
#  "He wrapped the fragile ornament with tissue paper to protect it during transport.",
#  "She used a tissue to dab at the sweat on her upper lip.",
#  "The tissue disintegrated when it was wet."
# ]

def count_ones(arr):
    count = 0
    for val in arr:
        if val == 1:
            count += 1
    return count

print("Sense 1")
sense_1_results = []
import timeit
start = timeit.default_timer()
for sent in sense1:
    sense_1_results.append(WSD_Test_Tissue(sent))
stop = timeit.default_timer()
print('Time: ', stop - start)  
print(f"accuracy: {count_ones(sense_1_results)/len(sense_1_results)}")

# print("Sense 2")
# sense_2_results = []
# for sent in sense2:
#     sense_2_results.append(WSD_Test_Tissue(sent))
# print(f"accuracy: {(len(sense_2_results)-count_ones(sense_2_results))/len(sense_2_results)}")

Sense 1
Time:  3.2946856999999454
accuracy: 1.0


In [42]:
def count_ones(arr):
    count = 0
    for val in arr:
        if val == 1:
            count += 1
    return count
filename = "testp2.txt"   # Replace with your file name
lines = []                 # Initialize an empty list to store lines

with open(filename, "r") as file:
    for line in file:
        lines.append(line.strip())  
print(sents)
print("Sense 1")
sense_1_results = []
import timeit
start = timeit.default_timer()
for line in lines:
    print(line)
    sense_1_results.append(WSD_Test_Rubbish(line))
stop = timeit.default_timer()
print('Time: ', stop - start)  
print(f"accuracy: {count_ones(sense_1_results)/len(sense_1_results)}")
print(sense_1_results)

# print("Sense 2")
# sense_2_results = []
# for sent in sense2:
#     sense_2_results.append(WSD_Test_Tissue(sent))
# print(f"accuracy: {(len(sense_2_results)-count_ones(sense_2_results))/len(sense_2_results)}")

we charge by the amount of space that rubbish takes up. 
after the 9/11 attacks, much of the debris was taken to fresh kills - the former rubbish dump for the city.
bring back the days when we used to talk rubbish about men, sex and any other silly subject that made us laugh. 
the three-part recycling bin makes easy work of sorting your rubbish before collection day. 
traditionally you shouldn't shower, wash your hair, take the rubbish out, or sweep the floor on the first day of chinese new year.

Sense 1
We charge by the amount of space that rubbish takes up.
rubbish called
After the 9/11 attacks, much of the debris was taken to Fresh Kills - the former rubbish dump for the city.
rubbish called
Bring back the days when we used to talk rubbish about men, sex and any other silly subject that made us laugh.
rubbish called
The three-part recycling bin makes easy work of sorting your rubbish before collection day.
rubbish called
Traditionally you shouldn't shower, wash your hair, take the 