In [None]:
!pip install --quiet transformers==4.8.1 sentencepiece==0.1.95 flashtext==2.7 sentence-transformers==2.2.2 sense2vec==2.0.0 textwrap3==0.9.2
!pip install git+https://github.com/boudinfl/pke.git
nltk.download('punkt')
nltk.download('brown')
nltk.download('wordnet')
nltk.download('stopwords')

from flashtext import KeywordProcessor
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, BartTokenizer, BartForConditionalGeneration
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string
import pke
import traceback

In [None]:
# Download the sense2vec vectors archive and unpack it into the working directory.
# NOTE(review): a later cell loads vectors from 's2v_old' — presumably the directory this
# archive extracts to; confirm.
!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
!tar -xvf  s2v_reddit_2015_md.tar.gz

In [None]:
# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Tokenizer and model for summarization, loaded from the 'facebook/bart-large-cnn' checkpoint.
# NOTE(review): summary_model is not moved to `device` here, unlike question_model below.
summary_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
summary_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

# Model and tokenizer for question generation, loaded from 'ramsrigouthamg/t5_squad_v1';
# the model is moved to `device`.
question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_model = question_model.to(device)

In [None]:
import numpy as np
from sense2vec import Sense2Vec
# Load the sense2vec vectors from the local 's2v_old' directory.
# NOTE(review): presumably the directory produced by extracting the downloaded
# s2v_reddit_2015_md archive — confirm.
s2v = Sense2Vec().from_disk('s2v_old')

In [None]:
def summarizer(text):
  """Summarize `text` with the summarization model loaded by this notebook.

  Uses the module-level `summary_tokenizer` / `summary_model` (the previous
  `tokenizer` / `model` names are only defined in commented-out code).

  Args:
    text: the passage to summarize; newlines are replaced by spaces and the
      input is truncated to 1024 tokens.

  Returns:
    The decoded summary string (special tokens removed).
  """
  text = text.strip().replace("\n"," ")
  # NOTE(review): the "summarize: " task prefix is kept from the original code;
  # confirm it is actually wanted for this checkpoint.
  text = "summarize: "+text
  input_tokens = summary_tokenizer.batch_encode_plus([text], return_tensors='pt', max_length=1024, truncation=True)['input_ids']
  # Keep the inputs on whatever device the model currently lives on.
  input_tokens = input_tokens.to(summary_model.device)
  encoded_ids = summary_model.generate(input_tokens, num_beams=4, length_penalty=2.0, max_length=500, min_length=250, no_repeat_ngram_size=3)
  # A single input was encoded, so only the first generated sequence is decoded.
  summary = summary_tokenizer.decode(encoded_ids[0], skip_special_tokens=True)
  return summary

In [None]:
def get_nouns_multipartite(content):
    """Extract up to 15 noun / proper-noun keyphrases from `content` with pke's MultipartiteRank.

    Args:
        content: raw English text to analyse.

    Returns:
        A list of keyphrase strings in the order returned by `get_n_best`.
        Extraction is best-effort: on any error the traceback is printed and
        an empty list is returned.
    """
    out = []
    try:
        # Punctuation, bracket placeholder tokens and English stopwords must not
        # appear inside candidates. The list was previously built but never used.
        stoplist = list(string.punctuation)
        stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
        stoplist += stopwords.words('english')

        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=content, language='en', stoplist=stoplist)

        # Only nouns and proper nouns are eligible candidates.
        extractor.candidate_selection(pos={'PROPN', 'NOUN'})

        extractor.candidate_weighting(alpha=1.1,
                                      threshold=0.75,
                                      method='average')

        # get_n_best yields (keyphrase, score) pairs; keep the phrase only.
        out = [val[0] for val in extractor.get_n_best(n=15)]
    except Exception:
        # Deliberately best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
        out = []
        traceback.print_exc()

    return out

In [None]:
def get_keywords(originaltext, summarytext):
  """Return the keyphrases of `originaltext` that also occur in `summarytext`.

  Keyphrases come from `get_nouns_multipartite(originaltext)`; a flashtext
  `KeywordProcessor` is used to find which of them appear in the summary.
  The result keeps the order of the extracted keyphrases. Both intermediate
  lists are printed.
  """
  candidates = get_nouns_multipartite(originaltext)
  print ("keywords unsummarized: ",candidates)

  matcher = KeywordProcessor()
  for phrase in candidates:
    matcher.add_keyword(phrase)

  # De-duplicate the matches found in the summary.
  matched = list(set(matcher.extract_keywords(summarytext)))
  print ("keywords_found in summarized: ",matched)

  return [phrase for phrase in candidates if phrase in matched]

# NOTE(review): `text` is assigned in a later cell and `summarized_text` is only assigned in
# commented-out code in this file, so this call relies on variables that are not defined
# at this point of a top-to-bottom run.
imp_keywords = get_keywords(text, summarized_text)
print (imp_keywords)

In [None]:
def get_question(context, answer, model, tokenizer):
    """Generate one question about `answer` from `context`.

    The prompt "context: ... answer: ..." is tokenized (truncated to 384
    tokens), moved to the module-level `device`, and passed to
    `model.generate` with beam search (5 beams, at most 72 tokens).

    Returns:
        The first decoded sequence with any "question:" marker removed and
        surrounding whitespace stripped.
    """
    prompt = "context: {} answer: {}".format(context, answer)
    encoded = tokenizer.encode_plus(
        prompt,
        max_length=384,
        pad_to_max_length=False,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    generated = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        early_stopping=True,
        num_beams=5,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        max_length=72,
    )

    # One sequence is requested, so only the first one is decoded.
    first_sequence = tokenizer.decode(generated[0], skip_special_tokens=True)
    return first_sequence.replace("question:", "").strip()


def categorize_question_difficulty(question, answer):
    """Heuristically label a question as "Easy", "Medium" or "Hard".

    Scoring:
      * base score from the question's word count: fewer than 5 words -> 1,
        fewer than 10 -> 2, otherwise 3;
      * minus 1 when the (non-empty) answer text occurs in the question,
        compared case-insensitively.

    The score therefore lies in 0..3 and maps to: <= 1 "Easy", 2 "Medium",
    >= 3 "Hard". The previous thresholds (<= 2 / <= 4) made "Hard" unreachable.

    Args:
        question: the generated question text.
        answer: the expected answer text.

    Returns:
        One of "Easy", "Medium", "Hard".
    """
    word_count = len(question.split())
    if word_count < 5:
        difficulty_score = 1
    elif word_count < 10:
        difficulty_score = 2
    else:
        difficulty_score = 3

    # An empty string is a substring of every string, so a blank answer must not
    # be treated as "answer leaked in the question".
    answer_text = answer.strip().lower()
    if answer_text and answer_text in question.lower():
        difficulty_score -= 1

    if difficulty_score <= 1:
        return "Easy"
    if difficulty_score == 2:
        return "Medium"
    return "Hard"


# Generate and print one question (with its difficulty label) for every important keyword,
# using the summary as context and the keyword as the expected answer.
# NOTE(review): relies on `imp_keywords` and `summarized_text` already existing in the kernel.
for answer in imp_keywords:
    ques = get_question(summarized_text, answer, question_model, question_tokenizer)
    difficulty = categorize_question_difficulty(ques, answer)

    print(f"Question Difficulty: {difficulty}")
    print("Question:", ques)
    print("Answer:", answer.capitalize())
    print("\n")

In [None]:
# from textwrap3 import wrap

# Sample input passage for the pipeline (an excerpt about nationalism in Europe).
# NOTE(review): `text` is referenced by a cell that appears earlier in this file than
# this assignment, so a top-to-bottom run needs this cell moved up.
text = """In 1848, Frédéric Sorrieu, a French artist, prepared a series of four
prints visualising his dream of a world made up of ‘democratic
and social Republics’, as he called them. The first print (Fig. 1) of the
series, shows the peoples of Europe and America – men and women
of all ages and social classes – marching in a long train, and offering
homage to the statue of Liberty as they pass by it. As you would
recall, artists of the time of the French Revolution personified Liberty
as a female figure – here you can recognise the torch of Enlightenment
she bears in one hand and the Charter of the Rights of Man in the
other. On the earth in the foreground of the image lie the shattered
remains of the symbols of absolutist institutions. In Sorrieu’s
utopian vision, the peoples of the world are grouped as distinct
nations, identified through their flags and national costume. Leading
the procession, way past the statue of Liberty, are the United States
and Switzerland, which by this time were already nation-states. France, identifiable by the revolutionary tricolour, has just reached the statue.
She is followed by the peoples of Germany, bearing the black, red
and gold flag. Interestingly, at the time when Sorrieu created this
image, the German peoples did not yet exist as a united nation – the
flag they carry is an expression of liberal hopes in 1848 to unify the
numerous German-speaking principalities into a nation-state under
a democratic constitution. Following the German peoples are the
peoples of Austria, the Kingdom of the Two Sicilies, Lombardy,
Poland, England, Ireland, Hungary and Russia. From the heavens
above, Christ, saints and angels gaze upon the scene. They have
been used by the artist to symbolise fraternity among the nations of
the world.
This chapter will deal with many of the issues visualised by Sorrieu
in Fig. 1. During the nineteenth century, nationalism emerged as a
force which brought about sweeping changes in the political and
mental world of Europe. The end result of these changes was the
emergence of the nation-state in place of the multi-national dynastic
empires of Europe. The concept and practices of a modern state, in
which a centralised power exercised sovereign control over a clearly
defined territory, had been developing over a long period of time
in Europe. But a nation-state was one in which the majority of its
citizens, and not only its rulers, came to develop a sense of common
identity and shared history or descent. This commonness did not
exist from time immemorial; it was forged through struggles, through
the actions of leaders and the common people. This chapter will
look at the diverse processes through which nation-states and
nationalism came into being in nineteenth-century Europe.
The first clear expression of nationalism came with
the French Revolution in 1789. France, as you
would remember, was a full-fledged territorial state
in 1789 under the rule of an absolute monarch.
The political and constitutional changes that came
in the wake of the French Revolution led to the
transfer of sovereignty from the monarchy to a
body of French citizens. The revolution proclaimed
that it was the people who would henceforth
constitute the nation and shape its destiny.
From the very beginning, the French revolutionaries
introduced various measures and practices that
could create a sense of collective identity amongst
the French people. The ideas of la patrie (the
fatherland) and le citoyen (the citizen) emphasised
the notion of a united community enjoying equal rights under a
constitution. A new French flag, the tricolour, was chosen to replace
the former royal standard. The Estates General was elected by the
body of active citizens and renamed the National Assembly. New
hymns were composed, oaths taken and martyrs commemorated,
all in the name of the nation. A centralised administrative system
was put in place and it formulated uniform laws for all citizens
within its territory. Internal customs duties and dues were abolished
and a uniform system of weights and measures was adopted.
Regional dialects were discouraged and French, as it was spoken
and written in Paris, became the common language of the nation.
The revolutionaries further declared that it was the mission and the
destiny of the French nation to liberate the peoples of Europe
from despotism, in other words to help other peoples of Europe
to become nations.
When the news of the events in France reached the different cities
of Europe, students and other members of educated middle classes
began setting up Jacobin clubs. Their activities and campaigns
prepared the way for the French armies which moved into Holland,
Belgium, Switzerland and much of Italy in the 1790s. With the
outbreak of the revolutionary wars, the French armies began to
carry the idea of nationalism abroad. Within the wide swathe of territory that came under his control,
Napoleon set about introducing many of the reforms that he had
already introduced in France. Through a return to monarchy
Napoleon had, no doubt, destroyed democracy in France, but in
the administrative field he had incorporated revolutionary principles
in order to make the whole system more rational and efficient. The
Civil Code of 1804 – usually known as the Napoleonic Code –
did away with all privileges based on birth, established equality
before the law and secured the right to property. This Code was
exported to the regions under French control. In the Dutch Republic,
in Switzerland, in Italy and Germany, Napoleon simplified
administrative divisions, abolished the feudal system and freed
peasants from serfdom and manorial dues. In the towns too, guild
restrictions were removed. Transport and communication systems
were improved. Peasants, artisans, workers and new businessmen enjoyed a new-found freedom. Businessmen and small-scale
producers of goods, in particular, began to realise that uniform
laws, standardised weights and measures, and a common national
currency would facilitate the movement and exchange of goods
and capital from one region to another.
However, in the areas conquered, the reactions of the local
populations to French rule were mixed. Initially, in many places such
as Holland and Switzerland, as well as in certain cities like Brussels,
Mainz, Milan and Warsaw, the French armies were welcomed as
harbingers of liberty. But the initial enthusiasm soon turned to hostility,
as it became clear that the new administrative arrangements did not
go hand in hand with political freedom. Increased taxation,
censorship, forced conscription into the French armies required to
conquer the rest of Europe, all seemed to outweigh the advantages
of the administrative changes. If you look at the map of mid-eighteenth-century Europe you will
find that there were no ‘nation-states’ as we know them today.
What we know today as Germany, Italy and Switzerland were
divided into kingdoms, duchies and cantons whose rulers had their
autonomous territories. Eastern and Central Europe were under
autocratic monarchies within the territories of which lived diverse
peoples. They did not see themselves as sharing a collective identity
or a common culture. Often, they even spoke different languages
and belonged to different ethnic groups. The Habsburg Empire
that ruled over Austria-Hungary, for example, was a patchwork of
many different regions and peoples. It included the Alpine regions
– the Tyrol, Austria and the Sudetenland – as well as Bohemia,
where the aristocracy was predominantly German-speaking. It also
included the Italian-speaking provinces of Lombardy and Venetia.
In Hungary, half of the population spoke Magyar while the other
half spoke a variety of dialects. In Galicia, the aristocracy spoke
Polish. Besides these three dominant groups, there also lived within
the boundaries of the empire, a mass of subject peasant peoples –
Bohemians and Slovaks to the north, Slovenes in Carniola, Croats
to the south, and Roumans to the east in Transylvania. Such
differences did not easily promote a sense of political unity. The
only tie binding these diverse groups together was a common
allegiance to the emperor. """

# for wrp in wrap(text, 150):
#   print (wrp)
# print ("\n")

In 1848, Frédéric Sorrieu, a French artist, prepared a series of four prints visualising his dream of a world made up of ‘democratic and social
Republics’, as he called them. The first print (Fig. 1) of the series, shows the peoples of Europe and America – men and women of all ages and social
classes – marching in a long train, and offering homage to the statue of Liberty as they pass by it. As you would recall, artists of the time of the
French Revolution personified Liberty as a female figure – here you can recognise the torch of Enlightenment she bears in one hand and the Charter of
the Rights of Man in the other. On the earth in the foreground of the image lie the shattered remains of the symbols of absolutist institutions. In
Sorrieu’s utopian vision, the peoples of the world are grouped as distinct nations, identified through their flags and national costume. Leading the
procession, way past the statue of Liberty, are the United States and Switzerland, which by this time were alr

# **Summarization**

In [None]:
# import torch
# from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, BartTokenizer, BartForConditionalGeneration
# # summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
# # summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')

# # summary_model = summary_model.to(device)
# # summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
# model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')


time: 10.4 s (started: 2023-09-06 14:08:21 +00:00)


In [None]:
# import random
# import numpy as np

# def set_seed(seed: int):
#     random.seed(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)

# set_seed(42)

time: 4.58 ms (started: 2023-09-06 13:51:41 +00:00)


In [None]:


# def postprocesstext (content):
#   final=""
#   for sent in sent_tokenize(content):
#     sent = sent.capitalize()
#     final = final +" "+sent
#   return final




#   max_len = 512
#   encoding = tokenizer.encode_plus(text,max_length=max_len, pad_to_max_length=False,truncation=True, return_tensors="pt").to(device)

#   input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]

#   outs = model.generate(input_ids=input_ids,
#                                   attention_mask=attention_mask,
#                                   early_stopping=True,
#                                   num_beams=3,
#                                   num_return_sequences=1,
#                                   no_repeat_ngram_size=2,
#                                   min_length = 500,
#                                   max_length=1500)


#   dec = [tokenizer.decode(ids,skip_special_tokens=True) for ids in outs]
#   summary = dec[0]
#   summary = postprocesstext(summary)
#   summary= summary.strip()




# print ("\noriginal Text >>")
# for wrp in wrap(text, 150):
#   print (wrp)
# print ("\n")
# print ("Summarized Text >>")
# for wrp in wrap(summarized_text, 150):
# summarized_text = summarizer(text)
# print (summarized_text)
# print ("\n")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!



original Text >>
In 1848, Frédéric Sorrieu, a French artist, prepared a series of four prints visualising his dream of a world made up of ‘democratic and social
Republics’, as he called them. The first print (Fig. 1) of the series, shows the peoples of Europe and America – men and women of all ages and social
classes – marching in a long train, and offering homage to the statue of Liberty as they pass by it. As you would recall, artists of the time of the
French Revolution personified Liberty as a female figure – here you can recognise the torch of Enlightenment she bears in one hand and the Charter of
the Rights of Man in the other. On the earth in the foreground of the image lie the shattered remains of the symbols of absolutist institutions. In
Sorrieu’s utopian vision, the peoples of the world are grouped as distinct nations, identified through their flags and national costume. Leading the
procession, way past the statue of Liberty, are the United States and Switzerland, which by 

Not-so-great keyword generation

In [None]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# from nltk.tokenize import sent_tokenize
# from nltk.corpus import stopwords
# nltk.download('stopwords')
# # Define a list of documents (in this case, just one document)
# documents = sent_tokenize(summarized_text)

# # Create a TfidfVectorizer
# tfidf_vectorizer = TfidfVectorizer()

# # Fit and transform the documents to calculate TF-IDF
# tfidf_matrix = tfidf_vectorizer.fit_transform(documents)

# # Get the feature names (words) from the vectorizer
# feature_names = tfidf_vectorizer.get_feature_names_out()

# # Convert the TF-IDF matrix to a dictionary for easy access
# tfidf_dict = {feature_names[i]: tfidf_matrix[0, i] for i in range(len(feature_names))}

# sorted_tfidf_dict = dict(sorted(tfidf_dict.items(), key=lambda item: item[1], reverse=True))

# # Print the TF-IDF scores for each word
# # print(sorted_tfidf_dict)
# imp_keywords = []
# for i in sorted_tfidf_dict:
#     if sorted_tfidf_dict[i] > 0:
#         imp_keywords.append(i)

# stop_words = set(stopwords.words('english'))
# imp_keywords = [word for word in imp_keywords if word not in stop_words]
# print("Removed stopwords: ", imp_keywords)

Removed stopwords:  ['1848', 'artist', 'democratic', 'dream', 'first', 'four', 'frédéric', 'made', 'prepared', 'print', 'prints', 'republics', 'sorrieu', 'visualising', 'fig', 'series', 'social', 'world', 'french']
time: 24.5 ms (started: 2023-09-06 14:40:38 +00:00)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# **Question generation with T5**

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.86k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


time: 6min 6s (started: 2023-09-06 14:18:08 +00:00)


Question Difficulty: Medium
Question: The first print of Sorrieu's series shows who marching in a train?
Answer: Peoples


Question Difficulty: Medium
Question: Nationalism emerged as a force which brought about sweeping changes in what?
Answer: Europe


Question Difficulty: Medium
Question: Along with Holland and Italy, what country did the French armies invade in the 1790s?
Answer: Switzerland


Question Difficulty: Easy
Question: What country did Napoleon destroy democracy in?
Answer: France


Question Difficulty: Easy
Question: Who created a series of four prints in 1848?
Answer: Frédéric sorrieu


Question Difficulty: Medium
Question: What did the French armies move into in the 1790s?
Answer: Territory


Question Difficulty: Medium
Question: What statue did the people of Europe and America offer homage to?
Answer: Liberty


Question Difficulty: Easy
Question: What was Sorrieu's dream of?
Answer: World


Question Difficulty: Easy
Question: Who destroyed democracy in France?
Answer:

In [None]:
from sentence_transformers import SentenceTransformer
# Sentence-embedding model, loaded from the 'msmarco-distilbert-base-v3' checkpoint;
# it is passed to get_distractors as the encoder for the question and the candidates.
sentence_transformer_model = SentenceTransformer('msmarco-distilbert-base-v3')

Downloading (…)da7dc/.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)3fc4bda7dc/README.md:   0%|          | 0.00/3.71k [00:00<?, ?B/s]

Downloading (…)c4bda7dc/config.json:   0%|          | 0.00/545 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/265M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)da7dc/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/499 [00:00<?, ?B/s]

Downloading (…)3fc4bda7dc/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)4bda7dc/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

time: 38.1 s (started: 2023-09-06 14:30:51 +00:00)


In [None]:
from similarity.normalized_levenshtein import NormalizedLevenshtein
# Shared string-similarity scorer used by get_highest_similarity_score.
normalized_levenshtein = NormalizedLevenshtein()

def filter_same_sense_words(original,wordlist):
  """Keep the entries of `wordlist` whose sense tag equals that of `original`.

  `original` is a "word|SENSE" key; each entry of `wordlist` is a sequence whose
  first item is such a key. The sense of `original` is printed. Matching entries
  are returned as display words: underscores become spaces, the text is
  title-cased and surrounding whitespace is stripped.
  """
  base_sense = original.split('|')[1]
  print (base_sense)
  return [
    entry[0].split('|')[0].replace("_", " ").title().strip()
    for entry in wordlist
    if entry[0].split('|')[1] == base_sense
  ]

def get_highest_similarity_score(wordlist,wrd):
  """Return the largest similarity between `wrd` and any word in `wordlist`.

  Each comparison is done on lower-cased strings with the module-level
  `normalized_levenshtein` scorer. An empty `wordlist` raises ValueError
  (from `max`).
  """
  return max(
    normalized_levenshtein.similarity(candidate.lower(), wrd.lower())
    for candidate in wordlist
  )

def sense2vec_get_words(word,s2v,topn,question):
    """Collect distractor candidates for `word` from a sense2vec model.

    Args:
        word: the answer word/phrase to find neighbours for.
        s2v: sense2vec model providing `get_best_sense` and `most_similar`.
        topn: number of neighbours requested from `most_similar`.
        question: question text; neighbours that equal one of its
            whitespace-separated tokens are skipped.

    Returns:
        Neighbours sharing the sense of `word`, keeping only those whose
        similarity to `word` and to every already accepted neighbour is below
        0.6 (see `get_highest_similarity_score`), so near-duplicates are
        dropped. Returns [] when no sense is found or the lookup fails.
    """
    output = []
    print ("word ",word)
    try:
        # "WORK_OF_ART" is the underscore-joined label; the previous
        # "WORK OF ART" spelling could never match a key.
        sense = s2v.get_best_sense(word, senses= ["NOUN", "PERSON","PRODUCT","LOC","ORG","EVENT","NORP","WORK_OF_ART","FAC","GPE","NUM","FACILITY"])
        # No usable sense means there is nothing to look up; previously this
        # case was only handled by the exception path.
        if sense is not None:
            most_similar = s2v.most_similar(sense, n=topn)
            output = filter_same_sense_words(sense,most_similar)
            print ("Similar ",output)
    except Exception:
        # Still best-effort, but the failure is reported instead of silently swallowed.
        traceback.print_exc()
        output = []

    threshold = 0.6
    final = [word]
    checklist = question.split()
    for x in output:
        if get_highest_similarity_score(final,x)<threshold and x not in final and x not in checklist:
            final.append(x)

    # The first element is the answer word itself, not a distractor.
    return final[1:]

def mmr(doc_embedding, word_embeddings, words, top_n, lambda_param):
    """Select `top_n` entries of `words` by Maximal Marginal Relevance.

    The first pick is the word most similar to the document embedding. Each
    following pick maximises
    `lambda_param * sim(word, doc) - (1 - lambda_param) * max sim(word, picked)`
    over the words not picked yet, using cosine similarity.

    Returns:
        The selected words in the order they were picked.
    """
    relevance = cosine_similarity(word_embeddings, doc_embedding)
    pairwise = cosine_similarity(word_embeddings)

    selected = [np.argmax(relevance)]
    remaining = [idx for idx in range(len(words)) if idx != selected[0]]

    picks_left = top_n - 1
    while picks_left > 0:
        picks_left -= 1
        # Relevance of every remaining word to the document (column vector).
        remaining_relevance = relevance[remaining, :]
        # Highest similarity of every remaining word to anything already selected.
        redundancy = np.max(pairwise[remaining][:, selected], axis=1)
        scores = (lambda_param) * remaining_relevance - (1-lambda_param) * redundancy.reshape(-1, 1)
        best = remaining[np.argmax(scores)]
        selected.append(best)
        remaining.remove(best)
    return [words[idx] for idx in selected]

time: 11.9 ms (started: 2023-09-06 14:31:46 +00:00)


In [None]:
from collections import OrderedDict
from sklearn.metrics.pairwise import cosine_similarity

# def get_distractors_wordnet(word):
#     distractors=[]
#     try:
#       syn = wn.synsets(word,'n')[0]

#       word= word.lower()
#       orig_word = word
#       if len(word.split())>0:
#           word = word.replace(" ","_")
#       hypernym = syn.hypernyms()
#       if len(hypernym) == 0:
#           return distractors
#       for item in hypernym[0].hyponyms():
#           name = item.lemmas()[0].name()
#           #print ("name ",name, " word",orig_word)
#           if name == orig_word:
#               continue
#           name = name.replace("_"," ")
#           name = " ".join(w.capitalize() for w in name.split())
#           if name is not None and name not in distractors:
#               distractors.append(name)
#     except:
#       print ("Wordnet distractors not found")
#     return distractors

def get_distractors(word, origsentence, sense2vecmodel, sentencemodel, top_n, lambdaval):
  """Return up to four distractors for the answer `word` of question `origsentence`.

  Candidates come from `sense2vec_get_words` and are printed. The capitalized
  answer plus the candidates are embedded with `sentencemodel` and ranked by
  `mmr` against the embedding of "<question> <Answer>", keeping at most five
  entries. The answer itself is then removed and the rest is returned
  capitalized. If there are no candidates, the empty candidate list is
  returned.
  """
  distractors = sense2vec_get_words(word,sense2vecmodel,top_n,origsentence)
  print ("distractors ",distractors)
  if not distractors:
    return distractors

  answer = word.capitalize()
  pool = [answer] + distractors
  query_embedding = sentencemodel.encode([origsentence+ " "+answer])
  pool_embeddings = sentencemodel.encode(pool)
  ranked = mmr(query_embedding, pool_embeddings, pool, min(len(pool),5), lambdaval)

  # Drop the answer itself (compared case-insensitively) from the ranked pool.
  return [candidate.capitalize() for candidate in ranked if candidate.lower() != word.lower()]

# Demo: print the distractors generated for one question/answer pair.
sent="What statue is depicted in the four prints?"
keyword="Liberty"

# Alternative question/answer pairs kept for manual experiments:
# sent = "What cryptocurrency did Musk rarely tweet about?"
# keyword = "Bitcoin"

# sent = "What did Musk say he was working with to improve system transaction efficiency?"
# keyword= "Dogecoin"

# sent = "What company did Musk say would not accept bitcoin payments?"
# keyword= "Tesla"

# sent = "What has Musk often tweeted in support of?"
# keyword = "Cryptocurrency"

# 40 neighbours are requested from sense2vec; 0.2 is the lambda passed on to mmr.
print(get_distractors(keyword,sent,s2v,sentence_transformer_model, 40, 0.2))

word  Liberty
NOUN
Similar  ['Statue', 'Nation', 'Independence', 'Founding', 'Dignity', 'Freedom', 'Convention', 'Museum', 'Railroad', 'Ridge', 'Masonic', 'Glorious', 'Capitol', 'Liberty', 'Commerce', 'Corridor', 'Revolution', 'Citizen', 'Manhattan', 'Rights', 'Homestead', '-The', 'Justice', 'Maritime', 'Liberties', 'Congress', 'National', 'Act', 'Liberation', 'Colony', 'Institute', 'The', 'Chapel', 'Circus', 'Economic', 'Paradise']
distractors  ['Statue', 'Nation', 'Independence', 'Founding', 'Dignity', 'Freedom', 'Convention', 'Museum', 'Railroad', 'Ridge', 'Masonic', 'Glorious', 'Capitol', 'Commerce', 'Corridor', 'Revolution', 'Citizen', 'Manhattan', 'Rights', 'Homestead', '-The', 'Justice', 'Maritime', 'Congress', 'Act', 'Colony', 'Institute', 'Chapel', 'Circus', 'Economic', 'Paradise']
['Corridor', 'Museum', 'Economic', 'Founding']
time: 4 s (started: 2023-09-06 14:31:50 +00:00)
