In [1]:
import spacy
from spacy.language import Language


In [2]:
# Load the 'en_core_web_lg' pipeline with the 'ner' and 'parser' components
# disabled; they are not used by the cells visible in this notebook.
nlp = spacy.load(
    'en_core_web_lg',
    disable=['ner', 'parser'],
)

# Keep a handle on spaCy's English STOP_WORDS object.
spacy_stopwords = spacy.lang.en.stop_words.STOP_WORDS

# Print the names of the components that remain in the loaded pipeline.
print(nlp.pipe_names)

['tok2vec', 'tagger', 'attribute_ruler', 'lemmatizer']


In [3]:
# Sample passage about supervised learning used as the input text for the
# following cells. NOTE: despite the name, `doc` is a plain str (not a spaCy
# Doc); the line breaks and leading indentation below are part of the string.
doc = """Supervised learning is the machine learning task of 
         learning a function that maps an input to an output based 
         on example input-output pairs [1] It infers a function 
         from labeled training data consisting of a set of 
         training examples [2] In supervised learning, each 
         example is a pair consisting of an input object 
         (typically a vector) and a desired output value (also 
         called the supervisory signal). A supervised learning 
         algorithm analyzes the training data and produces an 
         inferred function, which can be used for mapping new 
         examples. An optimal scenario will allow for the algorithm 
         to correctly determine the class labels for unseen 
         instances. This requires the learning algorithm to  
         generalize from the training data to unseen situations 
         in a 'reasonable' way (see inductive bias)?
      """

In [4]:
# Echo the raw string so its embedded newlines and indentation are visible.
doc

"Supervised learning is the machine learning task of \n         learning a function that maps an input to an output based \n         on example input-output pairs [1] It infers a function \n         from labeled training data consisting of a set of \n         training examples [2] In supervised learning, each \n         example is a pair consisting of an input object \n         (typically a vector) and a desired output value (also \n         called the supervisory signal). A supervised learning \n         algorithm analyzes the training data and produces an \n         inferred function, which can be used for mapping new \n         examples. An optimal scenario will allow for the algorithm \n         to correctly determine the class labels for unseen \n         instances. This requires the learning algorithm to  \n         generalize from the training data to unseen situations \n         in a 'reasonable' way (see inductive bias)?\n      "

In [5]:
import re
def preprocessor(text: str) -> str:
    """Normalise a piece of text to a lowercase run of word characters.

    Two passes are applied in order:

    1. Anything that looks like a markup tag (``<...>``) is removed.
    2. The text is lowercased and every run of non-word characters
       (anything not matched by ``\\w``: punctuation, whitespace, symbols)
       is deleted, so separate words are joined without a separator.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; empty if ``text`` held no word characters.
    """
    # Raw strings keep the backslash in \W from being read as a (invalid)
    # Python string escape, which newer interpreters warn about.
    without_tags = re.sub(r'<[^>]*>', '', text)
    return re.sub(r'\W+', '', without_tags.lower())


# Run the pipeline over the sample passage; `tokens` is the resulting spaCy Doc.
tokens = nlp(doc)

# Clean the lemma of every non-stop-word token and keep only the ones that are
# still non-empty afterwards (preprocessor() strips all non-word characters,
# so e.g. punctuation-only lemmas collapse to '').
cleaned_lemmas = []
for token in tokens:
    if token.is_stop:
        continue
    cleaned = preprocessor(token.lemma_)
    if cleaned:
        cleaned_lemmas.append(cleaned)

# Each cleaned lemma is wrapped in its own Doc because the next cell compares
# the entries with Doc.similarity. nlp.pipe processes the texts as a batch
# and yields the Docs in input order, instead of one nlp() call per word.
lemma_list = list(nlp.pipe(cleaned_lemmas))
lemma_list
        
        

[supervised,
 learning,
 machine,
 learn,
 task,
 learn,
 function,
 map,
 input,
 output,
 base,
 example,
 input,
 output,
 pair,
 1,
 infer,
 function,
 label,
 training,
 datum,
 consist,
 set,
 training,
 example,
 2,
 supervised,
 learning,
 example,
 pair,
 consist,
 input,
 object,
 typically,
 vector,
 desire,
 output,
 value,
 call,
 supervisory,
 signal,
 supervised,
 learn,
 algorithm,
 analyze,
 training,
 datum,
 produce,
 infer,
 function,
 map,
 new,
 example,
 optimal,
 scenario,
 allow,
 algorithm,
 correctly,
 determine,
 class,
 label,
 unseen,
 instance,
 require,
 learning,
 algorithm,
 generalize,
 training,
 datum,
 unseen,
 situation,
 reasonable,
 way,
 inductive,
 bias]

In [6]:
# Reference word, run through the pipeline so it can be compared with the
# entries of lemma_list via similarity().
key = nlp("learning")

# Only entries scoring strictly above this value are reported.
min_similarity = 0.5

for candidate in lemma_list:
    score = key.similarity(candidate)

    # Skip anything that does not clear the threshold.
    if not (score > min_similarity):
        continue

    print("Key:{} \nWord Found:{} \nSimilarity score:{}".format(key, candidate, score))
    print("----------------------------------------------------")
    
    
    

  


Key:learning 
Word Found:learning 
Similarity score:1.0
----------------------------------------------------
Key:learning 
Word Found:learn 
Similarity score:0.7682947823942914
----------------------------------------------------
Key:learning 
Word Found:learn 
Similarity score:0.7682947823942914
----------------------------------------------------
Key:learning 
Word Found:training 
Similarity score:0.6306880168795931
----------------------------------------------------
Key:learning 
Word Found:training 
Similarity score:0.6306880168795931
----------------------------------------------------
Key:learning 
Word Found:learning 
Similarity score:1.0
----------------------------------------------------
Key:learning 
Word Found:learn 
Similarity score:0.7682947823942914
----------------------------------------------------
Key:learning 
Word Found:training 
Similarity score:0.6306880168795931
----------------------------------------------------
Key:learning 
Word Found:learning 
Similarity s