In [1]:
import pandas as pd
import spacy
import en_core_web_sm
nlp = en_core_web_sm.load()

In [20]:
def FindPos(SearchSentence):
    """Collect spaCy attributes for every token of a search query.

    Parameters
    ----------
    SearchSentence : str
        The search query typed by the user.

    Returns
    -------
    dict
        Maps each token's text to a list holding, in order: ``lemma_``,
        ``pos_``, ``tag_``, ``dep_``, ``ent_type_``, ``shape_``, ``is_alpha``,
        ``is_stop``, the head token's text, the head token's ``pos_`` and the
        list of the token's children. Because the key is the token text, a
        word that occurs several times keeps only its last occurrence's data.
    """
    token_info = {}
    for tok in nlp(SearchSentence):
        head = tok.head
        token_info[tok.text] = [
            tok.lemma_,
            tok.pos_,
            tok.tag_,
            tok.dep_,
            tok.ent_type_,
            tok.shape_,
            tok.is_alpha,
            tok.is_stop,
            head.text,
            head.pos_,
            list(tok.children),
        ]
    return token_info

## Transform the dictionary into DataFrame
def Trans_to_dataframe(datadictionary):
    """Convert the per-token attribute dictionary into a DataFrame.

    Parameters
    ----------
    datadictionary : dict
        Maps a token's text to a list of 11 attributes, ordered like the
        column names below.

    Returns
    -------
    pandas.DataFrame
        One row per dictionary key (the key becomes the index label) with the
        columns Lemma, Pos, Tag, Dep, Ent, Shape, ISAlpha, Is_StopWord,
        Head_Text, Head_Pos and Children. An empty dictionary yields an empty
        frame that still carries these columns.
    """
    columns = ['Lemma', 'Pos', 'Tag', 'Dep', 'Ent', 'Shape', 'ISAlpha',
               'Is_StopWord', 'Head_Text', 'Head_Pos', 'Children']

    # An empty dict produces a frame with zero columns, and assigning 11
    # labels to it raises ValueError; return a labelled empty frame instead.
    if not datadictionary:
        return pd.DataFrame(columns=columns)

    # Keys become columns first, so transpose to get one row per token.
    df = pd.DataFrame(datadictionary).transpose()
    df.columns = columns
    return df

def RunSearch(SearchSentence):
    """Tag a search query and return the result as a DataFrame.

    Chains the two helpers: FindPos builds the per-token attribute
    dictionary and Trans_to_dataframe turns it into a table. The returned
    frame is meant to be iterated afterwards to pick out the main concept
    and sub concept of the query.
    """
    return Trans_to_dataframe(FindPos(SearchSentence))

def NounPhrasing(SearchSentence):
    """Extract noun phrases and their root keywords with spaCy noun chunking.

    Parameters
    ----------
    SearchSentence : str
        The search query typed by the user.

    Returns
    -------
    tuple of (list of str, list of str)
        The text of every retained noun chunk and, at the same position in
        the second list, the text of that chunk's root token. A chunk made of
        a single token is dropped when the token's tag is WP, WRB or PRP.
    """
    # Tags whose one-word chunks must not be reported as concepts
    # (keeps words such as Which/What out of the result).
    skipped_tags = {'WP', 'WRB', 'PRP'}

    phrases, keywords = [], []
    # Parse once and read the tag from the chunk's own token. Looking the
    # chunk text up in a dictionary keyed by token text raised KeyError for
    # chunks without spaces that span several tokens, and returned the tag of
    # the last occurrence when the same word appeared more than once.
    for chunk in nlp(SearchSentence).noun_chunks:
        if len(chunk) == 1 and chunk.root.tag_ in skipped_tags:
            continue
        phrases.append(chunk.text)
        keywords.append(chunk.root.text)

    return phrases, keywords


In [29]:
# Sample search query that the next cell passes to NounPhrasing; the
# commented-out lines are alternative queries to try instead.
word = "Machine learning is a advanced application science"
#word = "India won ICC Wordcup 2011"
#word = "Learn Machine Learning and Data Science from good Natural Langauge Programming eBooks"

In [31]:
# Run noun-phrase extraction on the sample query: NP receives the phrases and
# Key the root word of each phrase. Only the phrases are printed.
# NOTE(review): the saved output under this cell lists 'India' / 'ICC Wordcup',
# which corresponds to the commented-out "India won ..." query rather than the
# active `word`; re-run the cells in order to refresh it.
NP,Key = NounPhrasing(word)
print(NP)
#print(Key)

['India', 'ICC Wordcup']
