In [1]:
text="Plants are living things that grow in soil and use sunlight to make their own food. They come in many shapes and sizes, from tiny flowers to tall trees. Plants give us oxygen to breathe and provide food for animals, like fruits and vegetables."
question="What are some examples of food that plants provide for animals?"

In [2]:
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

sentences_tokens=nltk.sent_tokenize(text)
sentences_tokens.append(question)

def sent_match(sent):
    sent_token=nltk.word_tokenize(sent.lower())
    pos_tags = nltk.pos_tag(sent_token)
    
    sent_lemma = []
    for token, pos_tag in zip(sent_token, pos_tags):
        #print(token, pos_tag[1][0].lower())
        if pos_tag[1][0].lower() in ['n', 'v', 'a', 'r']:
            lemma = lemmatizer.lemmatize(token, pos_tag[1][0].lower())
            sent_lemma.append(lemma)
            
    return sent_lemma

In [3]:
sentences_tokens=nltk.sent_tokenize(text)
sentences_tokens.append(question)
sentences_tokens

['Plants are living things that grow in soil and use sunlight to make their own food.',
 'They come in many shapes and sizes, from tiny flowers to tall trees.',
 'Plants give us oxygen to breathe and provide food for animals, like fruits and vegetables.',
 'What are some examples of food that plants provide for animals?']

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
tv=TfidfVectorizer(tokenizer=sent_match)
tv

In [6]:
tf=tv.fit_transform(sentences_tokens)
tf



<4x24 sparse matrix of type '<class 'numpy.float64'>'
	with 31 stored elements in Compressed Sparse Row format>

In [7]:
tf.toarray()

array([[0.        , 0.27144011, 0.        , 0.        , 0.        ,
        0.        , 0.21975416, 0.        , 0.        , 0.34428736,
        0.34428736, 0.34428736, 0.        , 0.21975416, 0.        ,
        0.        , 0.        , 0.34428736, 0.34428736, 0.        ,
        0.34428736, 0.        , 0.34428736, 0.        ],
       [0.        , 0.        , 0.        , 0.40824829, 0.        ,
        0.40824829, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.40824829, 0.40824829, 0.        , 0.        , 0.40824829,
        0.        , 0.40824829, 0.        , 0.        ],
       [0.29676449, 0.        , 0.37640812, 0.        , 0.        ,
        0.        , 0.24025643, 0.37640812, 0.37640812, 0.        ,
        0.        , 0.        , 0.37640812, 0.24025643, 0.29676449,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.37640812],
       [0.411

In [8]:
import pandas as pd

In [9]:
df=pd.DataFrame(tf.toarray(), columns=tv.get_feature_names_out())
df

Unnamed: 0,animal,be,breathe,come,example,flower,food,fruit,give,grow,...,provide,shape,size,soil,sunlight,tall,thing,tree,use,vegetable
0,0.0,0.27144,0.0,0.0,0.0,0.0,0.219754,0.0,0.0,0.344287,...,0.0,0.0,0.0,0.344287,0.344287,0.0,0.344287,0.0,0.344287,0.0
1,0.0,0.0,0.0,0.408248,0.0,0.408248,0.0,0.0,0.0,0.0,...,0.0,0.408248,0.408248,0.0,0.0,0.408248,0.0,0.408248,0.0,0.0
2,0.296764,0.0,0.376408,0.0,0.0,0.0,0.240256,0.376408,0.376408,0.0,...,0.296764,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.376408
3,0.41101,0.41101,0.0,0.0,0.521314,0.0,0.332748,0.0,0.0,0.0,...,0.41101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
#find the similarity
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
#compute similarity between sentences
similarity_matrix=cosine_similarity(tf[-1], tf)
similarity_matrix

array([[0.25781031, 0.        , 0.40383635, 1.        ]])

In [12]:
similarity_matrix_index=similarity_matrix.argsort()[0][-2]
similarity_matrix_index

2

In [13]:
similarity_matrix_flat=similarity_matrix.flatten()
similarity_matrix_flat

array([0.25781031, 0.        , 0.40383635, 1.        ])

In [14]:
similarity_matrix_flat.sort()
similarity_matrix_flat

array([0.        , 0.25781031, 0.40383635, 1.        ])

In [15]:
coeff=similarity_matrix_flat[-2]
coeff

0.40383634666596807

In [16]:
if coeff>0.3:
    print(sentences_tokens[similarity_matrix_index])

Plants give us oxygen to breathe and provide food for animals, like fruits and vegetables.


In [17]:
#question="What are some examples of food that plants provide for animals?"