In [1]:
!pip install -U sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import re
import string
from scipy.spatial import distance
from nltk.stem.porter import PorterStemmer
from sentence_transformers import SentenceTransformer, util

In [3]:
# Single Porter stemmer instance, used by stem() below.
ps = PorterStemmer()

In [4]:
# Sentence-embedding model used to encode both the reference sentences and
# the sentences passed to filter_sentence().
# NOTE(review): this checkpoint is reportedly marked deprecated on its model
# card — confirm, and consider a newer model if so.
model_embedder = SentenceTransformer('distilbert-base-nli-mean-tokens')

In [5]:
### Rule-Based Approach Conditions
# Key phrases checked against the input text by filter_sentence().
# Some entries end with a trailing space.

# Phrases indicating the student is asking whether they may share.
share_conditions = [
    "can i ",
    "should i ",
    "i will",
    "shall i",
    "could i",
    "am i",
    "may i",
    "i shall",
]

# Phrases indicating the sharing has already happened.
shared_conditions = [
    "i have ",
    "i just",
    "i already",
    "can be",
    "has been",
]

In [6]:
### Reference sentences for the similarity-based approach

In [7]:
# Reference sentences for the "wants to know if can share" class.
share = [
    'Can I share your email',
    'I will share your email',
    'I shall share your email',
    'May I share your email',
    'Should I share your email',
    'Am I allowed to share your email',
    'Will you help my friends if I share your email with them',
    'I am going to share your email',
]

In [8]:
# Reference sentences for the "has shared" class.
shared = [
    'I have shared your email',
    'I already shared the email',
    'I have just shared your email',
    'I want to know if your email can be shared',
    'Your email has been shared to my friends',
    'I am able to share your email',
    'your email will be shared',
    'your email was shared',
    'I shared your email',
]

In [9]:
### Utility Functions

def stem(text):
  """Stem each whitespace-separated word of ``text`` with ``ps``.

  The stemmed words are re-joined with single spaces, so any run of
  whitespace in the input collapses to one space in the result.
  """
  return " ".join(ps.stem(token) for token in text.split())

def low(text):
  """Return a lower-cased copy of ``text``."""
  lowered = text.lower()
  return lowered

def punc(text):
  """Return ``text`` with every character of ``string.punctuation`` removed."""
  # Regex character class built from string.punctuation, escaped so that
  # characters such as ']' or '\\' are taken literally.
  punctuation_class = '[%s]'%re.escape(string.punctuation)
  return re.sub(punctuation_class, '', text)
    

In [10]:
# Normalise the `share` reference sentences in place:
# lower-case -> strip punctuation -> stem.
for idx, sentence in enumerate(share):
  share[idx] = stem(punc(low(sentence)))

In [11]:
# Normalise the `shared` reference sentences in place:
# lower-case -> strip punctuation -> stem.
for idx, sentence in enumerate(shared):
  shared[idx] = stem(punc(low(sentence)))

In [12]:
# obtain embedding for stored sentences
# Each pre-processed reference list is encoded once here; filter_sentence()
# compares incoming sentences against these stored embeddings.
share_embeddings = model_embedder.encode(share)
shared_embeddings = model_embedder.encode(shared)

In [13]:
def _contains_phrase(text, phrases):
  """Return True if any entry of ``phrases`` occurs in ``text`` as whole words."""
  for phrase in phrases:
    # Some phrases carry a trailing space as a crude word delimiter. Strip it
    # and use \b instead, so the phrase also matches at the end of a sentence
    # and never inside a longer word ("am i" must not fire on "i am interested").
    if re.search(r'\b%s\b' % re.escape(phrase.strip()), text):
      return True
  return False


def filter_sentence(text):
  """Classify a sentence as asking to share or as reporting a completed share.

  The sentence is first checked against the key phrases in
  ``share_conditions`` and then ``shared_conditions``. If neither matches, the
  sentence is embedded with ``model_embedder`` and labelled by whichever of
  ``share_embeddings`` / ``shared_embeddings`` has the higher mean cosine
  similarity to it (ties go to the "wants to know" label).

  Args:
    text: The raw sentence to classify.

  Returns:
    "Student wants to know if can share", or a "has shared" label.
    NOTE(review): the rule-based branch returns "student has shared"
    (lower-case "s") while the similarity branch returns "Student has shared";
    both literals are kept as-is so existing consumers are not broken —
    confirm which spelling is intended and unify.
  """
  # Lower-case, drop punctuation and collapse whitespace. The key phrases are
  # written in plain (unstemmed) English, so they are matched BEFORE stemming;
  # stemming first turns e.g. "has been" into "ha been" and "i already" into
  # "i alreadi", which would make those rules unreachable.
  cleaned = " ".join(punc(low(text)).split())

  # Attempt rule based approach first
  if _contains_phrase(cleaned, share_conditions):
    return "Student wants to know if can share"

  if _contains_phrase(cleaned, shared_conditions):
    return "student has shared"

  # if none of the rule-based approach conditions works, check sentence similarity.
  # Stem AFTER punctuation removal, the same order used for the reference
  # sentences, so the input and the references are normalised identically.
  text_embedding = model_embedder.encode(stem(cleaned))

  # One batched call per class: the result holds the similarity of the input
  # to every reference sentence of that class, which is then averaged.
  share_mean = util.pytorch_cos_sim(text_embedding, share_embeddings).mean().item()
  shared_mean = util.pytorch_cos_sim(text_embedding, shared_embeddings).mean().item()

  if shared_mean > share_mean:
    return "Student has shared"

  return "Student wants to know if can share"


In [14]:
# Holds the predicted label for each sentence in `tests`.
result = []

# Example sentences to classify.
tests = [
    'i want to share your email',
    'i can share your email with my friends?',
    'i would share your email',
    'am i permitted to share your email',
    'i just shared your email',
]

In [15]:
# Rebuild the list instead of appending, so that re-running this cell does
# not duplicate predictions from an earlier run.
result = [filter_sentence(test) for test in tests]

In [16]:
# Display the predicted label for each sentence in `tests`, in order.
result

['Student wants to know if can share',
 'Student wants to know if can share',
 'Student wants to know if can share',
 'Student wants to know if can share',
 'student has shared']