## This cell contains several different functions meant to break out the tasks required to evaluate the text for a regular expression match. It is NOT the final function. 

In [1]:
import re
# Operator keywords recognised in a query string.
_NOT = "NOT"
_OR = "OR"
_AND = "AND"
# Prefix of a proximity token such as "/2".
_PROX = "/"

# - Split the input string containing a set of words into an array of words and append those words to the result
def appendString(s, r):
  """Split `s` into whitespace-separated words and append them to `r`.

  Args:
    s: Text fragment to tokenise; may be empty or contain only whitespace.
    r: List that receives the words. It is extended in place.

  Returns:
    The same list object `r`, so callers can write
    `result = appendString(ele, result)`.
  """
  # str.split() with no separator collapses runs of whitespace and yields
  # nothing for blank input, so no empty-string tokens are ever appended
  # (split(" ") produced "" for "  " and for double spaces between words).
  r += s.split()
  return r

# - Identifying nested structure architecture for a query string parsed on parentheses. eg: "((a b) c)" => [[['a', 'b'], 'c']]
def createNestedStructure(s):
  """Tokenise a query string into nested lists that mirror its parentheses.

  Words are split on whitespace; every parenthesised group becomes a nested
  list, e.g. "((a b) c)" => [[['a', 'b'], 'c']].

  Args:
    s: The raw query string.

  Returns:
    A list whose items are word strings or nested lists (one per group).

  Raises:
    ValueError: If the parentheses in `s` are not balanced.
  """
  def parseGroup(start, depth):
    # Parse from index `start` up to the ')' closing this group (or the end
    # of the string at depth 0). Returns (tokens, index where parsing stopped).
    tokens = []
    word_start = start  # start of the pending run of plain words
    i = start
    while i < len(s):
      ch = s[i]
      if ch == '(':
        tokens.extend(s[word_start:i].split())
        group, i = parseGroup(i + 1, depth + 1)  # i is now on the matching ')'
        tokens.append(group)
        word_start = i + 1
      elif ch == ')':
        if depth == 0:
          raise ValueError("Unbalanced parentheses in query: unexpected ')' at position %d" % i)
        tokens.extend(s[word_start:i].split())
        return tokens, i
      i += 1
    if depth > 0:
      raise ValueError("Unbalanced parentheses in query: missing ')'")
    tokens.extend(s[word_start:].split())
    return tokens, len(s)

  return parseGroup(0, 0)[0]

# - Check whether the array contains a "/n" proximity literal. If so, return the index of that literal in the array for regex use; otherwise return -1.
def hasMultiplicity(arr):
  """Return the index of the first proximity token ("/n") in `arr`, or -1.

  A proximity token is a string made of the `_PROX` marker followed only by
  decimal digits, e.g. "/2". Non-string items (nested nodes) are skipped.

  Args:
    arr: Sequence of query tokens and/or nested nodes.

  Returns:
    The index of the first proximity token, or -1 when there is none.
  """
  for idx, token in enumerate(arr):
    if not isinstance(token, str) or not token.startswith(_PROX):
      continue
    # Require digits after the marker so that an ordinary term which merely
    # begins with "/" (or a bare "/") is not mistaken for an operator and
    # later fed to int().
    if token[len(_PROX):].isdecimal():
      return idx
  return -1

# - parse the nested query structure with the verbiage OR, NOT, AND, /n.
def parseStructure(struct):
  """Turn a nested token list into a prefix-form operator tree.

  Parenthesised groups (nested lists) are resolved first. The remaining
  tokens on each level are then combined, loosest-binding operator first:
  OR, then AND, then proximity ("/n"), then NOT. Plain words with no
  operator between them are joined into a single space-separated phrase.

  Node shapes produced:
    "word or phrase"
    [_NOT, operand]
    [_AND, left, right]
    [_OR, left, right]
    [_PROX, n, left, right]

  Args:
    struct: List of word strings and nested lists.

  Returns:
    A phrase string or one of the list nodes above.

  Raises:
    ValueError: If an operator lacks an operand, a group is empty, NOT does
      not start its operand, or adjacent operands have no operator between
      them.
  """
  operands = []
  for ele in struct:
    if isinstance(ele, list):
      operands.append(parseStructure(ele))
    elif ele != "":
      # Empty strings can be left behind by stray whitespace; ignore them.
      operands.append(ele)
  return _combineOperands(operands)


def _combineOperands(items):
  """Fold one nesting level of already-resolved operands into a single node."""
  if not items:
    raise ValueError("Query is missing an operand")

  # Binary keyword operators: split at the first occurrence so that both
  # sides may be multi-word phrases or further operator expressions.
  for op in (_OR, _AND):
    if op in items:
      pos = items.index(op)
      return [op, _combineOperands(items[:pos]), _combineOperands(items[pos + 1:])]

  pos = hasMultiplicity(items)
  if pos >= 0:
    n = int(items[pos].replace(_PROX, ""))
    if n < 0:
      raise ValueError("Proximity distance must not be negative: %r" % items[pos])
    return [_PROX, n, _combineOperands(items[:pos]), _combineOperands(items[pos + 1:])]

  if _NOT in items:
    pos = items.index(_NOT)
    # NOT negates everything that follows it within this operand.
    negated = [_NOT, _combineOperands(items[pos + 1:])]
    if pos == 0:
      return negated
    raise ValueError("NOT must start an operand; join it to preceding terms with AND or OR")

  if len(items) == 1:
    # A lone word, or a lone parenthesised group that is already a node.
    return items[0]
  if all(isinstance(item, str) for item in items):
    return " ".join(items)
  raise ValueError("Adjacent query terms must be joined by AND, OR or a proximity operator")

# Creating regex filters using the provided verbiage in the search terms. I used the following documentation. 
# https://docs.python.org/3/library/re.html
# https://docs.python.org/3/howto/regex.html

def createRegularExpression(struct):
  """Build a regular-expression string from a parsed query node.

  Args:
    struct: A phrase string, or a list node of the form [_NOT, operand],
      [_AND, left, right], [_OR, left, right] or [_PROX, n, left, right].

  Returns:
    The regular expression as a string.

  Raises:
    TypeError: If `struct` is neither a string nor a recognised list node.
  """
  # Leaf: a literal word or phrase. Test the type first so that strings are
  # never mistaken for operator nodes, and escape the text so characters
  # such as "+" or "(" in a search term are matched literally.
  if isinstance(struct, str):
    return r"\b" + re.escape(struct) + r"\b"
  if not isinstance(struct, (list, tuple)) or not struct:
    raise TypeError("Unsupported query node: %r" % (struct,))

  def isNegation(node):
    return isinstance(node, (list, tuple)) and len(node) > 1 and node[0] == _NOT

  op = struct[0]

  if op == _AND:
    left, right = struct[1], struct[2]
    if isNegation(left) and isNegation(right):
      # Neither term may start anywhere in the text.
      banned = "(?:" + createRegularExpression(left[1]) + ")|(?:" + createRegularExpression(right[1]) + ")"
      return "(^((?!" + banned + ").)*$)"
    if isNegation(left) or isNegation(right):
      negated, required = (left, right) if isNegation(left) else (right, left)
      t_exp = createRegularExpression(negated[1])
      expr = createRegularExpression(required)
      # The required term must match while the negated term occurs neither
      # before nor after it.
      n_exp1 = "(^((?!" + t_exp + ").)*)"
      n_exp2 = "(((?!" + t_exp + ").)*$)"
      return n_exp1 + expr + n_exp2
    expr1 = createRegularExpression(left)
    expr2 = createRegularExpression(right)
    # Both terms in either order, separated by any number of words.
    return "((" + expr1 + r" (\S+ )*" + expr2 + ")|(" + expr2 + r" (\S+ )*" + expr1 + "))"

  if op == _OR:
    expr1 = createRegularExpression(struct[1])
    expr2 = createRegularExpression(struct[2])
    return "(" + expr1 + "|" + expr2 + ")"

  if op == _NOT:
    expr = createRegularExpression(struct[1])
    return "(^((?!" + expr + ").)*$)"

  if op == _PROX:
    expr1 = createRegularExpression(struct[2])
    expr2 = createRegularExpression(struct[3])
    n_str = str(struct[1])
    # Both terms in either order with at most n words between them.
    return "((" + expr1 + r" (\S+ ){0," + n_str + "}" + expr2 + ")|(" + expr2 + r" (\S+ ){0," + n_str + "}" + expr1 + "))"

  raise TypeError("Unsupported query node: %r" % (struct,))

## This cell contains the final function. Running this cell prompts the user for a query (using the four operators outlined in the Read Me section) and for the text to search. If the query matches the text, True is printed; otherwise False is printed.

In [2]:
# - see if the query matches a document string
def Regex_Func(query, docString):
  """Return True when `query` matches `docString`, ignoring case.

  The query is tokenised into a nested structure, parsed into an operator
  tree, compiled to a regular expression and searched for in `docString`.
  """
  pattern = createRegularExpression(parseStructure(createNestedStructure(query)))
  found = re.search(pattern, docString, re.IGNORECASE)
  return found is not None


# Prompt for the query first, then for the text to search in.
query = input('What are you looking for?:')
text = input('Where are you looking for it?:')

# Print True when the query matches the text, False otherwise.
print(Regex_Func(query, text))

What are you looking for?:bananas /2 apples
Where are you looking for it?:bananas with apples
True


## TESTING / WORKING ##

In [None]:

# Sample query: a parenthesised AND NOT group combined with an OR alternative.
s1 = '(apple AND NOT orange) OR green'
# Sample text to search.
t1 = 'green orange and apple pie'

print(Regex_Func(s1, t1))