In [1]:
import spacy
from spacy import displacy
from daterangeparser import parse


In [3]:
# Load the `en_core_web_lg` spaCy pipeline once; `nlp` is reused by the later
# cells to parse every sample text.
nlp = spacy.load("en_core_web_lg")


In [170]:
# Sample texts (assessment / exam descriptions that mention dates) used as
# inputs for the entity rendering and event extraction in this notebook.
# Each entry is a raw multi-line string; the consuming loops strip it and
# replace newlines with spaces before parsing.
qs = [
"""
RESEARCH REPORT
Research report on current state of architectural settings [from list of topics] presented on printed color poster [11” x 17”].
More information will be provided in class and on D2L.
Tue, 4 October
11:00 am
[at the beginning of class]
""",
"""
Work Term Record
Due: September 22 Update your Elevate Work Term Record. The below
information must be updated to ensure we can get in touch
with both you and your supervisor. If your supervisor
changes at any time during your employment, you must
email science.internship@ucalgary.ca and update your
work term record. If either you or your supervisor do not
have a phone, please write N/A.
Student's phone and email
Supervisor's name, title, phone and email
""",
"""
Exams (81% of final grade)
There are two exams (October 14 and November 18) and a final exam (to be written during the final
exam period) scheduled for this course. The first two exams are multiple-choice, non-cumulative,
and will Vodcast content and Zoom session material (including any films and other demonstrations).
The final exam is cumulative and will consist of multiple-choice questions.
""",
"""
Assessment name is Learning Objectives,
Due on October 13,
The guidelines for the Learning Objectives can be found in the Assignment Details section in D2L. You will set two performance related goals and identify one personal goal to work on over the course of the work term. Before you submit, review them with your supervisor to ensure they align with your work requirements.
"""
]

# Visualise every sample twice: first its named entities, then its
# dependency parse.
for q in qs:
    # Flatten the snippet onto a single line before handing it to spaCy.
    q = q.strip().replace("\n", " ")
    doc = nlp(q)
    for style in ("ent", "dep"):
        displacy.render(doc, style=style, options={"compact": True}, jupyter=True)


In [171]:
def dep_subtree(token, dep):
  """Return text taken from the first child of ``token`` labelled ``dep``.

  The first direct child whose ``dep_`` equals ``dep`` is selected. Its
  subtree is then scanned and only the tokens that themselves carry the
  label ``dep`` are kept; their texts are joined with single spaces.

  Args:
    token: Object exposing ``children`` (iterable of tokens).
    dep: Dependency label to look for, e.g. ``"nsubj"``.

  Returns:
    The joined text, or ``""`` when no child has the requested label.
  """
  for candidate in token.children:
    if candidate.dep_ == dep:
      # Only subtree members with the same label survive, so differently
      # labelled modifiers inside the subtree are left out.
      return " ".join(node.text for node in candidate.subtree if node.dep_ == dep)
  return ""


In [172]:
def _describe_root(root):
  """Build a short description from ``root`` and selected dependents of it."""
  before = [dep_subtree(root, dep)
            for dep in ("nsubj", "nsubjpass", "auxpass", "amod", "det")]
  after = [dep_subtree(root, dep)
           for dep in ("acl", "dobj", "attr", "advmod")]
  # dep_subtree returns "" for absent dependents; filter(None, ...) drops
  # those so the joined text has no doubled spaces.
  return " ".join(filter(None, before + [root.text] + after))


def extract_event(q):
  """Extract ``(start, date_text, description)`` tuples from ``q``.

  Every entity labelled ``DATE`` whose text ``parse`` can handle yields one
  tuple. The description is built from the ROOT token reached by following
  ``head`` links upwards from the entity's root token.

  Args:
    q: The text to analyse. A string is parsed with the module-level ``nlp``
      pipeline; any other object is assumed to be an already parsed document
      exposing ``ents`` and is used as-is.

  Returns:
    A list of ``(start, entity_text, description)`` tuples, where ``start``
    is the first value returned by ``parse``. Entities whose text cannot be
    parsed as a date are skipped.
  """
  # Work on the argument itself rather than on whatever `doc` happens to be
  # left over in the notebook's global namespace.
  doc = nlp(q) if isinstance(q, str) else q

  events = []
  for ent in doc.ents:
    if ent.label_ != 'DATE':
      continue
    try:
      start, _end = parse(ent.text)
    except Exception:
      # Best effort: text that cannot be parsed as a date is ignored.
      continue

    # Climb to the ROOT token. When the entity's own root already is the
    # ROOT the loop body never runs, so the description must be built after
    # the loop rather than inside it.
    root = ent.root
    while root.dep_ != "ROOT":
      root = root.head

    events.append((start, ent.text, _describe_root(root)))

  return events

In [173]:
# Run the event extraction over every sample and print what was found.
for q in qs:
    # Same normalisation as for rendering: trim, then put everything on one line.
    q = q.strip().replace("\n", " ")
    doc = nlp(q)
    print(extract_event(q))

[]
[(datetime.datetime(2022, 9, 22, 0, 0), 'September 22', 'Due Update Record')]
[]
[(datetime.datetime(2022, 10, 13, 0, 0), 'October 13', 'name is Objectives')]
