In [10]:
import spacy
import re
# Load the spaCy pipeline once at the top of the notebook; the functions in
# later cells call `nlp` for sentence splitting, dependency/POS tags and
# tokenization.
nlp = spacy.load("en_core_web_sm")

# Phrases that mark a sentence as describing something someone has to do.
# The text that follows the phrase is treated as the task description.
action_keywords = ["has to", "needs to", "should", "must", "is required to", "is expected to"]

# Deadline patterns, tried in order against the lower-cased sentence; the
# first pattern that matches supplies the deadline text.
time_patterns = [
    # "by 5", "by 5 pm", "by 5pm", "by 5:30 pm". Minutes are optional, and the
    # whitespace before am/pm is only consumed when am/pm actually follows, so
    # the match never ends in a stray space.
    r'\bby\s+\d{1,2}(?::\d{2})?(?:\s*(?:am|pm))?\b',
    r'\bbefore\s+\w+\b',    # "before friday"
    r'\btomorrow\b',
    r'\btoday\b',
    r'\bin\s+\d+\s+\w+\b'   # "in 2 days"
]


# Step 1: Extract tasks

In [11]:
def extract_tasks_from_text(text):
    """Extract task records from free text.

    A sentence counts as a task when it contains one of `action_keywords` as
    a whole phrase (so "must" does not fire inside "mustard"). For each such
    sentence a dict is built with:

    - "who": text of the first proper-noun or pronoun subject token, or None.
    - "task": the sentence text after the earliest action phrase, stripped.
    - "deadline": the first `time_patterns` match in the lower-cased
      sentence (stripped), or None.

    Sentences whose task text is empty are skipped.

    Args:
        text: The raw text to scan. Empty or None yields an empty list.

    Returns:
        A list of task dicts in sentence order.
    """
    if not text or not action_keywords:
        return []

    # One case-insensitive alternation over all phrases, anchored on word
    # boundaries. Longer phrases go first so that, if two phrases ever share
    # a prefix, the longer one wins at the same position.
    phrases = sorted(action_keywords, key=len, reverse=True)
    keyword_regex = re.compile(
        r"\b(?:" + "|".join(re.escape(phrase) for phrase in phrases) + r")\b",
        re.IGNORECASE,
    )

    doc = nlp(text)
    tasks = []

    for sentence in doc.sents:
        sentence_text = sentence.text

        # search() returns the leftmost match, i.e. the action phrase that
        # appears first in the sentence rather than first in the keyword list.
        keyword_match = keyword_regex.search(sentence_text)
        if keyword_match is None:
            continue

        task = {"who": None, "task": None, "deadline": None}

        for token in sentence:
            if token.dep_ in {"nsubj", "nsubjpass"} and token.pos_ in {"PROPN", "PRON"}:
                task["who"] = token.text
                break

        # Slice the original text using offsets computed on that same text,
        # so the cut is correct even where lower-casing changes string length.
        task["task"] = sentence_text[keyword_match.end():].strip()

        sentence_lower = sentence_text.lower()
        for pattern in time_patterns:
            deadline_match = re.search(pattern, sentence_lower)
            if deadline_match:
                task["deadline"] = deadline_match.group(0).strip()
                break

        if task["task"]:
            tasks.append(task)

    return tasks


In [19]:
# Define the sample paragraph here, in the first cell that uses it, so the
# notebook runs top to bottom on a fresh kernel instead of relying on a
# later cell having been executed first.
text = """Rahul wakes up early every day. He goes to college in the morning and comes back at 3 pm.
At present, Rahul is outside. He has to buy the snacks for all of us. 
He also needs to submit his assignment by 5 pm."""

extracted_tasks = extract_tasks_from_text(text)
extracted_tasks

[{'who': 'He', 'task': 'buy the snacks for all of us.', 'deadline': None},
 {'who': 'He',
  'task': 'submit his assignment by 5 pm.',
  'deadline': 'by 5 pm'}]

# Step 2: Categorize tasks

In [12]:
# Keyword lists per category. Categories are checked in the order written
# here, so a description that hits several lists gets the earliest category.
task_categories = dict(
    Personal=["buy", "get", "shop", "visit"],
    Academic=["submit", "study", "complete", "assignment", "exam", "project"],
    Work=["send", "email", "call", "schedule", "meeting"],
    Household=["clean", "wash", "cook", "arrange", "fix"],
)

def categorize_task_based_on_keywords(task_description):
    """Return the category whose keyword list matches the task description.

    Matching runs in two passes over `task_categories`, in dict order:

    1. Exact lower-cased token text against the keyword lists.
    2. Only if pass 1 finds nothing: token lemmas against the same lists, so
       inflected forms such as "bought" or "submitted" can still be placed.

    Any description that already matched in pass 1 keeps the category it
    would have received from text matching alone.

    Args:
        task_description: Task text to classify. Empty or None is treated
            as having no keywords.

    Returns:
        The first matching category name, or "Uncategorized".
    """
    if not task_description:
        return "Uncategorized"

    doc = nlp(task_description.lower())
    surface_forms = {token.text for token in doc}
    lemmas = {token.lemma_ for token in doc}

    for candidate_forms in (surface_forms, lemmas):
        for category, keywords in task_categories.items():
            if not candidate_forms.isdisjoint(keywords):
                return category
    return "Uncategorized"


In [13]:
def categorize_all_extracted_tasks(extracted_tasks):
    """Pair every extracted task with its keyword-based category.

    Args:
        extracted_tasks: Iterable of task dicts, each with a "task" entry.

    Returns:
        A list of (task_dict, category) tuples in input order.
    """
    return [
        (entry, categorize_task_based_on_keywords(entry["task"]))
        for entry in extracted_tasks
    ]


In [20]:
# Attach a category to each extracted task; the bare name on the last line
# displays the resulting (task, category) pairs.
categorized_tasks = categorize_all_extracted_tasks(extracted_tasks)
categorized_tasks

[({'who': 'He', 'task': 'buy the snacks for all of us.', 'deadline': None},
  'Personal'),
 ({'who': 'He',
   'task': 'submit his assignment by 5 pm.',
   'deadline': 'by 5 pm'},
  'Academic')]

# Step 3: Extract 'Who' and 'When'

In [14]:
def extract_who_and_deadline_from_task(task):
    """Return the (who, deadline) pair stored on a task dict.

    Args:
        task: Mapping with "who" and "deadline" keys.

    Returns:
        A 2-tuple (who, deadline).
    """
    person = task["who"]
    due = task["deadline"]
    return person, due


In [15]:
def get_who_and_when_info_for_all_tasks(categorized_tasks):
    """Collect the (who, deadline) pair of every categorized task.

    Args:
        categorized_tasks: Iterable of (task_dict, category) pairs; the
            category is ignored.

    Returns:
        A list of (who, deadline) tuples in input order.
    """
    return [
        extract_who_and_deadline_from_task(entry)
        for entry, _category in categorized_tasks
    ]

In [21]:
# Pull the (who, deadline) pair out of every categorized task; the bare name
# on the last line displays the list.
who_when_info = get_who_and_when_info_for_all_tasks(categorized_tasks)
who_when_info 

[('He', None), ('He', 'by 5 pm')]

# Step 4: Generate final JSON output

In [16]:
import json

def generate_final_output_json(who_when_info, categorized_tasks):
    """Serialize the pipeline results as a pretty-printed JSON array.

    The two inputs are walked in parallel with zip, so output stops at the
    shorter of the two.

    Args:
        who_when_info: Sequence of (who, when) pairs.
        categorized_tasks: Sequence of (task_dict, category) pairs, where
            each task_dict has a "task" entry.

    Returns:
        A JSON string (4-space indent) holding one object per task with the
        keys "Task", "Who", "When" and "Category".
    """
    records = [
        {
            "Task": task_entry["task"],
            "Who": person,
            "When": deadline,
            "Category": label,
        }
        for (person, deadline), (task_entry, label) in zip(who_when_info, categorized_tasks)
    ]
    return json.dumps(records, indent=4)


In [17]:
# Sample paragraph: only the last two sentences contain an action phrase
# ("has to", "needs to"), so only they should come out as tasks.
text = """Rahul wakes up early every day. He goes to college in the morning and comes back at 3 pm.
At present, Rahul is outside. He has to buy the snacks for all of us. 
He also needs to submit his assignment by 5 pm."""

# Run the whole pipeline end to end: extract -> categorize -> who/when -> JSON.
extracted_tasks = extract_tasks_from_text(text)
categorized_tasks = categorize_all_extracted_tasks(extracted_tasks)
who_when_info = get_who_and_when_info_for_all_tasks(categorized_tasks)
final_output_json = generate_final_output_json(who_when_info, categorized_tasks)
# print() rather than a bare expression so the JSON shows with real newlines.
print(final_output_json)

[
    {
        "Task": "buy the snacks for all of us.",
        "Who": "He",
        "When": null,
        "Category": "Personal"
    },
    {
        "Task": "submit his assignment by 5 pm.",
        "Who": "He",
        "When": "by 5 pm",
        "Category": "Academic"
    }
]
