Task Extractor

Import Required Libraries

In [81]:
import spacy
import re
from typing import List, Dict, Optional
import tkinter as tk
from tkinter import scrolledtext, messagebox

Define the Task Extractor class

In [82]:
class TaskExtractor:
    """Rule-based extractor that turns free text into a list of task records.

    A sentence counts as a task when it contains one of ``task_triggers`` as a
    whole word or phrase. Each task record carries the (pronoun-resolved)
    sentence, the grammatical subject as assignee, an optional deadline and a
    keyword-derived category.

    Attributes:
        nlp: The loaded spaCy pipeline used for sentence splitting and
            dependency parsing.
        task_triggers: Phrases whose presence marks a sentence as a task.
        task_categories: Mapping of category name to keyword set; categories
            are tried in insertion order and the first match wins.
    """

    # Leading words that resolve_pronouns replaces; the same set is used to
    # reject antecedent candidates, so a pronoun is never swapped for a pronoun.
    # NOTE(review): 'i' and 'we' are also replaced by an earlier sentence's
    # subject, which may not be the intended referent - confirm.
    _PRONOUNS = frozenset({'he', 'she', 'they', 'it', 'we', 'i'})

    def __init__(self):
        # Load spaCy's English model
        try:
            self.nlp = spacy.load('en_core_web_sm')
        except OSError as exc:
            # Same exception type as before, with an added install hint.
            raise OSError(
                "spaCy model 'en_core_web_sm' could not be loaded; install it "
                "with: python -m spacy download en_core_web_sm"
            ) from exc

        # Words that indicate a task
        self.task_triggers = {
            'needs to', 'has to', 'must', 'should', 'required to',
            'responsible for', 'to do', 'will', 'will have to', 'have to'
        }

        # Categories and their associated keywords
        self.task_categories = {
            'Shopping': {'buy', 'purchase', 'get', 'order'},
            'Work': {'submit', 'complete', 'prepare', 'review', 'write'},
            'Cleaning': {'clean', 'organize', 'fix', 'arrange'},
            'Communication': {'call', 'contact', 'email', 'send'},
            'General': {'make', 'check', 'do'}
        }

    @staticmethod
    def _inflected(word: str) -> str:
        """Return a regex fragment for ``word`` plus its regular verb endings.

        Covers -s/-es/-ed/-ing, a dropped final 'e' ("write" -> "writing") and
        a doubled final consonant ("get" -> "getting"). Irregular forms such
        as "bought" are not covered.
        """
        if len(word) > 2 and word.endswith('e'):
            return re.escape(word[:-1]) + r'(?:e|es|ed|ing)'
        last = re.escape(word[-1])
        return re.escape(word) + r'(?:s|es|ed|ing|' + last + r'ed|' + last + r'ing)?'

    @classmethod
    def _compile_phrases(cls, phrases, inflect: bool = False):
        """Compile a case-insensitive, whole-word regex matching any phrase.

        Args:
            phrases: Iterable of words or space-separated phrases.
            inflect: When True, the last word of each phrase may also appear
                with a regular verb ending (see ``_inflected``).

        Returns:
            A compiled pattern, or None when ``phrases`` holds nothing to match.
        """
        alternatives = []
        # Sorting gives a stable pattern string regardless of set ordering,
        # which lets the re module's internal compile cache be reused.
        for phrase in sorted(phrases):
            words = phrase.lower().split()
            if not words:
                continue
            parts = [re.escape(word) for word in words[:-1]]
            parts.append(cls._inflected(words[-1]) if inflect else re.escape(words[-1]))
            # Any run of whitespace may separate the words of a phrase.
            alternatives.append(r'\s+'.join(parts))
        if not alternatives:
            return None
        return re.compile(r'\b(?:' + '|'.join(alternatives) + r')\b', re.IGNORECASE)

    def find_subject(self, sentence: str) -> Optional[str]:
        """Find the subject of the sentence using dependency parsing.

        Returns:
            The text of the first token whose dependency label is ``nsubj`` or
            ``nsubjpass``, or None when the parse contains no such token.
        """
        doc = self.nlp(sentence)
        for token in doc:
            # Check if the token is the subject of the sentence
            if token.dep_ in ("nsubj", "nsubjpass"):
                return token.text
        return None

    def resolve_pronouns(self, sentence: str, previous_sentences: List[str]) -> str:
        """Replace a leading pronoun with the most recent named subject.

        Only the first whitespace-separated word is considered. Earlier
        sentences are scanned from the most recent backwards; a sentence whose
        subject is itself one of the handled pronouns is skipped so the search
        continues to an actual name.

        Args:
            sentence: The sentence to rewrite.
            previous_sentences: Sentences that precede it, oldest first.

        Returns:
            The sentence with its first word replaced (words re-joined with
            single spaces), or the sentence unchanged when it does not start
            with a handled pronoun or no antecedent is found.
        """
        words = sentence.split()
        if not words or words[0].lower() not in self._PRONOUNS:
            return sentence

        for earlier in reversed(previous_sentences):
            candidate = self.find_subject(earlier)
            if candidate and candidate.lower() not in self._PRONOUNS:
                words[0] = candidate
                return " ".join(words)
        return sentence

    def extract_tasks(self, text: str) -> List[Dict]:
        """Extract tasks from the given text.

        Args:
            text: Free text; newlines are treated as spaces before sentence
                splitting.

        Returns:
            One dict per task sentence with keys ``task``, ``assignee``
            ("Unspecified" when no subject is found), ``deadline``
            ("Not specified" when none is found) and ``category``.
        """
        # Clean the text and split into sentences
        text = text.replace('\n', ' ')
        sentences = [sent.text.strip() for sent in self.nlp(text).sents]
        tasks = []

        # Built once per call so edits to self.task_triggers are honoured.
        # Whole-word matching keeps "will" from firing on "William" and
        # "must" from firing on "mustard".
        trigger_pattern = self._compile_phrases(self.task_triggers)
        if trigger_pattern is None:
            return tasks

        for i, sentence in enumerate(sentences):
            # Check if the sentence contains task indicators
            if not trigger_pattern.search(sentence):
                continue

            # Resolve pronouns in the sentence
            resolved_sentence = self.resolve_pronouns(sentence, sentences[:i])

            # Find the subject of the sentence as the assignee
            assignee = self.find_subject(resolved_sentence)

            # Extract the deadline (if any)
            deadline = self.extract_deadline(resolved_sentence)

            # Categorize the task
            category = self.categorize(resolved_sentence)

            # Add the task to the list
            tasks.append({
                'task': resolved_sentence,
                'assignee': assignee if assignee else "Unspecified",
                'deadline': deadline if deadline else "Not specified",
                'category': category
            })

        return tasks

    def extract_deadline(self, text: str) -> Optional[str]:
        """Extract a deadline from the text using regex.

        Recognises "by <time>" and "at <time>" (1-2 digit hour, optional
        minutes, am/pm) and the words "tomorrow", "tonight" and "today", tried
        in that order.

        Returns:
            The matched text in lower case (including the leading "by"/"at"),
            or None when nothing matches.
        """
        lowered = text.lower()
        # Word boundaries stop "that 5pm" matching as "at 5pm" and
        # "by 5 amazing" matching as "by 5 am". The text is already
        # lower-cased, so only lower-case am/pm is needed.
        patterns = [
            r'\bby\s+(\d{1,2}(?::\d{2})?\s*(?:am|pm))\b',
            r'\bat\s+(\d{1,2}(?::\d{2})?\s*(?:am|pm))\b',
            r'\btomorrow\b',
            r'\btonight\b',
            r'\btoday\b'
        ]
        for pattern in patterns:
            match = re.search(pattern, lowered)
            if match:
                return match.group(0)
        return None

    def categorize(self, text: str) -> str:
        """Assign a category to the task based on keywords.

        Keywords match as whole words (regular verb endings allowed), so
        "get" no longer matches "together" and "do" no longer matches
        "document". Categories are tried in the insertion order of
        ``task_categories``.

        Returns:
            The first category with a matching keyword, or 'General' when
            none matches.
        """
        for category, keywords in self.task_categories.items():
            pattern = self._compile_phrases(keywords, inflect=True)
            if pattern is not None and pattern.search(text):
                return category
        return 'General'

Task Extractor GUI class

In [83]:
class TaskExtractorGUI:
    """Tkinter window with an input box, an extract button and a results box.

    Widgets are packed top to bottom in creation order. A ``TaskExtractor`` is
    created once, after the widgets, and reused for every button press.
    """

    def __init__(self, root):
        """Build the widgets inside ``root`` and create the extractor."""
        self.root = root
        self.root.title("Task Extractor")

        # Input section: caption above a scrollable text area
        self.label = self._packed(tk.Label(root, text="Enter your text:"))
        self.input_text = self._packed(
            scrolledtext.ScrolledText(root, width=50, height=10)
        )

        # Button that triggers extraction
        self.extract_button = self._packed(
            tk.Button(root, text="Extract Tasks", command=self.extract_tasks)
        )

        # Output section: caption above a scrollable text area
        self.output_label = self._packed(tk.Label(root, text="Extracted Tasks:"))
        self.output_text = self._packed(
            scrolledtext.ScrolledText(root, width=50, height=10)
        )

        # Extraction engine shared by all button presses
        self.extractor = TaskExtractor()

    @staticmethod
    def _packed(widget):
        """Pack ``widget`` with default options and hand it back."""
        widget.pack()
        return widget

    def extract_tasks(self):
        """Run the extractor on the input box and render the result.

        Shows a warning dialog and leaves the output untouched when the input
        is blank; otherwise replaces the output with one formatted entry per
        task, or with "No tasks found.".
        """
        entered = self.input_text.get("1.0", tk.END).strip()
        if not entered:
            messagebox.showwarning("Input Error", "Please enter some text.")
            return

        found = self.extractor.extract_tasks(entered)
        self.output_text.delete("1.0", tk.END)  # Drop whatever was shown before

        if found:
            divider = "-" * 50 + "\n"
            for record in found:
                entry = (
                    f"Task: {record['task']}\n"
                    f"Assigned to: {record['assignee']}\n"
                    f"Deadline: {record['deadline']}\n"
                    f"Category: {record['category']}\n"
                ) + divider
                self.output_text.insert(tk.END, entry)
        else:
            self.output_text.insert(tk.END, "No tasks found.")

Run the Application

In [84]:
def main():
    """Create the Tk root window, attach the GUI to it and run the event loop."""
    window = tk.Tk()
    # Bound to a local name so the GUI object stays referenced until main() returns.
    gui = TaskExtractorGUI(window)
    window.mainloop()


if __name__ == "__main__":
    main()