In [12]:
import re
import json

# --- Step 1: Preprocess Text ---
def preprocess_text(text):
    """Return *text* with leading and trailing whitespace removed."""
    return text.strip()

# --- Step 2: Extract Sentences ---
def extract_sentences(text):
    """Split *text* into sentences on '.', '!' and '?'.

    Each fragment is stripped of surrounding whitespace, and fragments that
    are empty after stripping are dropped. The resulting list is printed
    and returned.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty sentence strings without their terminating
        punctuation.
    """
    fragments = re.split(r'[.!?]', text)

    # Strip before filtering: a whitespace-only fragment (e.g. the gap in
    # "Wait! ? Go.") is truthy until stripped and would otherwise end up in
    # the result as an empty string.
    stripped_fragments = (fragment.strip() for fragment in fragments)
    sentences = [fragment for fragment in stripped_fragments if fragment]

    print("\nExtracted Sentences:")
    print(sentences)

    return sentences

# --- Step 3: Extract Tasks ---
def extract_tasks(sentences):
    """Return the sentences that contain a task-indicating keyword.

    A sentence counts as a task when it contains one of the keywords
    ("has to", "needs to", "should", "must", "is required to", "asked",
    "will") as a whole word or phrase, compared case-insensitively. The
    selected sentences are printed and returned in their original order.

    Args:
        sentences: An iterable of sentence strings.

    Returns:
        A list of the sentences that matched at least one keyword.
    """
    task_keywords = ["has to", "needs to", "should", "must", "is required to", "asked", "will"]

    # Match whole words only, so "will" does not fire on "William" and
    # "asked" does not fire on "unmasked". Words of a multi-word keyword may
    # be separated by any whitespace (e.g. a line break inside the sentence).
    keyword_alternatives = "|".join(
        r"\s+".join(re.escape(word) for word in keyword.split())
        for keyword in task_keywords
    )
    keyword_pattern = re.compile(r"\b(?:" + keyword_alternatives + r")\b", re.IGNORECASE)

    extracted_tasks = [sentence for sentence in sentences if keyword_pattern.search(sentence)]

    print("\nExtracted Tasks:")
    print(extracted_tasks)

    return extracted_tasks

# --- Step 4: Extract Person Responsible & Deadline ---
def extract_details(sentence, known_names=None):
    """Extract the responsible person and the deadline from a task sentence.

    The person is resolved in this order:
      1. the first entry of *known_names* that occurs in the sentence as a
         whole word (case-insensitive);
      2. the first capitalised word that is not one of the excluded
         sentence starters / weekday names;
      3. a generic subject found as a whole word: "students", "teacher(s)",
         "she", "they", "he" (checked in that order).

    The deadline is the phrase following "by" when it is one of: "the end
    of the month", "tomorrow", "today", a weekday name optionally preceded
    by "next", or a 1-2 digit day number with an optional ordinal suffix.

    Args:
        sentence: The task sentence to analyse.
        known_names: Optional list of names seen earlier; falsy entries
            (e.g. None) are ignored.

    Returns:
        A ``(person, deadline)`` tuple; either element is None when nothing
        was found. The deadline keeps the casing used in the sentence.
    """
    person = None
    deadline = None

    known_names = known_names or []

    for name in known_names:
        # Callers may have recorded "no person found" as None; skip such
        # entries instead of failing on them.
        if not name:
            continue
        # Whole-word match so a known "Ann" is not found inside "Annabel".
        if re.search(r'(?<!\w)' + re.escape(name) + r'(?!\w)', sentence, re.IGNORECASE):
            person = name
            break

    if not person:
        # The trailing \b inside the lookahead makes the exclusion apply to
        # the whole word only; without it any name merely starting with an
        # excluded word ("Theodore", "Byron") would be rejected.
        name_match = re.search(
            r'\b(?!(?:The|Later|Next|By|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b)'
            r'[A-Z][a-z]+\b',
            sentence,
        )
        if name_match:
            person = name_match.group()

    if not person:
        # Whole-word patterns: a plain substring test would find "he" inside
        # "the" and "she" inside "finished".
        fallback_subjects = (
            (r'\bstudents\b', "Students"),
            (r'\bteachers?\b', "Teacher"),
            (r'\bshe\b', "She"),
            (r'\bthey\b', "They"),
            (r'\bhe\b', "He"),
        )
        for subject_pattern, label in fallback_subjects:
            if re.search(subject_pattern, sentence, re.IGNORECASE):
                person = label
                break

    deadline_match = re.search(
        r'\bby (the end of the month|tomorrow|today'
        r'|(?:next )?(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)'
        r'|\d{1,2}(?:st|nd|rd|th)?)\b',
        sentence,
        re.IGNORECASE,
    )
    if deadline_match:
        deadline = deadline_match.group(1)

    return person, deadline

# --- Step 5: Categorize Tasks ---
def categorize_task(task):
    """Classify a task sentence by the first category whose keyword it contains.

    Categories are tried in order ("Personal Task", "Education", "Work");
    a keyword matches when it occurs anywhere in the lower-cased sentence.
    Returns "Other" when no keyword matches.
    """
    lowered = task.lower()
    category_rules = (
        ("Personal Task", ("buy", "pick", "shop")),
        ("Education", ("start a new assignment", "homework", "submit", "review")),
        ("Work", ("approve", "complete", "asked", "clean")),
    )
    for label, needles in category_rules:
        for needle in needles:
            if needle in lowered:
                return label
    return "Other"

# --- Step 6: Process Input Text ---
def process_text(text):
    """Run the extraction pipeline on *text* and return structured tasks.

    The text is preprocessed, split into sentences, filtered down to task
    sentences, and each task sentence is annotated with the responsible
    person, the deadline and a category.

    Args:
        text: The raw input text.

    Returns:
        A list of dicts with the keys "task", "person", "deadline" and
        "category", one per task sentence, in order of appearance.
    """
    clean_text = preprocess_text(text)
    sentences = extract_sentences(clean_text)
    task_sentences = extract_tasks(sentences)

    tasks_output = []
    # Names found so far; passed to extract_details so that later sentences
    # can be matched against them.
    known_names = []

    for task in task_sentences:
        person, deadline = extract_details(task, known_names)

        # Remember only real findings: a None person (nothing found) must not
        # be stored, otherwise the next extract_details call would try to
        # treat it as a name. Pronouns are not names either.
        if person and person not in known_names and person not in ["He", "She", "They"]:
            known_names.append(person)

        category = categorize_task(task)

        tasks_output.append({
            "task": task,
            "person": person,
            "deadline": deadline,
            "category": category
        })

    return tasks_output

# --- Step 7: Run the Code on Updated Text ---
# Sample input: a short narrative mixing plain statements with task sentences
# (some carrying a "by ..." deadline).
text = """Rahul wakes up early every day. He goes to college in the morning and comes back at 3 pm. 
At present, Rahul is outside. He has to buy snacks for all of us. The teacher asked students to complete their homework by tomorrow.
Neha must submit the report by Friday. Later, she will review the project. Rohan is required to clean the lab by today.
The project should be completed by the end of the month. By Next Monday, they will start a new assignment."""

# Run the full pipeline; process_text returns one dict per task sentence.
tasks = process_text(text)

# Pretty-print the extracted tasks as JSON.
print("\n🚀 Final Extracted Tasks (JSON Format):")
print(json.dumps(tasks, indent=4))


Extracted Sentences:
['Rahul wakes up early every day', 'He goes to college in the morning and comes back at 3 pm', 'At present, Rahul is outside', 'He has to buy snacks for all of us', 'The teacher asked students to complete their homework by tomorrow', 'Neha must submit the report by Friday', 'Later, she will review the project', 'Rohan is required to clean the lab by today', 'The project should be completed by the end of the month', 'By Next Monday, they will start a new assignment']

Extracted Tasks:
['He has to buy snacks for all of us', 'The teacher asked students to complete their homework by tomorrow', 'Neha must submit the report by Friday', 'Later, she will review the project', 'Rohan is required to clean the lab by today', 'The project should be completed by the end of the month', 'By Next Monday, they will start a new assignment']

🚀 Final Extracted Tasks (JSON Format):
[
    {
        "task": "He has to buy snacks for all of us",
        "person": "He",
        "deadline"