# In [None]:
from heapq import heappop, heappush
from collections import deque, defaultdict
import pandas as pd

class Task:
    """A unit of work that can be scheduled.

    Attributes:
        name: Identifier of the task; dependencies refer to tasks by name.
        value: Numeric value gained by completing the task.
        duration: Numeric time needed to complete the task.
        dependencies: List of names of tasks this task depends on.
        mandatory: Flag marking the task as mandatory.
        fractionable: Flag marking the task as partially completable.
    """

    def __init__(self, name, value, duration, dependencies=None, mandatory=False, fractionable=False):
        self.name = name
        self.value = value
        self.duration = duration
        # A fresh list per instance avoids sharing one default between tasks.
        self.dependencies = dependencies if dependencies else []
        self.mandatory = mandatory
        self.fractionable = fractionable

    def __repr__(self):
        return (
            f"{type(self).__name__}(name={self.name!r}, value={self.value!r}, "
            f"duration={self.duration!r}, dependencies={self.dependencies!r}, "
            f"mandatory={self.mandatory!r}, fractionable={self.fractionable!r})"
        )

    @staticmethod
    def _density(task):
        """Return value per unit of duration without dividing by zero.

        A zero-duration task costs no time, so its ratio is +inf for a
        positive value, -inf for a negative value and 0.0 for a zero value.
        """
        if task.duration:
            return task.value / task.duration
        if task.value > 0:
            return float("inf")
        if task.value < 0:
            return float("-inf")
        return 0.0

    # Implementing the __lt__ method to avoid TypeError when comparing Task instances in a heap
    def __lt__(self, other):
        # Deliberately inverted: the task with the HIGHER value/duration ratio
        # is "smaller", so it wins ties inside heapq's min-heap.
        return Task._density(self) > Task._density(other)


# Step 1: Build a directed graph from the task list
def build_graph(tasks):
    """Build the dependency graph for ``tasks``.

    Returns:
        A ``(graph, in_degree)`` pair of defaultdicts. ``graph`` maps a
        dependency name to the names of the tasks that list it as a
        dependency; ``in_degree`` maps each task name to the number of
        dependencies it lists.
    """
    successors = defaultdict(list)
    pending_counts = defaultdict(int)

    # Register every task first so tasks without dependencies get an
    # explicit count of zero.
    pending_counts.update((node.name, 0) for node in tasks)

    for node in tasks:
        dependent_name = node.name
        for prerequisite in node.dependencies:
            successors[prerequisite].append(dependent_name)
            pending_counts[dependent_name] += 1

    return successors, pending_counts


# Step 2: Topological sort to find a valid task order considering dependencies
def topological_sort(tasks):
    """Return the tasks in an order where dependencies come first.

    Tasks with no unresolved dependencies are emitted in FIFO order; each
    emitted task lowers the pending count of the tasks that depend on it.
    A task whose count never reaches zero is not included in the result.
    """
    graph, in_degree = build_graph(tasks)

    by_name = {}
    for item in tasks:
        by_name[item.name] = item

    ready = deque(item.name for item in tasks if in_degree[item.name] == 0)
    ordered = []

    while ready:
        name = ready.popleft()
        ordered.append(by_name[name])
        for child in graph[name]:
            in_degree[child] -= 1
            if in_degree[child] != 0:
                continue
            # Every dependency of `child` has now been emitted.
            ready.append(child)

    return ordered


# Step 3: Create a max heap for prioritizing tasks by value/duration ratio
def create_max_heap(sorted_tasks, available_time):
    """Build a heap of candidate tasks ordered by value/duration ratio.

    A task is a candidate when it is mandatory or when its duration fits in
    ``available_time``. Entries are ``(-ratio, task)`` tuples, so popping
    from the (min-)heap yields the task with the highest ratio first.

    Zero-duration tasks do not raise ``ZeroDivisionError``: they cost no
    time, so a positive value ranks first (key ``-inf``), a negative value
    ranks last (key ``+inf``) and a zero value gets key ``0.0``.

    Args:
        sorted_tasks: Iterable of task objects exposing ``value``,
            ``duration`` and ``mandatory``.
        available_time: Total time budget.

    Returns:
        A list satisfying the ``heapq`` invariant.
    """
    max_heap = []
    for task in sorted_tasks:
        if not (task.mandatory or available_time >= task.duration):
            continue
        if task.duration:
            # Max heap by value/time ratio (Fractional Knapsack concept)
            key = -task.value / task.duration
        elif task.value > 0:
            key = float("-inf")
        elif task.value < 0:
            key = float("inf")
        else:
            key = 0.0
        heappush(max_heap, (key, task))
    return max_heap


# Step 4: Implement Fractional Knapsack Algorithm to prioritize tasks from the max heap until time runs out
def prioritize_tasks(max_heap, available_time):
    """Greedily pick tasks from ``max_heap`` until the time budget is used.

    Entries are popped in heap order. A task that fits in the remaining time
    is taken whole. A task that does not fit is skipped unless it is
    fractionable, in which case a "(Partial)" task covering exactly the
    remaining time is appended and selection stops.

    Note: ``max_heap`` is consumed in place.
    """
    chosen = []
    elapsed = 0

    while max_heap:
        if not elapsed < available_time:
            break

        _, candidate = heappop(max_heap)
        finish = elapsed + candidate.duration

        if finish <= available_time:
            chosen.append(candidate)
            elapsed = finish
            continue

        if not candidate.fractionable:
            # Too long and cannot be split: try the next candidate.
            continue

        # Handle fractionable tasks using the Fractional Knapsack Algorithm
        share = (available_time - elapsed) / candidate.duration
        partial = Task(f"{candidate.name} (Partial)", candidate.value * share, available_time - elapsed)
        chosen.append(partial)
        break

    return chosen


# Step 5: Print task details
def print_tasks(tasks):
    """Print one comma-separated summary line per task to standard output."""
    for item in tasks:
        fields = (
            f"Task Name: {item.name}",
            f"Value: {item.value}",
            f"Duration: {item.duration}",
            f"Dependencies: {item.dependencies}",
            f"Mandatory: {item.mandatory}",
            f"Fractionable: {item.fractionable}",
        )
        print(", ".join(fields))


# Step 6: Parse the dataset into Task objects
def parse_task_data(data):
    """Convert row dictionaries into ``Task`` objects.

    Columns read from each row:
        ``Task_1`` - task name (falls back to ``"Task <n>"`` when blank),
        ``Q1`` - "yes" marks the task as mandatory,
        ``Q2`` - comma-separated names of the tasks it depends on,
        ``Q3`` - estimated time (duration),
        ``Q4`` - benefit (value),
        ``Q5`` - "yes" marks the task as fractionable.

    Missing keys, ``None`` and NaN cells (what pandas produces for empty CSV
    fields) are treated as blank instead of raising ``AttributeError``.
    Names and dependency names are converted to stripped strings so that
    "A, B" matches tasks named "A" and "B"; empty dependency entries are
    dropped.

    Args:
        data: Iterable of mappings supporting ``.get``, e.g. the result of
            ``DataFrame.to_dict('records')``.

    Returns:
        List of ``Task`` objects in row order.

    Raises:
        ValueError: If a non-blank ``Q3`` or ``Q4`` cell is not numeric.
    """

    def _text(cell):
        # Normalise a cell to stripped text; blank/None/NaN become "".
        if cell is None:
            return ""
        if isinstance(cell, str):
            return cell.strip()
        return "" if pd.isna(cell) else str(cell).strip()

    def _number(cell):
        # Blank cells default to 0.0; anything else must parse as a float.
        return float(cell) if _text(cell) else 0.0

    task_list = []
    for row in data:
        task_name = _text(row.get('Task_1')) or f"Task {len(task_list) + 1}"
        value = _number(row.get('Q4'))  # Benefit
        duration = _number(row.get('Q3'))  # Estimated time
        dependencies = [
            name.strip()
            for name in _text(row.get('Q2')).split(",")
            if name.strip()
        ]
        mandatory = _text(row.get('Q1')).lower() == "yes"
        fractionable = _text(row.get('Q5')).lower() == "yes"

        task_list.append(
            Task(task_name, value, duration, dependencies, mandatory, fractionable)
        )

    return task_list


# Step 7: Main function
def main(file_path='/path/to/Cleaned_Task_Dataset.csv', available_time=100):
    """Load the task dataset, prioritize the tasks and print the selection.

    Args:
        file_path: Path of the CSV dataset. The default is a placeholder;
            replace it with the actual dataset file path.
        available_time: Total time available for the month.
    """
    # Load your dataset
    new_data = pd.read_csv(file_path)

    # Parse tasks from the dataset
    task_list = parse_task_data(new_data.to_dict('records'))

    # Order by dependencies, rank by value/duration, then fill the budget.
    sorted_tasks = topological_sort(task_list)
    max_heap = create_max_heap(sorted_tasks, available_time)
    prioritized_tasks = prioritize_tasks(max_heap, available_time)

    print("Prioritized tasks:")
    print_tasks(prioritized_tasks)


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
