In [1]:
from dotenv import load_dotenv
import os

# load .env
# `load_dotenv` expects the path of the .env *file*. Handing it only the project
# directory does not point at a readable file, so no variables get loaded and
# OPENAI_API_KEY can silently stay unset.
# NOTE(review): assumes the file is literally named `.env` inside this directory.
PROJECT_DIR = '/home/sangbin_yun/dev/goal_directedness/'
load_dotenv(os.path.join(PROJECT_DIR, '.env'))

# `None` when the variable is defined neither in the .env file nor in the environment.
API_KEY = os.environ.get('OPENAI_API_KEY')

# dataset

In [None]:
from datasets import load_dataset

# Arguments shared by both splits of the HotpotQA "distractor" configuration.
_HOTPOT_KWARGS = dict(path="hotpot_qa", name='distractor', trust_remote_code=True)

train_set = load_dataset(split='train', **_HOTPOT_KWARGS)
dev_set = load_dataset(split='validation', **_HOTPOT_KWARGS)

In [None]:
# Inspect the 'sentences' entry under 'context' of the first training example
# (as the cell's last expression, its value is what the notebook displays).
train_set[0]['context']['sentences']

In [25]:
import os
import argparse

# Default artefact file names; kept as notebook-level names and reused below as
# the defaults of the matching command-line options.
glove_word_file = "glove.840B.300d.txt"

word_emb_file = "word_emb.json"
char_emb_file = "char_emb.json"
train_eval = "train_eval.json"
dev_eval = "dev_eval.json"
test_eval = "test_eval.json"
word2idx_file = "word2idx.json"
char2idx_file = "char2idx.json"
idx2word_file = 'idx2word.json'
idx2char_file = 'idx2char.json'
train_record_file = 'train_record.pkl'
dev_record_file = 'dev_record.pkl'
test_record_file = 'test_record.pkl'

# One row per option, in registration order: (flag, keyword arguments for add_argument).
_ARGUMENT_SPECS = (
    ('--mode', dict(type=str, default='train')),
    ('--data_file', dict(type=str)),
    ('--glove_word_file', dict(type=str, default=glove_word_file)),
    ('--save', dict(type=str, default='HOTPOT')),

    ('--word_emb_file', dict(type=str, default=word_emb_file)),
    ('--char_emb_file', dict(type=str, default=char_emb_file)),
    ('--train_eval_file', dict(type=str, default=train_eval)),
    ('--dev_eval_file', dict(type=str, default=dev_eval)),
    ('--test_eval_file', dict(type=str, default=test_eval)),
    ('--word2idx_file', dict(type=str, default=word2idx_file)),
    ('--char2idx_file', dict(type=str, default=char2idx_file)),
    ('--idx2word_file', dict(type=str, default=idx2word_file)),
    ('--idx2char_file', dict(type=str, default=idx2char_file)),

    ('--train_record_file', dict(type=str, default=train_record_file)),
    ('--dev_record_file', dict(type=str, default=dev_record_file)),
    ('--test_record_file', dict(type=str, default=test_record_file)),

    ('--glove_char_size', dict(type=int, default=94)),
    ('--glove_word_size', dict(type=int, default=int(2.2e6))),
    ('--glove_dim', dict(type=int, default=300)),
    ('--char_dim', dict(type=int, default=8)),

    ('--para_limit', dict(type=int, default=1000)),
    ('--ques_limit', dict(type=int, default=80)),
    ('--sent_limit', dict(type=int, default=100)),
    ('--char_limit', dict(type=int, default=16)),

    ('--batch_size', dict(type=int, default=64)),
    ('--checkpoint', dict(type=int, default=1000)),
    ('--period', dict(type=int, default=100)),
    ('--init_lr', dict(type=float, default=0.5)),
    ('--keep_prob', dict(type=float, default=0.8)),
    ('--hidden', dict(type=int, default=80)),
    ('--char_hidden', dict(type=int, default=100)),
    ('--patience', dict(type=int, default=1)),
    ('--seed', dict(type=int, default=13)),

    ('--sp_lambda', dict(type=float, default=0.0)),

    ('--data_split', dict(type=str, default='train')),
    ('--fullwiki', dict(action='store_true')),
    ('--prediction_file', dict(type=str)),
    ('--sp_threshold', dict(type=float, default=0.3)),
)

parser = argparse.ArgumentParser()
for _flag, _options in _ARGUMENT_SPECS:
    parser.add_argument(_flag, **_options)
del _flag, _options  # do not leave loop variables behind in the notebook namespace

# Parsing an explicit empty list (instead of sys.argv) yields a namespace that
# holds only the defaults declared above.
config = parser.parse_args(args=[])


# Validation

In [None]:
def predict(data_source, model, eval_file, config, prediction_file):
    """Collect answer and supporting-fact predictions and dump them as JSON.

    NOTE(review): this is a skeleton. The data-loading and model-call steps are
    only bare string placeholders, so ``predict_support`` is never assigned in
    the visible code and ``answer_dict`` is never filled. ``tqdm``, ``torch``
    and ``json`` are also not imported in this cell (``json`` and ``torch`` are
    imported by later cells; no ``tqdm`` import is visible).

    Args:
        data_source: Iterable of batches; each batch is indexed as
            ``data['ids'][i]`` to obtain an example id.
        model: Not referenced by the visible code.
        eval_file: Mapping from example id to a record that has a
            ``'sent2title_ids'`` sequence.
        config: Object providing ``sp_threshold``.
        prediction_file: Path the prediction JSON is written to.
    """
    answer_dict = {}
    sp_dict = {}
    sp_th = config.sp_threshold
    for step, data in enumerate(tqdm(data_source)):
        '''
        Dataset load
        '''

        '''
        Model prediction
        answer_dict_ = 
        answer_dict.update(answer_dict_)
        '''

        # NOTE(review): `predict_support` must come from the missing model call above.
        # The indexing assumes a 3-D tensor whose last axis has at least 2 entries,
        # with index 1 used as the "is supporting" logit — TODO confirm.
        predict_support_np = torch.sigmoid(predict_support[:, :, 1]).data.cpu().numpy()
        for i in range(predict_support_np.shape[0]):
            cur_sp_pred = []
            cur_id = data['ids'][i]
            for j in range(predict_support_np.shape[1]):
                # Stop once j runs past the sentences recorded for this example.
                if j >= len(eval_file[cur_id]['sent2title_ids']): break
                # Keep the sentence when its sigmoid score exceeds the threshold.
                if predict_support_np[i, j] > sp_th:
                    cur_sp_pred.append(eval_file[cur_id]['sent2title_ids'][j])
            sp_dict.update({cur_id: cur_sp_pred})

    # Output layout consumed by the evaluation function: {'answer': {...}, 'sp': {...}}.
    prediction = {'answer': answer_dict, 'sp': sp_dict}
    with open(prediction_file, 'w') as f:
        json.dump(prediction, f)

In [None]:
import sys
import ujson as json
import re
import string
from collections import Counter
import pickle

def normalize_answer(s):
    """Canonicalise an answer string for comparison.

    Steps, in order: lowercase, strip ASCII punctuation, replace the standalone
    words "a", "an" and "the" with a space, then collapse whitespace runs to
    single spaces (which also trims both ends).
    """
    lowered = s.lower()

    punctuation = set(string.punctuation)
    without_punctuation = ''.join(ch for ch in lowered if ch not in punctuation)

    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)

    return ' '.join(without_articles.split())


def f1_score(prediction, ground_truth):
    """Token-level F1 between a predicted and a gold answer.

    Both strings are normalised first. Returns ``(f1, precision, recall)``.
    The result is ``(0, 0, 0)`` when either normalised side is one of
    "yes" / "no" / "noanswer" and the two sides differ, or when the answers
    share no token.
    """
    pred_norm = normalize_answer(prediction)
    gold_norm = normalize_answer(ground_truth)

    ZERO_METRIC = (0, 0, 0)
    special_answers = ('yes', 'no', 'noanswer')

    # Special answers only score on an exact (normalised) match.
    if pred_norm != gold_norm and (pred_norm in special_answers or gold_norm in special_answers):
        return ZERO_METRIC

    pred_tokens = pred_norm.split()
    gold_tokens = gold_norm.split()

    # Multiset intersection: each shared token counts min(occurrences) times.
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if not overlap:
        return ZERO_METRIC

    precision = float(overlap) / len(pred_tokens)
    recall = float(overlap) / len(gold_tokens)
    return (2 * precision * recall) / (precision + recall), precision, recall


def exact_match_score(prediction, ground_truth):
    """Return True when both answers are identical after normalisation."""
    predicted = normalize_answer(prediction)
    expected = normalize_answer(ground_truth)
    return predicted == expected

def update_answer(metrics, prediction, gold):
    """Add one example's answer scores to the running totals in ``metrics``.

    Increments the 'em', 'f1', 'prec' and 'recall' entries in place and returns
    ``(em, prec, recall)`` for this example (``em`` as a bool).
    """
    em = exact_match_score(prediction, gold)
    f1, prec, recall = f1_score(prediction, gold)

    increments = (('em', float(em)), ('f1', f1), ('prec', prec), ('recall', recall))
    for key, amount in increments:
        metrics[key] += amount

    return em, prec, recall

def update_sp(metrics, prediction, gold):
    """Add one example's supporting-fact scores to the running totals in ``metrics``.

    ``prediction`` and ``gold`` are iterables of facts; each fact is converted to
    a tuple and duplicates are ignored. Increments the 'sp_em', 'sp_f1',
    'sp_prec' and 'sp_recall' entries in place and returns ``(em, prec, recall)``.
    """
    predicted = {tuple(fact) for fact in prediction}
    expected = {tuple(fact) for fact in gold}

    tp = len(predicted & expected)   # predicted and gold
    fp = len(predicted - expected)   # predicted but not gold
    fn = len(expected - predicted)   # gold but not predicted

    prec = 1.0 * tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = 1.0 * tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0
    em = 0.0 if fp + fn else 1.0

    for key, amount in (('sp_em', em), ('sp_f1', f1), ('sp_prec', prec), ('sp_recall', recall)):
        metrics[key] += amount
    return em, prec, recall

def eval(prediction_file, gold_file):
    """Score a prediction file against a gold file; print and return averaged metrics.

    Note: the name shadows the built-in ``eval`` for the rest of the notebook; it
    is kept so existing calls continue to work.

    Args:
        prediction_file: Path to a JSON object with 'answer' and 'sp' mappings
            keyed by example id.
        gold_file: Path to a JSON list of examples, each with '_id', 'answer'
            and 'supporting_facts'.

    Returns:
        dict: answer, supporting-fact and joint metrics, each divided by the
        number of gold examples. With an empty gold file every metric stays 0
        instead of raising ``ZeroDivisionError``.
    """
    with open(prediction_file) as f:
        prediction = json.load(f)
    with open(gold_file) as f:
        gold = json.load(f)

    metrics = {'em': 0, 'f1': 0, 'prec': 0, 'recall': 0,
        'sp_em': 0, 'sp_f1': 0, 'sp_prec': 0, 'sp_recall': 0,
        'joint_em': 0, 'joint_f1': 0, 'joint_prec': 0, 'joint_recall': 0}
    for dp in gold:
        cur_id = dp['_id']
        # Joint metrics need both an answer and a supporting-fact prediction.
        can_eval_joint = True
        if cur_id not in prediction['answer']:
            print('missing answer {}'.format(cur_id))
            can_eval_joint = False
        else:
            em, prec, recall = update_answer(
                metrics, prediction['answer'][cur_id], dp['answer'])
        if cur_id not in prediction['sp']:
            print('missing sp fact {}'.format(cur_id))
            can_eval_joint = False
        else:
            sp_em, sp_prec, sp_recall = update_sp(
                metrics, prediction['sp'][cur_id], dp['supporting_facts'])

        if can_eval_joint:
            joint_prec = prec * sp_prec
            joint_recall = recall * sp_recall
            if joint_prec + joint_recall > 0:
                joint_f1 = 2 * joint_prec * joint_recall / (joint_prec + joint_recall)
            else:
                joint_f1 = 0.
            joint_em = em * sp_em

            metrics['joint_em'] += joint_em
            metrics['joint_f1'] += joint_f1
            metrics['joint_prec'] += joint_prec
            metrics['joint_recall'] += joint_recall

    # Average over ALL gold examples (missing predictions count as zero).
    N = len(gold)
    if N > 0:
        for k in metrics:
            metrics[k] /= N

    print(metrics)
    # Also hand the numbers back so callers can use them programmatically.
    return metrics

# Command-line entry point: `python <script> <prediction_file> <gold_file>`.
# Inside a Jupyter/IPython kernel `__name__` is also '__main__', but `sys.argv`
# then carries the kernel's own launch arguments (e.g. `-f <connection file>`),
# which would be passed on as file names. Skip the CLI path there; call
# `eval(prediction_file, gold_file)` directly from a cell instead.
if __name__ == '__main__' and 'ipykernel' not in sys.modules:
    eval(sys.argv[1], sys.argv[2])



# Model

In [None]:
# Import necessary libraries
import dspy
from dspy.predict.chain_of_thought import ChainOfThought
from react.agent import ReasoningAgent
import ray  # For parallelization of subtasks
import torch  # PyTorch for model training and task handling
from langgraph import LangGraph, Node, Edge  # Importing LangGraph library


# Horizontal Component: Define a sequential, goal-directed task solver
class HorizontalTaskSolver:
    """Solve a list of tasks one after another with chain-of-thought reasoning."""

    def __init__(self, task_sequence):
        self.task_sequence = task_sequence  # List of tasks to be solved step-by-step

    def solve(self):
        """Run ``reason_task`` on every task, in order.

        Returns:
            list[dict]: one ``{"task", "rationale", "result"}`` record per task.
        """
        solution = []
        for task in self.task_sequence:
            # Use a chain of thought to reason through each task
            rationale, result = self.reason_task(task)
            solution.append({"task": task, "rationale": rationale, "result": result})
        return solution

    def reason_task(self, task):
        """Reason about a single task and return ``(rationale, result)``.

        A DSPy module is built from a signature ("inputs -> outputs") and then
        *called* with keyword inputs; the task text is an input value, not the
        signature. The returned prediction carries the declared output field
        plus the generated reasoning.
        """
        cot = ChainOfThought("task -> result")
        prediction = cot(task=task)
        # NOTE(review): the reasoning field is named `reasoning` in recent DSPy
        # releases and `rationale` in older ones; accept either.
        rationale = getattr(prediction, "reasoning", None)
        if rationale is None:
            rationale = getattr(prediction, "rationale", None)
        return rationale, prediction.result


# Vertical Component: Define a parallel subtask solver
@ray.remote  # registered with Ray; instantiated via `VerticalTaskSolver.remote(...)`
class VerticalTaskSolver:
    """Process a list of independent subtasks, recording success or failure for each.

    Note: ``solve`` walks the subtasks one after another inside this object.
    """

    def __init__(self, subtasks):
        self.subtasks = subtasks

    def solve(self):
        """Return one status record per subtask, in input order."""
        return [self._attempt(subtask) for subtask in self.subtasks]

    def _attempt(self, subtask):
        """Run one subtask; a raised exception becomes a "failed" record instead of propagating."""
        try:
            outcome = self.process_subtask(subtask)
        except Exception as exc:
            return {"subtask": subtask, "result": None, "status": "failed", "error": str(exc)}
        return {"subtask": subtask, "result": outcome, "status": "success"}

    def process_subtask(self, subtask):
        """Placeholder for task-specific logic: let a ReasoningAgent act on the subtask."""
        return ReasoningAgent(subtask).act()

# Divide the user prompt into horizontal and vertical tasks
class TaskDivider(dspy.Module):
    """Split a user prompt into sequential ("horizontal") tasks and parallel ("vertical") subtasks."""

    def __init__(self):
        super().__init__()
        # A DSPy signature must name its fields ("inputs -> outputs"); the
        # natural-language instruction is attached separately. The output field
        # names are the attributes `forward` reads from the prediction.
        # NOTE(review): typed output fields are parsed into Python lists by
        # recent DSPy releases — confirm against the installed version.
        self.divider = dspy.ChainOfThought(
            dspy.Signature(
                "prompt -> horizontal_tasks: list[str], vertical_subtasks: list[str]",
                "Analyze the given prompt and divide it into sequential tasks and parallel subtasks.",
            )
        )

    # Built-in generics are used in the annotation because `typing.Tuple` /
    # `typing.List` are not imported in this notebook (requires Python 3.9+).
    def forward(self, prompt: str) -> tuple[list[str], list[str]]:
        """Return ``(horizontal_tasks, vertical_subtasks)`` for ``prompt``."""
        analysis = self.divider(prompt=prompt)
        return analysis.horizontal_tasks, analysis.vertical_subtasks

# Orchestrate Horizontal and Vertical Processing
class TaskOrchestrator:
    """Divide a prompt into tasks, wire the solvers into a graph and run them.

    Typical use: ``TaskOrchestrator().divide_and_orchestrate(user_prompt)``.

    NOTE(review): relies on ``LangGraph``, ``Node`` and ``Edge`` accepting the
    calls made below — confirm against the installed graph library.
    """

    def __init__(self):
        self.task_divider = TaskDivider()
        self.graph = LangGraph()  # Initialize LangGraph
        # Set by divide_and_create_graph(); orchestrate() needs both.
        self.horizontal_solver = None
        self.vertical_solver = None

    def divide_and_create_graph(self, user_prompt):
        """Divide ``user_prompt``, create both solvers and register them in the graph."""
        # Step 1: Divide tasks using TaskDivider
        horizontal_tasks, vertical_subtasks = self.task_divider(user_prompt)

        # Step 2: Build the solvers. They are stored on the instance because
        # orchestrate() runs them later.
        self.horizontal_solver = HorizontalTaskSolver(horizontal_tasks)
        self.vertical_solver = VerticalTaskSolver.remote(vertical_subtasks)

        # Create nodes for LangGraph
        horizontal_node = Node(name="Horizontal Task Solver", task=self.horizontal_solver.solve)
        vertical_node = Node(name="Vertical Task Solver", task=self.vertical_solver.solve.remote)

        # Add nodes to graph
        self.graph.add_node(horizontal_node)
        self.graph.add_node(vertical_node)

        # Create edge to connect horizontal and vertical tasks (if needed)
        # For instance, if vertical tasks depend on results of horizontal tasks
        self.graph.add_edge(Edge(from_node=horizontal_node, to_node=vertical_node))

    # Built-in generics: `typing.Tuple` / `typing.List` are not imported in this
    # notebook (requires Python 3.9+).
    def divide_tasks(self, user_prompt: str) -> tuple[list[str], list[str]]:
        """Return ``(horizontal_tasks, vertical_subtasks)`` without building anything."""
        return self.task_divider(user_prompt)

    def orchestrate(self):
        """Run both solvers and return their results.

        Returns:
            dict: ``{"horizontal_results": ..., "vertical_results": ...}``.

        Raises:
            RuntimeError: if ``divide_and_create_graph`` has not been called yet.
        """
        horizontal_solver = getattr(self, "horizontal_solver", None)
        vertical_solver = getattr(self, "vertical_solver", None)
        if horizontal_solver is None or vertical_solver is None:
            raise RuntimeError(
                "No solvers available: call divide_and_create_graph(user_prompt) "
                "or divide_and_orchestrate(user_prompt) first."
            )

        # Solve horizontal tasks sequentially
        horizontal_results = horizontal_solver.solve()

        # Solve vertical tasks through the Ray actor and wait for the result
        vertical_results = ray.get(vertical_solver.solve.remote())

        return {"horizontal_results": horizontal_results, "vertical_results": vertical_results}

    def divide_and_orchestrate(self, user_prompt):
        """Convenience wrapper: build the solvers for ``user_prompt`` and run them."""
        self.divide_and_create_graph(user_prompt)
        return self.orchestrate()

# 4. Example Usage
# Demo entry point. `__name__ == "__main__"` also holds inside a notebook kernel,
# so this runs whenever the cell is executed.
# Requires TaskOrchestrator to provide `divide_and_orchestrate(user_prompt)`
# returning a mapping with "horizontal_results" and "vertical_results".
if __name__ == "__main__":
    user_prompt = "Example user prompt to divide into tasks"
    
    orchestrator = TaskOrchestrator()
    results = orchestrator.divide_and_orchestrate(user_prompt)

    print("Horizontal Task Results:", results["horizontal_results"])
    print("Vertical Task Results:", results["vertical_results"])


In [None]:
# Import necessary libraries
from langgraph import LangGraph, Node, Edge  # Importing LangGraph library
from dspy.predict.chain_of_thought import ChainOfThought
from react.agent import ReasoningAgent
import ray  # For parallelization
import torch  # PyTorch for model handling

# Define the prompts for each node to guide the model
# Instruction text for each pipeline stage, keyed by stage name (in pipeline order).
PROMPTS = dict(
    problem_verification="Verify the problem based on the user-prompt, validate clarity, and redefine if needed.",
    problem_decomposition="Decompose the problem into sequential and parallel tasks. Define the sequence and dependencies.",
    solve_sequential="Solve each sequential subtask in the order given, considering dependencies.",
    combine_evidence="Combine the solutions from each subtask to build a cohesive argument or evidence set.",
    provide_answer="Based on the combined evidence, generate a final answer for the user.",
)

# Node 1: Problem Verification
class ProblemVerificationNode:
    """Stage 1: run the problem-verification prompt against the user's prompt.

    NOTE(review): assumes ``ChainOfThought(<prompt text>)`` is constructible and
    exposes ``.predict(<input>)`` — confirm against the installed DSPy API.
    """

    def __init__(self, prompt=PROMPTS["problem_verification"]):
        self.prompt = prompt

    def verify_problem(self, user_prompt):
        """Return the predictor's output for ``user_prompt`` unchanged."""
        # Use ChainOfThought for validation
        return ChainOfThought(self.prompt).predict(user_prompt)

# Node 2: Problem Decomposition
class ProblemDecompositionNode:
    """Stage 2: split the problem into sequential tasks and parallel subtasks.

    NOTE(review): assumes ``ChainOfThought(<prompt text>).predict(<input>)``
    returns an object with ``horizontal_tasks`` and ``vertical_subtasks``
    attributes — confirm against the installed DSPy API.
    """

    def __init__(self, prompt=PROMPTS["problem_decomposition"]):
        self.prompt = prompt

    def decompose_problem(self, user_prompt):
        """Return ``(horizontal_tasks, vertical_subtasks)`` for ``user_prompt``."""
        outcome = ChainOfThought(self.prompt).predict(user_prompt)
        horizontal, vertical = outcome.horizontal_tasks, outcome.vertical_subtasks
        return horizontal, vertical

# Node 3: Sequential Subtask Solver
class SequentialSubtaskSolverNode:
    """Stage 3: solve the sequential tasks one by one, in the given order.

    NOTE(review): assumes ``ChainOfThought(<prompt text>)`` is constructible and
    exposes ``.predict(<input>)`` — confirm against the installed DSPy API.
    """

    def __init__(self, prompt=PROMPTS["solve_sequential"]):
        self.prompt = prompt

    def solve_sequentially(self, horizontal_tasks):
        """Return one predictor output per task; a fresh predictor is built for each task."""
        return [ChainOfThought(self.prompt).predict(task) for task in horizontal_tasks]

# Node 4: Evidence Combination
class EvidenceCombinationNode:
    """Stage 4: merge the per-task solutions into one body of evidence.

    NOTE(review): assumes ``ChainOfThought(<prompt text>)`` is constructible and
    exposes ``.predict(<input>)`` — confirm against the installed DSPy API.
    """

    def __init__(self, prompt=PROMPTS["combine_evidence"]):
        self.prompt = prompt

    def combine_evidence(self, solutions):
        """Return the predictor's output for the collected ``solutions``."""
        return ChainOfThought(self.prompt).predict(solutions)

# Node 5: Answer Generation
class AnswerGenerationNode:
    """Stage 5: turn the combined evidence into the final answer.

    NOTE(review): assumes ``ChainOfThought(<prompt text>)`` is constructible and
    exposes ``.predict(<input>)`` — confirm against the installed DSPy API.
    """

    def __init__(self, prompt=PROMPTS["provide_answer"]):
        self.prompt = prompt

    def generate_answer(self, combined_evidence):
        """Return the predictor's output for ``combined_evidence``."""
        return ChainOfThought(self.prompt).predict(combined_evidence)

# Define the LangGraph and connect the nodes
class ProblemSolvingGraph:
    """Five-stage pipeline wired as a graph: verify, decompose, solve, combine, answer.

    NOTE(review): relies on ``LangGraph``, ``Node`` and ``Edge`` accepting the
    calls made below, and on the graph runtime forwarding each node's output to
    the next node. ``problem_decomposition_task`` returns a
    ``(horizontal, vertical)`` pair while ``sequential_solver_task`` takes a
    single task list — confirm how values are passed between nodes.
    """

    def __init__(self):
        self.graph = LangGraph()

        # Initialize nodes
        self.problem_verification_node = Node(name="Problem Verification", task=self.problem_verification_task)
        self.problem_decomposition_node = Node(name="Problem Decomposition", task=self.problem_decomposition_task)
        self.sequential_solver_node = Node(name="Sequential Subtask Solver", task=self.sequential_solver_task)
        self.evidence_combination_node = Node(name="Evidence Combination", task=self.evidence_combination_task)
        self.answer_generation_node = Node(name="Answer Generation", task=self.answer_generation_task)

        pipeline = [
            self.problem_verification_node,
            self.problem_decomposition_node,
            self.sequential_solver_node,
            self.evidence_combination_node,
            self.answer_generation_node,
        ]

        # Add nodes to graph
        for node in pipeline:
            self.graph.add_node(node)

        # Define edges between consecutive nodes to enforce order
        for upstream, downstream in zip(pipeline, pipeline[1:]):
            self.graph.add_edge(Edge(from_node=upstream, to_node=downstream))

    # Define each node task
    def problem_verification_task(self, user_prompt):
        """Node task: verify the problem statement."""
        verifier = ProblemVerificationNode()
        return verifier.verify_problem(user_prompt)

    def problem_decomposition_task(self, user_prompt):
        """Node task: decompose the problem; returns ``(horizontal, vertical)``."""
        decomposer = ProblemDecompositionNode()
        return decomposer.decompose_problem(user_prompt)

    def sequential_solver_task(self, horizontal_tasks):
        """Node task: solve the sequential tasks in order."""
        solver = SequentialSubtaskSolverNode()
        return solver.solve_sequentially(horizontal_tasks)

    def evidence_combination_task(self, solutions):
        """Node task: combine the individual solutions."""
        combiner = EvidenceCombinationNode()
        return combiner.combine_evidence(solutions)

    def answer_generation_task(self, combined_evidence):
        """Node task: produce the final answer."""
        answerer = AnswerGenerationNode()
        return answerer.generate_answer(combined_evidence)

    # Execute the graph
    def solve_problem(self, user_prompt):
        """Execute the graph for ``user_prompt`` and return whatever it produces."""
        # The result has to be returned; otherwise callers always receive None.
        return self.graph.execute({"user_prompt": user_prompt})

# Example usage
# Demo entry point. `__name__ == "__main__"` also holds inside a notebook kernel,
# so this runs whenever the cell is executed. It prints whatever
# `ProblemSolvingGraph.solve_problem` returns.
if __name__ == "__main__":
    user_prompt = "Provide a solution for a multi-step problem."

    problem_solver_graph = ProblemSolvingGraph()
    results = problem_solver_graph.solve_problem(user_prompt)

    print("Final Answer:", results)
