In [1]:
import json 
import os
import numpy as np
import pandas as pd


In [16]:
# Prompt template for the LLM; it is filled in with ``prompt.format(**info_dict)``.
# Placeholders: image_video, surgery_type, tool_name, tool_description,
# tool_return_value, example_1, example_2, example_3.
# Any literal "{" or "}" added to this text must be doubled ("{{", "}}"),
# otherwise str.format will treat it as a placeholder.
# Indentation uses spaces only so the fenced block and the example list line up.
prompt = """
## Character
You are a surgical AI visual assistant, and you are seeing an endoscopic {image_video} from a {surgery_type} {image_video}.
## Tool description
The tool you have is a "{tool_name}" tool. {tool_description}
## Tool return value
After you call the function, you will have an answer like this

    ```json
    {tool_return_value}
    ```
## Task
Based on the information, your task is to generate one question and answer by calling a function of the external expert model and answer the question using the information given back from the function in the following example's format, no other content is needed.

### Few Examples
    Example 1: {example_1}
    Example 2: {example_2}
    Example 3: {example_3}
## Constraints
    - You can ask questions with diversity.
    - The only information you have is the tool return value, please generate the question only based on this value.
    - Remember, all the questions can be clearly answered based on the given information.
    - Please strictly follow the dictionary format in the example.

"""

In [17]:
# Few-shot examples embedded in the prompt. Each example has the same four keys
# (question, model_output, thoughts, answer), and every triplet in
# ``model_output`` uses the same schema: instrument / action / tissue.
example_1 = {
    "question": "Can you identify the instruments being used in the surgery and describe their actions?",
    "model_output": "[{'instrument':'rongeur','action':'manipulate','tissue':'packing material'},{'instrument':'suction tube','action':'manipulate','tissue':'packing material'}]",
    "thoughts": "The utilization of triplet detect model could be highly beneficial, the tool helps in understanding the specific roles and actions of these instruments during the procedure.",
    "answer": "Certainly! There are two instruments identified in the surgical procedure:\n1. A 'rongeur' is being used to manipulate the packing material. This instrument is typically used for grasping, pulling, and removing bone and tissue.\n2. A 'suction tube' is also used to manipulate the packing material, likely assisting in keeping the operative field clear by removing excess fluids or small debris.\nBoth instruments play crucial roles in ensuring the surgery is performed efficiently and safely."
}

example_2 = {
    "question": "Can you identify the surgical instruments being used in the video and describe their actions?",
    "model_output": "[{'instrument':'monopolar electrode','action':'manipulate','tissue':'tissue'},{'instrument':'rongeur','action':'manipulate','tissue':'tissue'}]",
    "thoughts": "Leveraging a triplet detect model to recognize the target object could be beneficial. The triplet detect tool states the instrument and its action, which should provide a direct answer to the question regarding the surgical procedure.",
    "answer": "Certainly! In the video, two different surgical instruments are being utilized:\n- A 'monopolar electrode' is being used to manipulate tissue. This tool is typically employed for precise tissue dissection and cauterization, aiding in minimizing bleeding and ensuring clean surgical fields.\n- A 'rongeur' is also being used to manipulate tissue. Rongeurs are strong, plier-like instruments used in neurosurgery for removing bone and tough tissue, often essential in accessing deeper structures during surgeries like pituitary adenoma resection."
}

# The third key of the triplet is 'tissue' here as well, so the model sees one
# consistent output schema across all three examples.
example_3 = {
    "question": "What surgical instrument is being used to manipulate blood in the endoscopic video of the transsphenoidal pituitary adenoma resection surgery?",
    "model_output": "[{'instrument':'suction tube','action':'manipulate','tissue':'blood'}]",
    "thoughts": "Harnessing a triplet detect model to detect the existence of the target object would be of great advantage.",
    "answer": "A suction tube is being used to manipulate blood during the transsphenoidal pituitary adenoma resection surgery. The suction tube helps in clearing the surgical field by removing excess blood, thus providing better visibility and precision for the surgical procedure."
}

In [18]:
# Values substituted into the ``prompt`` template via ``prompt.format(**info_dict)``.
# ``tool_return_value`` is only a placeholder sample here; its third key is
# "tissue" so that it matches the instrument/action/tissue schema used by the
# few-shot examples (it was previously mislabelled "suction").
info_dict = {
    "image_video": "video",
    "surgery_type": "transsphenoidal pituitary adenoma",
    "tool_name": "triplet_detect",
    "tool_description": "The triplet detect tool will return a triplet list containing several dictionaries, each dictionary illustrates a triplet (surgical instrument, verb, target)",
    "tool_return_value": [{"instrument": "suction tube", "action": "manipulate", "tissue": "blood"}],
    "example_1": example_1,
    "example_2": example_2,
    "example_3": example_3,
}

In [19]:
def _clean_field(data_dict, key):
    """Return ``data_dict[key]`` when it holds a usable value, otherwise ``None``.

    A value is unusable when the key is absent, or when the value is ``None``,
    a pandas/NumPy missing value (NaN, NA, NaT), or the literal string ``'NaN'``.
    """
    if key not in data_dict:
        return None
    value = data_dict[key]
    # Test for missing values first: comparing pd.NA with a string yields NA,
    # which cannot be used in a boolean context.
    if value is None or pd.isnull(value):
        return None
    if value == 'NaN':
        return None
    return value


def extract_triplets(data_dict, max_triplets=4):
    """Collect the complete (tool, action, tissue) triplets from one record.

    The record is expected to expose keys ``tool_<i>``, ``action_<i>`` and
    ``tissue_<i>`` for ``i`` in ``1..max_triplets``. A triplet is kept only when
    all three of its fields are present, non-missing and truthy.

    Args:
        data_dict: Mapping-like record (a ``dict`` or a ``pandas.Series`` row).
        max_triplets: Highest index ``i`` to look for. Defaults to 4.

    Returns:
        A list of ``(tool, action, tissue)`` tuples in index order; empty when
        the record contains no complete triplet.
    """
    triplets = []

    for i in range(1, max_triplets + 1):
        # Each field is validated against its OWN key; a missing action or
        # tissue must never be accepted just because the tool is present.
        tool = _clean_field(data_dict, f'tool_{i}')
        action = _clean_field(data_dict, f'action_{i}')
        tissue = _clean_field(data_dict, f'tissue_{i}')

        # Keep the triplet only if all three fields are usable.
        if tool and action and tissue:
            triplets.append((tool, action, tissue))

    return triplets

In [9]:
# Build ``total_list``: one {"video_path", "triplet"} dict per annotated row.
# Every regular file in ``dir_path`` is read as a ';'-separated CSV.
dir_path = "/mnt/xingjian_luo/dataset/surgFC/dataset/triplet/annotation-upload"
# os.listdir returns entries in arbitrary order; sorting makes total_list (and
# any slice taken from it later) reproducible across runs and machines.
file_path = sorted(os.listdir(dir_path))
total_list = []
for i in file_path:
    path = os.path.join(dir_path, i)
    # Skip sub-directories (e.g. editor or checkpoint folders) that read_csv cannot open.
    if not os.path.isfile(path):
        continue
    annotation = pd.read_csv(path, sep=";")
    for _, record in annotation.iterrows():
        # pd.notna is a value test; the previous `is not np.nan` identity test
        # does not reliably detect NaN values coming out of a DataFrame row.
        if pd.notna(record["folder"]):
            temp_dict = {
                "video_path": os.path.join(str(record["folder"]), str(record["filename"])),
                "triplet": extract_triplets(record),
            }
            total_list.append(temp_dict)
            



In [13]:
# Persist the collected annotations as pretty-printed JSON (tuples are written as JSON arrays).
with open("/mnt/xingjian_luo/dataset/surgFC/dataset/triplet/annotation.json", "w") as out_file:
    json.dump(total_list, out_file, indent=2)

In [14]:
import argparse
import json
import os

import openai
import time

# Presumably a pause (in seconds) between API requests.
# NOTE(review): this constant is not referenced by any cell visible in this notebook — confirm it is still needed.
NUM_SECONDS_TO_SLEEP = 0.5
import os
from openai import OpenAI
from tqdm import tqdm

# Credentials are read from the environment so that no secret is stored in the notebook.
#   OPENAI_API_KEY  - required; a KeyError here means the variable is not set.
#   OPENAI_BASE_URL - optional override of the API endpoint.
# NOTE(review): the key that used to be hard-coded in this cell must be treated
# as leaked and revoked.
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ.get("OPENAI_BASE_URL", "https://vip.yi-zhan.top/v1"),
)


def get_result(prompt, model="gpt-4o", temperature=0.01,
               system_prompt="You are a helpful assistant"):
    """Send one prompt to the chat-completions endpoint and return the reply text.

    Args:
        prompt: Text sent as the user message.
        model: Name of the chat model to query. Defaults to ``"gpt-4o"``.
        temperature: Sampling temperature passed to the API. Defaults to 0.01.
        system_prompt: Text sent as the system message.

    Returns:
        The ``content`` of the first choice in the response.

    Any exception raised by the client call propagates to the caller.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        stream=False,
        temperature=temperature,
    )
    return response.choices[0].message.content
    

In [None]:
# Values substituted into the ``prompt`` template via ``prompt.format(**info_dict)``.
# NOTE(review): ``info_dict`` is also declared in an earlier cell; keep a single
# definition so the two cannot drift apart.
# ``tool_return_value`` is only a placeholder sample; its third key is "tissue"
# to match the instrument/action/tissue schema used by the few-shot examples
# (it was previously mislabelled "suction").
info_dict = {
    "image_video": "video",
    "surgery_type": "transsphenoidal pituitary adenoma",
    "tool_name": "triplet_detect",
    "tool_description": "The triplet detect tool will return a triplet list containing several dictionaries, each dictionary illustrates a triplet (surgical instrument, verb, target)",
    "tool_return_value": [{"instrument": "suction tube", "action": "manipulate", "tissue": "blood"}],
    "example_1": example_1,
    "example_2": example_2,
    "example_3": example_3,
}

In [22]:
# Upper bound on how many annotation records are taken from total_list for generation.
MAX_SAMPLES = 5000
selected = total_list[:MAX_SAMPLES]

In [20]:
# Generate one question/answer sample per selected record.
answer_list = []
for i in tqdm(selected):
    # Build the per-record template values in a fresh dict instead of
    # overwriting the shared info_dict, so re-running other cells is not
    # affected by whichever record was processed last.
    prompt_fields = {**info_dict, "tool_return_value": i["triplet"]}
    # NOTE(review): i["triplet"] is a list of tuples, so it is rendered into the
    # prompt as a Python list of tuples rather than the list of dictionaries the
    # tool description mentions — confirm this is intended.
    result = get_result(prompt.format(**prompt_fields))
    temp_dict = {
        "video_path": i["video_path"],
        "triplet": i["triplet"],
        "result": result,
    }
    answer_list.append(temp_dict)
    

```json
{'question': 'What surgical instrument is being used to manipulate tissue in the endoscopic video from the transsphenoidal pituitary adenoma surgery?', 'model_output': "[{'instrument':'suction tube','action':'manipulate','tissue':'tissue'}]", 'thoughts': 'Utilizing the triplet detect model enables clear identification of which instrument is used and how, in the surgery.', 'answer': "A suction tube is being used to manipulate tissue during the transsphenoidal pituitary adenoma surgery. Suction tubes are essential in such procedures to remove fluids and small debris, thus clearing the view and allowing precise manipulation of tissue."}
```


In [23]:
# Display the leftover loop variable `i`; the output shown below is a record with
# "video_path" and "triplet" keys. NOTE(review): relies on a variable leaked from
# an earlier loop — appears to be a debugging inspection.
i

{'video_path': '2020-10-17_092038/2020-10-17_092038_VID001_60_90.mp4',
 'triplet': [('suction tube', 'manipulate', 'tissue')]}