## Generate Training File

In [1]:
import json, os, random
# Caption annotation file; "~" is expanded so the path follows the current
# user's home directory.
RAW_JSON_PATH = os.path.expanduser("~/autodl-tmp/LEVIR-MCI-dataset/LevirCCcaptions-v2.json")

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, which is what open() falls back to when no encoding is given.
with open(RAW_JSON_PATH, "r", encoding="utf-8") as f:
    raw_captions = json.load(f)

# Keep only the records whose 'filepath' field is "train".
raw_captions = [row for row in raw_captions if row['filepath'] == "train"]

In [3]:
# State for the training-set builder: the list of samples, a length log
# (only referenced by the disabled contour task further down) and the running
# sample id.
instructs, lenList, global_id = [], [], 0
for img in raw_captions:
    # 1) Plain captioning: one single-turn sample per reference sentence.
    # NOTE(review): this prompt carries a single "<image>" placeholder while
    # every other prompt in this cell carries two -- confirm which is intended.
    for cap in img['sentences']:
        rslt = {
            'id': global_id,
            'image': [f"{img['filepath']}/{phase}/{img['filename']}" for phase in ("A", "B")],
            'changeflag': img['changeflag'],
            'conversations': [
                {
                    "from": "human",
                    "value": "<image> Please describe the changes of these two images in detail."
                },
                {
                    "from": "gpt",
                    # Trim the caption and drop the space before each period.
                    "value": cap.strip().replace(" .", ".")
                },
            ],
        }
        instructs.append(rslt)
        global_id += 1

    # 2) Change detection: a yes/no question answered from the change flag.
    rslt = {
        'id': global_id,
        'image': [f"{img['filepath']}/{phase}/{img['filename']}" for phase in ("A", "B")],
        'conversations': [
            {
                "from": "human",
                "value": "<image> <image> Please judge whether these two images have changed. Please answer yes or no."
            },
            {
                "from": "gpt",
                "value": "No" if img['changeflag'] != 1 else "Yes"
            },
        ],
    }
    instructs.append(rslt)
    global_id += 1

    # 3) Change category and count: open the record here; its question and
    # answer are attached further down in the loop body.
    rslt = {
        'id': global_id,
        'image': [f"{img['filepath']}/{phase}/{img['filename']}" for phase in ("A", "B")],
        'conversations': []
    }
    
    def getNumAnswer(row):
        """Build the templated answer for the road/building counting question.

        Args:
            row: Annotation record. Reads ``row['change_counts']['road']``,
                ``row['change_counts']['building']`` and ``row['changeflag']``.

        Returns:
            A sentence of the form
            ``"<roads clause>, and <buildings clause>."``. A category is
            reported as unchanged when its count is 0 or when the record's
            change flag is 0, regardless of the stored count.

        Note:
            Plural and zero-count clauses use "have changed"; only the
            singular clause keeps "has changed".
        """
        road_count = row['change_counts']['road']
        building_count = row['change_counts']['building']
        # A zero change flag overrides whatever the per-category counts say.
        unchanged = row['changeflag'] == 0

        if unchanged or road_count == 0:
            ans = "No roads have changed, "
        elif road_count == 1:
            ans = "1 road has changed, "
        else:
            ans = f"{road_count} roads have changed, "

        if unchanged or building_count == 0:
            ans += "and no buildings have changed."
        elif building_count == 1:
            ans += "and 1 building has changed."
        else:
            ans += f"and {building_count} buildings have changed."
        return ans

    # 3) (continued) Attach the counting question and its templated answer to
    # the record opened before the helper definition. The answer is computed
    # once and reused by the multi-turn sample below.
    count_answer = getNumAnswer(img)
    rslt['conversations'].extend([
        {
            "from": "human",
            "value": "<image> <image> Please determine whether roads and buildings have changed, and count the number of objects in each category that have changed."
        },
        {
            "from": "gpt",
            "value": count_answer
        }
    ]
    )
    instructs.append(rslt)
    global_id += 1

    # # 4) Contours of the changed objects (disabled; kept for reference)
    # rslt = {
    #     'id': global_id,
    #     'image': [f"{img['filepath']}/A/{img['filename']}", f"{img['filepath']}/B/{img['filename']}"],
    #     'conversations': []
    #     }
    
    # def getCounterAnswer(row):
    #     ansList = []
    #     for cat, value in row['contours'].items():
    #         ansList.append(f"The contours of the changed {cat}s are:{value}")
    #     ans = ", ".join(ansList) + "."
    #     lenList.append(len(ans))
    #     return ans

    # rslt['conversations'].extend([
    #     {
    #         "from": "human",
    #         "value": "<image> <image> Please draw the outlines of the changed buildings and roads, approximating them with polygons."
    #     },
    #     {
    #         "from": "gpt",
    #         "value": getCounterAnswer(img)
    #     }
    # ]
    # )
    # instructs.append(rslt)
    # global_id += 1

    # 5) Multi-turn dialogue: detection -> counting -> description.
    # The closing caption is sampled from the reference sentences. A private
    # generator seeded with the file name makes that pick identical on every
    # run (and independent of iteration order), so the generated training file
    # is reproducible; the module-level random state is left untouched.
    caption_rng = random.Random(img['filename'])
    rslt = {
        'id': global_id,
        'image': [f"{img['filepath']}/A/{img['filename']}", f"{img['filepath']}/B/{img['filename']}"],
        'conversations': []
        }

    rslt['conversations'].extend([
        {
            "from": "human",
            "value": "<image> <image> Please judge whether these two images have changed. Please answer yes or no."
        },
        {
            "from": "gpt",
            "value": "Yes" if img['changeflag'] == 1 else "No"
        },
        {
            "from": "human",
            "value": "If changes have occurred, count the number of road and building changes separately."
        },
        {
            "from": "gpt",
            "value": count_answer
        },
        {
            "from": "human",
            "value": "Based on the above analysis, please describe the changes of these two images in detail."
        },
        {
            "from": "gpt",
            "value": caption_rng.choice(img['sentences']).strip().replace(" .", ".")
        }
    ]
    )
    instructs.append(rslt)
    global_id += 1

In [4]:
# Write next to the annotation file rather than to a hard-coded /root path:
# the input path is "~"-expanded, so the output should follow the same home
# directory (for the root account the resulting path is unchanged).
instruct_path = os.path.join(os.path.dirname(RAW_JSON_PATH), "ChangeChat_instruct_60k.json")
with open(instruct_path, "w", encoding="utf-8") as f:
    json.dump(instructs, f)

In [3]:
# Classification view of the loaded split: one record per image pair holding
# only the id, the two image paths and the change flag.
instructs = []
for global_id, img in enumerate(raw_captions):
    rslt = dict(
        id=global_id,
        image=[f"{img['filepath']}/{phase}/{img['filename']}" for phase in ("A", "B")],
        changeflag=img['changeflag'],
    )
    instructs.append(rslt)
# Leave the counter at the number of emitted records (0 for an empty split).
global_id = len(instructs)

In [6]:
# Write next to the annotation file rather than to a hard-coded /root path:
# the input path is "~"-expanded, so the output should follow the same home
# directory (for the root account the resulting path is unchanged).
classify_path = os.path.join(os.path.dirname(RAW_JSON_PATH), "ChangeChat_classify.json")
with open(classify_path, "w", encoding="utf-8") as f:
    json.dump(instructs, f)

## Generate Benchmark File

In [1]:
import json, os
# Caption annotation file; "~" is expanded so the path follows the current
# user's home directory.
RAW_JSON_PATH = os.path.expanduser("~/autodl-tmp/LEVIR-MCI-dataset/LevirCCcaptions-v2.json")
# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, which is what open() falls back to when no encoding is given.
with open(RAW_JSON_PATH, "r", encoding="utf-8") as f:
    raw_captions = json.load(f)
# Keep only the records whose 'filepath' field is "test".
raw_captions = [row for row in raw_captions if row['filepath'] == "test"]

In [3]:
import jsonlines
# Ground-truth file for the benchmark: one record per image pair with its
# change flag, numbered from 100001.
instructs = []
global_id = 100001
for img in raw_captions:
    rslt = {
        'question_id': global_id,
        'image': [f"{img['filepath']}/A/{img['filename']}", f"{img['filepath']}/B/{img['filename']}"],
        'changeflag': img['changeflag']
        }
    instructs.append(rslt)
    global_id += 1

# Write next to the annotation file rather than to a hard-coded /root path, so
# the output follows the same "~"-expanded home directory as the input (for
# the root account the resulting path is unchanged).
gt_path = os.path.join(os.path.dirname(RAW_JSON_PATH), "Test_CC_gt.json")
with open(gt_path, "w", encoding="utf-8") as wfd:
    json.dump(instructs, wfd)

In [2]:
import jsonlines
# Captioning benchmark questions: one prompt per image pair, numbered from
# 100001.
instructs = []
global_id = 100001
for img in raw_captions:
    # Emit exactly one question per pair. A record with no reference
    # sentences is skipped without consuming an id.
    # NOTE(review): such a skip would shift the ids relative to
    # Test_CC_gt.json, which numbers every pair -- confirm it cannot occur.
    if img['sentences']:
        rslt = {
            'question_id': global_id,
            'image': [f"{img['filepath']}/A/{img['filename']}", f"{img['filepath']}/B/{img['filename']}"],
            'text': "<image> <image> Please Describe what changes occurred between the two remote sensing images?",
            'changeflag': img['changeflag'],
            'category': 'CC'
            }
        instructs.append(rslt)
        global_id += 1

# Write next to the annotation file rather than to a hard-coded /root path, so
# the output follows the same "~"-expanded home directory as the input (for
# the root account the resulting path is unchanged).
caption_questions_path = os.path.join(os.path.dirname(RAW_JSON_PATH), "Test_CC_v3.jsonl")
with jsonlines.open(caption_questions_path, "w") as wfd:
    for data in instructs:
        wfd.write(data)

In [4]:
import jsonlines
# Yes/no change-detection benchmark questions: one prompt per image pair,
# numbered from 100001.
instructs = []
global_id = 100001
for img in raw_captions:
    # Emit exactly one question per pair. A record with no reference
    # sentences is skipped without consuming an id.
    # NOTE(review): the question does not use the captions, and a skip would
    # shift the ids relative to Test_CC_gt.json -- confirm it cannot occur.
    if img['sentences']:
        rslt = {
            'question_id': global_id,
            'image': [f"{img['filepath']}/A/{img['filename']}", f"{img['filepath']}/B/{img['filename']}"],
            'text': "<image> <image>\n Have the objects in these two remote sensing images changed? Please answer yes or no.",
            'category': 'CC'
            }
        instructs.append(rslt)
        global_id += 1

# Write next to the annotation file rather than to a hard-coded /root path, so
# the output follows the same "~"-expanded home directory as the input (for
# the root account the resulting path is unchanged).
yesno_questions_path = os.path.join(os.path.dirname(RAW_JSON_PATH), "Test_CC_v3_conv_r1.jsonl")
with jsonlines.open(yesno_questions_path, "w") as wfd:
    for data in instructs:
        wfd.write(data)