In [1]:
import os
from collections import defaultdict

In [2]:
# Folder locations for the VQA validation split (absolute Windows paths).
# Folder holding the answer files.
answers_dir = 'D:\\VQA\\validation\\Answers'
# Folder holding the question files; each file's content is read as one question.
question_dir = 'D:\\VQA\\validation\\Questions'


In [3]:
def count_questions(directory):
    """Count how many files in ``directory`` contain each distinct question.

    Every regular file directly inside ``directory`` (sub-folders are not
    descended into) is read as UTF-8. Its whole content, with leading and
    trailing whitespace stripped, is treated as a single question.

    Args:
        directory: Path of the folder to scan.

    Returns:
        A ``(question_counts, error_files)`` tuple:

        * ``question_counts`` -- ``defaultdict(int)`` mapping question text to
          the number of files holding exactly that text.
        * ``error_files`` -- list of ``(file_path, error_message)`` tuples for
          files that could not be read or decoded.

        Both are empty (and a message is printed) when ``directory`` is
        missing or is not a directory.
    """
    question_counts = defaultdict(int)
    error_files = []  # (file_path, error_message) for every unreadable file

    # isdir rather than exists: a path naming a regular file would pass an
    # exists() check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory does not exist: {directory}")
        return question_counts, error_files

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # insertion order of the counts (and of the error list) reproducible.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        # Only regular files are questions; skip sub-directories.
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()  # whole file == one question
        except (UnicodeDecodeError, OSError) as e:
            # Record the failure and keep scanning instead of aborting the
            # whole count because of one bad file.
            error_files.append((file_path, str(e)))
            continue
        question_counts[content] += 1

    return question_counts, error_files

In [5]:
# Count the questions, then report them from most to least frequent.
results, errors = count_questions(question_dir)
if not results:
    print("No data found or directory is empty.")
else:
    print("Question Counts:")
    # Descending by occurrence count (second element of each item).
    for question, count in sorted(results.items(), key=lambda item: item[1], reverse=True):
        print(f"Question: '{question}' appears {count} times.")

# List every file that could not be read, if any.
if not errors:
    print("No decoding errors found.")
else:
    print("\nError Files:")
    for file_path, error in errors:
        print(f"File '{file_path}' could not be read: {error}")

Question Counts:
Question: 'How many cars are waiting to turn left in the bottom lane, moving from the bottom towards the left?' appears 6 times.
Question: 'How many pedestrians are crossing the zebra crossing?' appears 5 times.
Question: 'Are there many cars driving on the road right now? Not including the cars waiting at the traffic light.' appears 5 times.
Question: 'How many people, including pedestrians, motorcyclists and cyclists  are waiting to cross the crosswalk from bottom left to bottom right' appears 3 times.
Question: 'Did any car's that were waiting for a red light cross the white line?' appears 3 times.
Question: 'In what direction is the car moving on the road? Excluding those that are waiting.' appears 3 times.
Question: 'How many cars are going straight from left to right in the left lane?' appears 3 times.
Question: 'How many cars are waiting to go stright in the left-hand lane, moving form the left to the right?' appears 3 times.
Question: 'Are there many people and

In [28]:
def find_files_by_question(directory, target_question):
    """Return the names of files whose content equals ``target_question``.

    Every regular file directly inside ``directory`` (sub-folders are not
    descended into) is read as UTF-8. Its content, with leading and trailing
    whitespace stripped, is compared for exact equality with
    ``target_question``.

    Args:
        directory: Path of the folder to scan.
        target_question: Question text to look for. It is compared as given
            (it is not stripped), against the stripped file content.

    Returns:
        List of matching file names (names only, not full paths), sorted
        alphabetically. Empty when nothing matches or when ``directory`` is
        missing or is not a directory (a message is printed in that case).
        Files that cannot be read or decoded are reported on stdout and
        skipped.
    """
    matching_files = []

    # isdir rather than exists: a path naming a regular file would pass an
    # exists() check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory does not exist: {directory}")
        return matching_files

    # os.listdir order is arbitrary; sort so the result is reproducible.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        # Only regular files are questions; skip sub-directories.
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()  # whole file == one question
        except UnicodeDecodeError:
            print(f"Could not decode file {file_path} using UTF-8.")
            continue
        except OSError as e:
            # One unreadable file should not abort the whole search.
            print(f"Could not read file {file_path}: {e}")
            continue
        if content == target_question:
            matching_files.append(filename)

    return matching_files

# Question text to search for (fill in before running this cell)
target_question = ""

# Look up the files holding that question and list them
matching_files = find_files_by_question(question_dir, target_question)
if not matching_files:
    print("No files match the provided question.")
else:
    print("Matching Files:")
    for file in matching_files:
        print(file)

Matching Files:
148709_sj8fas2e152d20211124air_420_1637216131_1637218737_143_obstacle.txt
148709_sj8fas2e152d20211124air_420_1637216131_1637218737_20_obstacle.txt
148709_sj8fas2e152d20211124air_420_1637216131_1637218737_213_obstacle.txt
148709_sj8fas2e152d20211124air_420_1637216131_1637218737_32_obstacle.txt
148709_sj8fas2e152d20211124air_420_1637216131_1637218737_84_obstacle.txt
148709_sj8fas2e152d20211124air_420_1637216131_1637218737_9_obstacle.txt


In [6]:
def find_and_replace_in_files(directory, target_question, new_content):
    """Overwrite every file whose content equals ``target_question``.

    Every regular file directly inside ``directory`` (sub-folders are not
    descended into) is read as UTF-8. When its content, with leading and
    trailing whitespace stripped, equals ``target_question``, the file is
    rewritten so that it contains exactly ``new_content``.

    Args:
        directory: Path of the folder to scan.
        target_question: Question text to look for. It is compared as given
            (it is not stripped), against the stripped file content.
        new_content: Text written to each matching file, replacing its
            previous content entirely.

    Returns:
        A ``(matching_files, modified_files)`` tuple of file-name lists, both
        sorted alphabetically. ``matching_files`` holds every file whose
        content matched; ``modified_files`` holds the subset that was
        rewritten successfully. Both are empty (and a message is printed)
        when ``directory`` is missing or is not a directory. Files that
        cannot be read, decoded or written are reported on stdout.
    """
    matching_files = []
    modified_files = []

    # isdir rather than exists: a path naming a regular file would pass an
    # exists() check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory does not exist: {directory}")
        return matching_files, modified_files

    # os.listdir order is arbitrary; sort so the results are reproducible.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        # Only regular files are questions; skip sub-directories.
        if not os.path.isfile(file_path):
            continue

        # Read first and close the handle before writing, so the file is
        # never open for reading and writing at the same time.
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()
        except UnicodeDecodeError:
            print(f"Could not decode file {file_path} using UTF-8.")
            continue
        except OSError as e:
            print(f"Error while reading file {file_path}: {e}")
            continue

        if content != target_question:
            continue
        matching_files.append(filename)

        try:
            with open(file_path, 'w', encoding='utf-8') as file_to_write:
                file_to_write.write(new_content)
        except OSError as e:
            print(f"Error while writing to file {file_path}: {e}")
            continue
        # Recorded only after the write handle closed without error.
        modified_files.append(filename)

    return matching_files, modified_files


# Question text to look for, and the text that replaces it
# (fill both in before running this cell)
target_question = ""
new_content = ""

# Run the replacement, then list the files that matched and those rewritten
matching_files, modified_files = find_and_replace_in_files(question_dir, target_question, new_content)
if not matching_files:
    print("No files match the provided question.")
else:
    print("Matching Files:")
    for file in matching_files:
        print(file)
    print("\nModified Files:")
    for file in modified_files:
        print(file)

No files match the provided question.
