ログからデータを抽出しファイルに格納

In [1]:
def process_log_file(file_path):
    """Split a log file into events separated by blank lines.

    :param file_path: path of the text log to read
    :return: list of strings, one per event; each string is the event's raw
        lines (line endings kept) concatenated in file order
    """
    events = []
    buffered_lines = []

    with open(file_path, 'r') as log:
        for raw_line in log:
            if raw_line.strip():
                # Non-blank line: it belongs to the event being collected.
                buffered_lines.append(raw_line)
                continue
            # Blank line: close the current event, if one was started.
            if buffered_lines:
                events.append(''.join(buffered_lines))
                buffered_lines = []

    # The file may end without a trailing blank line.
    if buffered_lines:
        events.append(''.join(buffered_lines))

    return events

def filter_events_by_search_str(strings, search_strings):
    """
    Keep only the strings that contain at least one of the search strings.

    :param strings: list of strings to scan
    :param search_strings: list of substrings to look for
    :return: list of the scanned strings, in their original order, that
        contain any of the substrings
    """
    return [
        candidate
        for candidate in strings
        if any(needle in candidate for needle in search_strings)
    ]

def write_events_to_file(sections, output_path):
    """Write every section to ``output_path``, each followed by a newline.

    :param sections: iterable of strings to write
    :param output_path: path of the file to create or overwrite
    """
    with open(output_path, 'w') as out:
        # The extra newline after each section acts as the separator line.
        out.writelines(section + '\n' for section in sections)


# Split the raw event log into blank-line-separated events.
events = process_log_file("./results/General-#0.elog")
# An event is kept when it contains at least one of these substrings.
search_strings = [
    "DEBUG:Sending (inet::physicallayer::RadioFrame)GeoNet packet from (artery::VanetRadio)radio",
    "DEBUG:Computing whether reception is possible"
]
# Keep only the matching events and write them to the filtered log file.
filtered_events = filter_events_by_search_str(events, search_strings)
write_events_to_file(filtered_events, "./results/filtered-General-#0.elog")






filtered-General-#0.elogからextracted_events.jsonを作る

In [1]:
import json
import os
from collections import defaultdict

def process_log_file(file_path):
    """Split a log file into events separated by blank lines.

    :param file_path: path of the text log to read
    :return: list of events; each event is a list of its lines with
        surrounding whitespace stripped
    """
    events = []
    event_lines = []

    with open(file_path, 'r') as log:
        for raw_line in log:
            text = raw_line.strip()
            if text:
                event_lines.append(text)
            elif event_lines:
                # A blank line closes the event collected so far.
                events.append(event_lines)
                event_lines = []

    # The last event is not necessarily followed by a blank line.
    if event_lines:
        events.append(event_lines)

    return events


def extract_event_id(event):
    """Return the event number found on the first line of an event.

    The first line is split on whitespace and searched for the token
    sequence ``E``, ``#``, ``<number>``; the token after ``#`` is returned
    as a string.

    :param event: list of log lines belonging to one event
    :return: the token following ``E #`` on the first line, or ``None`` when
        the event is empty or the pattern is not present
    """
    if not event:
        return None

    first_line = event[0]
    if "E #" not in first_line:
        return None

    tokens = first_line.split()
    # Require the "#" token right after "E" so that an unrelated standalone
    # "E" token cannot make an arbitrary token look like the event number.
    for marker, hash_sign, number in zip(tokens, tokens[1:], tokens[2:]):
        if marker == "E" and hash_sign == "#":
            return number
    return None


def filter_events_by_strings(events):
    """Keep the events in which at least one line contains ``"KF"``.

    :param events: list of events, each a list of line strings
    :return: list of the matching events, in their original order
    """
    search_strings = ["KF"]

    def mentions_search_string(event):
        for line in event:
            for substring in search_strings:
                if substring in line:
                    return True
        return False

    return [event for event in events if mentions_search_string(event)]

def extract_following_events_based_on_occurrences(events, search_string):
    """Pair each event containing ``search_string`` with the events after it.

    For an event in which ``search_string`` occurs on ``n`` lines, the
    ``(n - 1) + 1`` events that follow it are collected (fewer when the end
    of ``events`` is reached).

    :param events: list of events, each a list of line strings
    :param search_string: substring identifying a transmitter event
    :return: dict keyed by ``extract_event_id(event)``; each value holds the
        matching event under ``"transmitter_event"`` and the following
        events under ``"receiver_events"``
    """
    # One extra event is taken because the receiver logs may not start
    # immediately after the transmitter log.
    buffer_log_num = 1

    results = {}
    for index, event in enumerate(events):
        occurrences = sum(search_string in line for line in event)
        if occurrences == 0:
            continue

        first_follower = index + 1
        # Slicing clamps at the end of the list, so no bounds check is needed.
        stop = first_follower + (occurrences - 1) + buffer_log_num
        results[extract_event_id(event)] = {
            "transmitter_event": event,
            "receiver_events": events[first_follower:stop],
        }

    return results

def extract_transmitter_id_and_timestamp(log_entries):
    """Pull transmitter id, start/end time and start position from log lines.

    Only lines containing ``"transmitterId ="`` are inspected. Such a line is
    split on commas and every field mentioning ``transmitterId``,
    ``startTime`` or ``endTime`` supplies the text after its last ``=``.
    When several lines match, later lines overwrite earlier values.

    :param log_entries: list of log line strings of one event
    :return: tuple ``(transmitter_id, startTime, endTime, position)`` where
        the id and position are strings, the times are floats, and any value
        that was not found is ``None``
    """
    transmitter_id = None
    startTime = None
    endTime = None
    transmitter_position = None

    for entry in log_entries:
        if "transmitterId =" not in entry:
            continue

        for field in entry.split(','):
            field_value = field.split('=')[-1].strip()
            if "transmitterId" in field:
                transmitter_id = field_value
            if "startTime" in field:
                startTime = float(field_value)
            if "endTime" in field:
                endTime = float(field_value)

        if "startPosition =" in entry:
            # The coordinates contain commas themselves, so take everything
            # up to the closing "),": and restore the parenthesis.
            after_label = entry.split("startPosition =")[1]
            transmitter_position = after_label.split('),')[0].strip() + ')'

    return transmitter_id, startTime, endTime, transmitter_position


def _receiver_id_sort_key(receiver_id):
    """Sort numeric receiver ids by value and non-numeric ids after them."""
    try:
        return (0, int(receiver_id), receiver_id)
    except ValueError:
        return (1, 0, receiver_id)


def extract_receiver_id_and_reception_possibility(log_entries_2list):
    """Extract receiver id, position and reception status from receiver events.

    Each event is scanned line by line: the text after ``"receiverId ="`` (up
    to the next comma) is the receiver id, the tuple after
    ``"startPosition ="`` is the position, and a line containing
    ``"reception is impossible"`` marks the event as impossible. Events with
    no receiver id are ignored. When a receiver id occurs in several events
    the last event wins in the result dict.

    :param log_entries_2list: list of events, each a list of log line strings
    :return: tuple ``(results, receivable_id_list)``; ``results`` maps the
        receiver id (string) to ``{"position", "reception_status"}`` and is
        ordered by numeric receiver id, ``receivable_id_list`` is the sorted
        list of distinct integer ids whose status was ``"possible"``
    :raises ValueError: if a receivable receiver id is not an integer
    """
    results = {}
    receivable_ids = set()

    for log_entries in log_entries_2list:
        receiver_id = None
        position = None
        # Reception counts as possible unless a line says otherwise.
        reception_status = "possible"

        for entry in log_entries:
            if "receiverId =" in entry:
                receiver_id = entry.split('receiverId =')[-1].split(',')[0].strip()
            if "startPosition =" in entry:
                # The coordinates contain commas, so cut at the closing "),"
                # and restore the parenthesis.
                position = entry.split("startPosition =")[1].split('),')[0].strip() + ')'
            if "reception is impossible" in entry:
                reception_status = "impossible"

        if receiver_id is None:
            continue

        results[receiver_id] = {
            "position": position,
            "reception_status": reception_status
        }
        if reception_status == "possible":
            receivable_ids.add(int(receiver_id))

    # Order by numeric id; plain string ordering would put "10" before "2".
    results = dict(
        sorted(results.items(), key=lambda item: _receiver_id_sort_key(item[0]))
    )

    return results, sorted(receivable_ids)


def extract_paramater_values(events_dict):
    """Replace the raw log lines of every event with the parsed values.

    Each entry of ``events_dict`` is modified in place: the fields parsed
    from ``"transmitter_event"`` and ``"receiver_events"`` are added and the
    two raw keys are removed afterwards.

    :param events_dict: dict whose values hold ``"transmitter_event"`` and
        ``"receiver_events"``
    :return: the same ``events_dict`` object
    """
    for record in events_dict.values():
        transmitter_fields = extract_transmitter_id_and_timestamp(
            record["transmitter_event"]
        )
        receiver_results, receivable_ids = extract_receiver_id_and_reception_possibility(
            record["receiver_events"]
        )

        (
            record["transmitter_id"],
            record["startTime"],
            record["endTime"],
            record["transmitter_position"],
        ) = transmitter_fields
        record["receiver_results"] = receiver_results
        record["receivable_id_list"] = receivable_ids
        record["receivable_id_count"] = len(receivable_ids)

        # The raw log lines are no longer needed once parsed.
        del record["transmitter_event"]
        del record["receiver_events"]

    return events_dict

def ensure_directory_exists(file_path):
    """Create the directory part of ``file_path`` if it does not exist yet.

    The directory is ``os.path.dirname(file_path)``, so a path ending in a
    separator names the directory itself. Missing parent directories are
    created as well.

    :param file_path: path of a file (or a directory path ending in a
        separator) whose directory must exist
    """
    directory = os.path.dirname(file_path)
    # A bare file name has no directory part; os.makedirs('') would raise.
    if directory:
        # exist_ok avoids the race between checking and creating.
        os.makedirs(directory, exist_ok=True)

def split_json_by_transmitter_id(data):
    """Group the entries of ``data`` by their ``'transmitter_id'`` value.

    Entries whose ``'transmitter_id'`` is missing or ``None`` are left out.

    :param data: dict mapping an event key to an entry dict
    :return: dict mapping each transmitter id to a dict of
        ``{event key: entry}`` for the entries with that id
    """
    grouped = {}
    for event_key, entry in data.items():
        transmitter_id = entry.get('transmitter_id')
        if transmitter_id is None:
            continue
        grouped.setdefault(transmitter_id, {})[event_key] = entry
    return grouped


        

def main():
    """Build one ``extracted_events.json`` per transmitter from the filtered log.

    Reads ``./results/filtered-General-#0.elog``, pairs every sending event
    with the events that follow it, parses the values of interest and writes
    the result grouped by transmitter id below the speed-specific folder.
    """
    speed = 80  # km/hour
    events = process_log_file("./results/filtered-General-#0.elog")

    log_folder_path = "./results/speed" + str(speed) + "/250vehicle/"
    ensure_directory_exists(log_folder_path)

    # Events containing this text are treated as transmitter events.
    filtered_string = "DEBUG:Sending (inet::physicallayer::RadioFrame)GeoNet packet"
    extracted_events = extract_paramater_values(
        extract_following_events_based_on_occurrences(events, filtered_string)
    )

    # One output folder and file per transmitter id.
    split_events = split_json_by_transmitter_id(extracted_events)
    for transmitter_id, transmitter_events in split_events.items():
        transmitter_folder = log_folder_path + str(transmitter_id) + "/"
        ensure_directory_exists(transmitter_folder)
        with open(transmitter_folder + "extracted_events.json", 'w') as f:
            json.dump(transmitter_events, f, indent=4)


if __name__ == "__main__":
    main()



extracted_eventsデータから時刻と通信可能ノードのリスト(receivable_time_id.json)を出す

In [2]:
import json

def make_receivable_time_id_file(log_folder_path):
    """Write ``receivable_time_id.json`` from ``extracted_events.json``.

    The output maps each event's ``"startTime"`` to its
    ``"receivable_id_list"``; when two events share a start time the later
    one wins.

    :param log_folder_path: folder (with trailing separator) that holds
        ``extracted_events.json`` and receives the output file
    """
    with open(log_folder_path + "extracted_events.json", 'r') as source:
        extracted_data = json.load(source)

    # Only the start time and the receivable ids of every event are kept.
    receivable_time_id_data = {
        event["startTime"]: event["receivable_id_list"]
        for event in extracted_data.values()
    }

    with open(log_folder_path + 'receivable_time_id.json', 'w') as target:
        json.dump(receivable_time_id_data, target, indent=4)


# Experiment settings used to locate the log folders.
speed = 80  # km/hour
booth_change = True
has_multi_proposer = True
proposer_list = [0, 1, 2]
validator_num = 250
# Every node id in range that is not a proposer.
validator_list = [
    i for i in range(validator_num + len(proposer_list)) if i not in proposer_list
]

# Build receivable_time_id.json in every folder selected by the settings.
if not booth_change:
    log_folder_path = "./results/speed" + str(speed) + "/"
    make_receivable_time_id_file(log_folder_path)
elif has_multi_proposer:
    # One sub-folder per proposer.
    for proposer_id in proposer_list:
        log_folder_path = "./results/speed" + str(speed) + "/250vehicle/" + str(proposer_id) + "/"
        make_receivable_time_id_file(log_folder_path)
else:
    log_folder_path = "./results/speed" + str(speed) + "/250vehicle/"
    make_receivable_time_id_file(log_folder_path)



receivable_time_idデータから時刻の合間を補完したextended_time_id.jsonを作成

In [3]:
import json

def make_extended_time_id_file(log_folder_path):
    """Write ``extended_time_id.json`` with the gaps between times filled in.

    The times of ``receivable_time_id.json`` are rounded to two decimals and
    processed in ascending order. After each time, up to nine further keys in
    steps of 0.01 are added with the same value, stopping as soon as a step
    hits a time that is present in the rounded input.

    :param log_folder_path: folder (with trailing separator) that holds
        ``receivable_time_id.json`` and receives the output file
    """
    with open(log_folder_path + 'receivable_time_id.json', 'r') as source:
        receivable_time_id_data = json.load(source)

    # Normalise every time to a two-decimal string key.
    rounded = {}
    for raw_time, node_ids in receivable_time_id_data.items():
        rounded[f"{round(float(raw_time), 2):.2f}"] = node_ids

    extended_time_id_data = {}
    for time_key in sorted(rounded, key=float):
        node_ids = rounded[time_key]
        extended_time_id_data[time_key] = node_ids

        # Carry the value forward in 0.01 steps until the next known time.
        for step in range(1, 10):
            filler_key = f"{round(float(time_key) + step * 0.01, 2):.2f}"
            if filler_key in rounded:
                break
            extended_time_id_data[filler_key] = node_ids

    with open(log_folder_path + 'extended_time_id.json', 'w') as target:
        json.dump(extended_time_id_data, target, indent=4)


# Experiment settings used to locate the log folders.
speed = 80  # km/hour
booth_change = True
has_multi_proposer = True
proposer_list = [0, 1, 2]
validator_num = 250
# Every node id in range that is not a proposer.
validator_list = [
    i for i in range(validator_num + len(proposer_list)) if i not in proposer_list
]

# Build extended_time_id.json in every folder selected by the settings.
if not booth_change:
    log_folder_path = "./results/speed" + str(speed) + "/"
    make_extended_time_id_file(log_folder_path)
elif has_multi_proposer:
    # One sub-folder per proposer.
    for proposer_id in proposer_list:
        log_folder_path = "./results/speed" + str(speed) + "/250vehicle/" + str(proposer_id) + "/"
        make_extended_time_id_file(log_folder_path)
else:
    log_folder_path = "./results/speed" + str(speed) + "/250vehicle/"
    make_extended_time_id_file(log_folder_path)

Vguardで用いるデータに整形(communication_data_for_vguard)

In [4]:
# Imported here so the cell also runs on its own, without earlier cells.
import json
import os

# Experiment settings used to locate the log folders.
speed = 80  # km/hour
booth_change = True
has_multi_proposer = True
proposer_list = [0, 1, 2]
validator_num = 250
# Every node id in range that is not a proposer.
validator_list = [i for i in range(validator_num + len(proposer_list)) if i not in proposer_list]
log_folder_path = "./results/speed" + str(speed) + "/250vehicle/"


# Load each proposer's extended_time_id.json and store a copy of it as that
# proposer's communication_node_for_vguard_<id>.json.
all_communication_data = {}
for proposer_id in proposer_list:
    with open(log_folder_path + str(proposer_id) + '/extended_time_id.json', 'r') as file:
        proposer_communication_data = json.load(file)
    all_communication_data[proposer_id] = proposer_communication_data
    with open(log_folder_path + str(proposer_id) + '/communication_node_for_vguard_' + str(proposer_id) + '.json', 'w') as file:
        json.dump(all_communication_data[proposer_id], file, indent=4)


# Per validator: time key -> list of proposers that list the validator.
validator_output_data = {validator: {} for validator in validator_list}

# Only the time keys present for every proposer are used; start from the
# first proposer's keys and intersect with the others.
common_keys = set(all_communication_data[proposer_list[0]].keys())
for i in range(1, len(proposer_list)):
    common_keys &= set(all_communication_data[proposer_list[i]].keys())

# Sort the keys numerically and format them back to two-decimal strings.
common_keys_as_floats = sorted([float(key) for key in common_keys])
common_keys_as_strings = [f"{key:.2f}" for key in common_keys_as_floats]


for key in common_keys_as_strings:
    # Build the per-proposer id sets once per key so that each validator
    # lookup is a set membership test instead of a list scan.
    reachable_by_proposer = {
        proposer_id: set(all_communication_data[proposer_id][key])
        for proposer_id in proposer_list
    }
    for validator in validator_list:
        # Proposers are kept in proposer_list order.
        validator_communication_list = [
            proposer_id
            for proposer_id in proposer_list
            if validator in reachable_by_proposer[proposer_id]
        ]
        validator_output_data[validator][key] = validator_communication_list

# Write one communication_node_for_vguard_<id>.json per validator.
for validator_id in validator_list:
    validator_folder = log_folder_path + str(validator_id) + '/'
    # Nothing in this cell creates the validator folders, so make sure each
    # one exists before opening the output file.
    os.makedirs(validator_folder, exist_ok=True)
    filename = validator_folder + 'communication_node_for_vguard_' + str(validator_id) + '.json'
    with open(filename, 'w') as file:
        json.dump(validator_output_data[validator_id], file, indent=4)
