In [5]:
import pandas as pd
# Load the raw tab-separated log with pandas' own parser, the same way the
# later cells read these files. Compared with splitting each line by hand this
# keeps empty first/last fields in place (str.strip() also removes tabs),
# ignores blank lines, and gives the date/time columns numeric dtypes instead
# of leaving every value as a string.
df = pd.read_csv('./data/machine_data.txt', sep='\t')
print(df.columns)
df


Index(['Year', 'Month', 'Day', 'Hour', 'Minute', 'operation'], dtype='object')


Unnamed: 0,Year,Month,Day,Hour,Minute,operation
0,2021,1,29,1,14,error
1,2021,1,29,1,14,stopped
2,2021,1,29,1,15,operating
3,2021,1,29,1,15,error
4,2021,1,29,1,15,stopped
...,...,...,...,...,...,...
197,2021,1,30,15,49,stopped
198,2021,1,30,15,50,operating
199,2021,1,30,15,50,stopped
200,2021,1,30,15,51,error


In [6]:
import os
from pathlib import Path

# Folder holding the input data files, and the folder the generated Korean
# text files are written to.
input_folder, output_folder = Path('./data'), Path("./kor_logs")
# Create the output folder if it is not there yet; an existing one is reused.
output_folder.mkdir(exist_ok=True)

# Preprocessing helper
def create_sentence(row):
    """Render one log record as a Korean status sentence.

    Args:
        row: Mapping-like record indexed by the keys 'Day', 'Month', 'Year',
            'Hour', 'Minute' and 'operation'.

    Returns:
        A sentence starting with the record's date and time. The statuses
        "error", "stopped" and "operating" each have a dedicated sentence;
        any other status is quoted verbatim in a generic sentence.
    """
    day, month, year, hour, minute, status = (
        row[key] for key in ('Day', 'Month', 'Year', 'Hour', 'Minute', 'operation')
    )
    stamp = f"{year}년 {month}월 {day}일 {hour}시 {minute}분"

    # Known statuses paired with the sentence ending used for each of them.
    known_endings = (
        ("error", "에 기계에서 오류가 발생했습니다."),
        ("stopped", "에 기계가 정지되었습니다."),
        ("operating", "에 기계가 정상적으로 작동 중입니다."),
    )
    for known_status, ending in known_endings:
        if status == known_status:
            return f"{stamp}{ending}"

    # Unrecognised status: report it as recorded.
    return f"{stamp}에 기계 상태가 '{status}'로 기록되었습니다."

# Files to process: every .txt log in the input folder (the original log as
# well as the simulated ones), in sorted order so the output order is stable.
files_to_process = sorted(input_folder.glob("*.txt"))
output_files = []

# Columns that create_sentence reads from every record.
required_columns = {"Year", "Month", "Day", "Hour", "Minute", "operation"}

# Convert each log file into a text file with one Korean sentence per record.
for file_path in files_to_process:
    df = pd.read_csv(file_path, sep="\t")

    # An unrelated .txt file in the folder should be skipped with a notice
    # rather than abort the whole run with a KeyError.
    missing_columns = sorted(required_columns - set(df.columns))
    if missing_columns:
        print(f"Skipping {file_path.name}: missing columns {missing_columns}")
        continue

    # Build the sentences record by record instead of df.apply(axis=1):
    # for a file that has a header but no rows, apply hands back a DataFrame
    # rather than a Series, which cannot be assigned to a single column.
    df['Sentence'] = [create_sentence(record) for record in df.to_dict("records")]

    out_path = output_folder / f"{file_path.stem}_kor.txt"
    with open(out_path, "w", encoding="utf-8") as f:
        f.writelines(f"{sentence}\n" for sentence in df['Sentence'])
    output_files.append(str(out_path))

output_files


['kor_logs\\machine_data_kor.txt',
 'kor_logs\\machine_data_simulated_1_kor.txt',
 'kor_logs\\machine_data_simulated_2_kor.txt',
 'kor_logs\\machine_data_simulated_3_kor.txt']

In [6]:
import pandas as pd
from collections import Counter
from pathlib import Path
import os

# Columns a log file must contain before it is summarised.
valid_columns = {
    "Year", "Month", "Day",
    "Hour", "Minute",
    "operation",
}
# Receives the path of each summary file written by the loop in this cell.
output_paths = list()

# Helper function
def convert_to_txt_all_chunks(df):
    """Summarise a machine log hour by hour as Korean text.

    Rows are grouped by the hour assembled from the 'Year', 'Month', 'Day'
    and 'Hour' columns. Each group yields a three-line summary: a heading
    with the date and hour, the sequence of 'operation' values in row order,
    and how often 'error', 'stopped' and 'operating' occurred.

    The grouping key is added to a copy of the frame, so the caller's
    DataFrame is not modified.

    Args:
        df: DataFrame with 'Year', 'Month', 'Day', 'Hour' and 'operation'
            columns.

    Returns:
        All hourly summaries in chronological order, separated by a blank
        line. An input without groups gives an empty string.
    """
    hourly_groups = df.assign(
        datetime_group=pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']])
    ).groupby('datetime_group')

    summaries = []
    for dt, group in hourly_groups:
        operations = group['operation'].tolist()
        state_flow = " → ".join(operations)
        counter = Counter(operations)
        dt_str = f"{int(dt.year)}년 {int(dt.month)}월 {int(dt.day)}일 {int(dt.hour)}시"
        summaries.append(
            f"{dt_str} 상태 요약:\n"
            f"흐름: {state_flow}\n"
            f"오류: {counter.get('error', 0)}회, 정지: {counter.get('stopped', 0)}회, 작동: {counter.get('operating', 0)}회\n"
        )
    return "\n".join(summaries)

input_folder = Path('./data')
output_folder = Path('./kor_logs')
output_folder.mkdir(exist_ok=True)

# Write one hourly summary file per valid log. The files are visited in
# sorted order so output_paths comes out in the same order on every run.
for input_path in sorted(input_folder.glob("*.txt")):
    try:
        df = pd.read_csv(input_path, sep="\t")
    except Exception as exc:
        # Best effort: a file that cannot be read must not abort the batch,
        # but the skip is reported instead of being silently swallowed.
        print(f"Skipping {input_path.name}: could not be read ({exc})")
        continue

    # Files without the expected log columns are not machine logs; skip them.
    if not valid_columns.issubset(df.columns):
        print(f"Skipping {input_path.name}: missing required columns")
        continue

    summary_text = convert_to_txt_all_chunks(df)
    output_filename = f"{input_path.stem}_summary.txt"
    output_path = output_folder / output_filename
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(summary_text)
    output_paths.append(output_path)
