In [5]:
import pandas as pd

# Read the raw tab-separated machine log, keeping each physical line as text.
with open('./data/machine_data.txt') as f:
    lines = list(f)

# Split every line into its tab-delimited fields; row 0 carries the column names.
# No type conversion happens here, so every cell of the frame is a string.
data = [raw_line.strip().split('\t') for raw_line in lines]
df = pd.DataFrame(columns=data[0], data=data[1:])

print(df.columns)
df


Index(['Year', 'Month', 'Day', 'Hour', 'Minute', 'operation'], dtype='object')


Unnamed: 0,Year,Month,Day,Hour,Minute,operation
0,2021,1,29,1,14,error
1,2021,1,29,1,14,stopped
2,2021,1,29,1,15,operating
3,2021,1,29,1,15,error
4,2021,1,29,1,15,stopped
...,...,...,...,...,...,...
197,2021,1,30,15,49,stopped
198,2021,1,30,15,50,operating
199,2021,1,30,15,50,stopped
200,2021,1,30,15,51,error


In [8]:
def convert_to_txt_all_chunks(df):
    """Summarize machine-state events per 30-minute window as Korean text.

    Rows are bucketed by their calendar hour plus the half hour their
    ``Minute`` falls in (minutes 0-29 -> :00, minutes 30-59 -> :30). For each
    bucket the summary lists the sequence of ``operation`` values in row
    order, the total event count, the counts of ``error`` / ``stopped`` /
    ``operating``, the number of ``error -> stopped`` and
    ``stopped -> operating`` transitions, and any state that occurs several
    times in a row.

    Args:
        df: DataFrame with ``Year``, ``Month``, ``Day``, ``Hour``, ``Minute``
            and ``operation`` columns. The time columns may be numeric or
            numeric strings. The frame is not modified.

    Returns:
        str: One text block per bucket in ascending time order. Each block
        ends with a newline and blocks are joined with a newline. An input
        without rows yields an empty string.
    """
    # Imported at function scope: this cell has no import lines of its own,
    # and importing inside the per-group loop repeated the work needlessly.
    from collections import Counter
    from itertools import groupby

    # Build the grouping key locally instead of adding helper columns to the
    # caller's frame. to_numeric lets frames whose columns were read as text
    # (e.g. via str.split) work as well as integer frames.
    date_parts = pd.DataFrame(
        {col: pd.to_numeric(df[col]) for col in ('Year', 'Month', 'Day', 'Hour')}
    )
    minute_group = (pd.to_numeric(df['Minute']) // 30) * 30
    datetime_group = pd.to_datetime(date_parts) + pd.to_timedelta(minute_group, unit='m')
    grouped = df.groupby(datetime_group.rename('datetime_group'))

    summaries = []
    for dt, group in grouped:
        operations = group['operation'].tolist()
        state_flow = " → ".join(operations)
        counter = Counter(operations)

        # Detect repeated states. Each groupby run is a one-shot iterator, so
        # its length is measured exactly once; measuring it in the filter and
        # again in the message made every message read "-1".
        # The reported number is the run length minus one, i.e. how many times
        # the state repeated after its first occurrence.
        repeats = []
        for op, run in groupby(operations):
            run_length = sum(1 for _ in run)
            if run_length > 1:
                repeats.append(f"{op} 연속 {run_length - 1}회")

        # Detect state transitions between consecutive events.
        transitions = list(zip(operations[:-1], operations[1:]))
        error_to_stopped = transitions.count(('error', 'stopped'))
        stopped_to_operating = transitions.count(('stopped', 'operating'))

        # Summary text.
        dt_str = f"{dt.year}년 {dt.month}월 {dt.day}일 {dt.hour}시 {dt.minute:02d}분"
        total_events = len(operations)

        summary = [
            f"[{dt_str}] 상태 요약",
            f"- 상태 흐름: {state_flow}",
            f"- 총 이벤트 수: {total_events}회",
            f"- 오류: {counter['error']}회, 정지: {counter['stopped']}회, 작동: {counter['operating']}회",
            f"- 전이 패턴: 오류→정지 {error_to_stopped}회, 정지→작동 {stopped_to_operating}회",
        ]

        # The repeated-state line is only emitted when a run was found.
        if repeats:
            summary.append(f"- 반복 상태: {', '.join(repeats)}")

        summaries.append("\n".join(summary) + "\n")

    return "\n".join(summaries)


In [11]:
import pandas as pd
from collections import Counter
from pathlib import Path
import os
from itertools import groupby

# Columns an input file must contain; the processing loop skips any file whose
# header is missing one of them.
valid_columns = {"Year", "Month", "Day", "Hour", "Minute", "operation"}
# Collects the path of every summary file written by the processing loop.
output_paths = []

def convert_to_txt_all_chunks(df):
    """Summarize machine-state events per 30-minute window as Korean text.

    Rows are bucketed by their calendar hour plus the half hour their
    ``Minute`` falls in (minutes 0-29 -> :00, minutes 30-59 -> :30). For each
    bucket the summary lists the sequence of ``operation`` values in row
    order, the total event count, the counts of ``error`` / ``stopped`` /
    ``operating``, the number of ``error -> stopped`` and
    ``stopped -> operating`` transitions, and any state that occurs several
    times in a row.

    Args:
        df: DataFrame with ``Year``, ``Month``, ``Day``, ``Hour``, ``Minute``
            and ``operation`` columns. The time columns may be numeric or
            numeric strings. The frame is not modified.

    Returns:
        str: One text block per bucket in ascending time order. Each block
        ends with two newlines and blocks are joined with a newline. An input
        without rows yields an empty string.
    """
    # Build the 30-minute grouping key locally instead of adding helper
    # columns to the caller's frame. to_numeric lets frames whose columns were
    # read as text work as well as integer frames.
    date_parts = pd.DataFrame(
        {col: pd.to_numeric(df[col]) for col in ('Year', 'Month', 'Day', 'Hour')}
    )
    minute_group = (pd.to_numeric(df['Minute']) // 30) * 30
    datetime_group = pd.to_datetime(date_parts) + pd.to_timedelta(minute_group, unit='m')
    grouped = df.groupby(datetime_group.rename('datetime_group'))

    summaries = []
    for dt, group in grouped:
        operations = group['operation'].tolist()
        state_flow = " → ".join(operations)
        counter = Counter(operations)

        # Repeated states. Each groupby run is a one-shot iterator, so its
        # length is measured exactly once; measuring it in the filter and
        # again in the message made every message read "-1".
        # The reported number is the run length minus one, i.e. how many times
        # the state repeated after its first occurrence.
        repeats = []
        for op, run in groupby(operations):
            run_length = sum(1 for _ in run)
            if run_length > 1:
                repeats.append(f"{op} 연속 {run_length - 1}회")

        # State transitions between consecutive events.
        transitions = list(zip(operations[:-1], operations[1:]))
        error_to_stopped = transitions.count(('error', 'stopped'))
        stopped_to_operating = transitions.count(('stopped', 'operating'))

        # Human-readable timestamp of the window start.
        dt_str = f"{dt.year}년 {dt.month}월 {dt.day}일 {dt.hour}시 {dt.minute:02d}분"
        total_events = len(operations)

        summary = [
            f"[{dt_str}] 상태 요약",
            f"- 상태 흐름: {state_flow}",
            f"- 총 이벤트 수: {total_events}회",
            f"- 오류: {counter['error']}회, 정지: {counter['stopped']}회, 작동: {counter['operating']}회",
            f"- 전이 패턴: 오류→정지 {error_to_stopped}회, 정지→작동 {stopped_to_operating}회",
        ]
        # The repeated-state line is only emitted when a run was found.
        if repeats:
            summary.append(f"- 반복 상태: {', '.join(repeats)}")

        summaries.append("\n".join(summary) + "\n\n")

    return "\n".join(summaries)

# Directory setup: inputs are read from ./data, summaries are written to ./kor_logs.
input_folder = Path('./data')
output_folder = Path('./kor_logs')
output_folder.mkdir(exist_ok=True)

# Process every *.txt file. The listing is sorted because os.listdir returns
# entries in arbitrary order, which made the order of output_paths vary
# between runs.
for file in sorted(os.listdir(input_folder)):
    if not file.endswith(".txt"):
        continue

    input_path = input_folder / file
    try:
        df = pd.read_csv(input_path, sep="\t")
    except Exception as exc:
        # Best-effort batch: an unreadable file is still skipped, but the
        # reason is reported instead of being swallowed silently.
        print(f"[skip] {input_path}: could not be read as tab-separated text ({exc!r})")
        continue

    # Only files that carry every required column can be summarized.
    missing_columns = valid_columns - set(df.columns)
    if missing_columns:
        print(f"[skip] {input_path}: missing columns {sorted(missing_columns)}")
        continue

    summary_text = convert_to_txt_all_chunks(df)
    output_filename = f"{Path(file).stem}_summary.txt"
    output_path = output_folder / output_filename
    # The summaries contain Korean text, so the encoding is fixed to UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(summary_text)
    output_paths.append(output_path)
