# In [None]:
import pandas as pd
import os
# Tuning constants shared by both detectors (values unchanged from the
# previously inlined literals).
_FLOW_GAP_LIMIT = 2000   # limit applied to "Q value minus sum of the other Qs"
_WINDOW_ROWS = 60        # trailing rows required and scanned by the P check
_BASELINE_END_ROWS = 45  # baseline Q window is rows [-60, -45) from the end
_RECENT_ROWS = 3         # newest rows compared against the trailing window

# Which P columns each Q column is paired with, per data set.
_Q_P_MAPPINGS_C = {
    "Q1": ["P1", "P2", "P3", "P4", "P5", "P6"],
    "Q7": ["P7"],
    "Q8": ["P8"],
}
_Q_P_MAPPINGS_D = {
    "Q1": ["P1", "P2", "P3", "P4", "P5", "P6"],
    "Q3": ["P4"],
    "Q4": ["P5"],
    "Q5": ["P6"],
}


def _pressure_drop_flags(pressures, threshold):
    """Return one bool per column: did any recent row drop versus the window?

    For each of the last ``_RECENT_ROWS`` rows, the relative decrease
    ``(past - current) / (past + 1e-6)`` is computed against every row of the
    trailing ``_WINDOW_ROWS`` window; a column is flagged when any ratio is
    ``>= threshold``.  NaN ratios compare as False and therefore never flag.
    """
    window = pressures.iloc[-_WINDOW_ROWS:]
    dropped = pd.Series(False, index=pressures.columns, dtype=bool)
    for offset in range(1, _RECENT_ROWS + 1):
        current = pressures.iloc[-offset]
        # Subtracting a row Series aligns on column labels, so every window
        # row is compared against `current` in one vectorised step.
        ratios = window.sub(current, axis="columns") / (window + 1e-6)
        dropped = dropped | (ratios >= threshold).any(axis=0)
    return dropped.tolist()


def _flow_rose(flow, other_q_sum):
    """Return True when the latest value exceeds the lowest qualifying baseline.

    The baseline is rows ``[-60, -45)`` of ``flow``, keeping only values whose
    difference from ``other_q_sum`` is below ``_FLOW_GAP_LIMIT``.  An empty
    baseline yields False.
    """
    baseline = flow.iloc[-_WINDOW_ROWS:-_BASELINE_END_ROWS]
    baseline = baseline[(baseline - other_q_sum) < _FLOW_GAP_LIMIT]
    if baseline.empty:
        return False
    return bool(flow.iloc[-1] > baseline.min())


def _flags_for_file(data, file_name, q_p_mappings, threshold):
    """Compute the 0/1 flag list (one entry per ``P*`` column) for one file.

    A column is flagged only when all three conditions hold:
      1. in the last row, ``Q1 - sum(other Q columns) >= _FLOW_GAP_LIMIT``;
      2. the column shows a relative drop (see ``_pressure_drop_flags``);
      3. a Q column mapped to it rose (see ``_flow_rose``).

    Files with fewer than ``_WINDOW_ROWS`` rows (including header-only files)
    or without a ``Q1`` column get all-zero flags instead of raising.
    """
    pressure_columns = [col for col in data.columns if col.startswith("P")]
    q_columns = [col for col in data.columns if col.startswith("Q")]
    no_flags = [0] * len(pressure_columns)

    has_history = len(data) >= _WINDOW_ROWS
    if not has_history:
        print(f"파일 {file_name}에 데이터가 60개보다 적습니다. P 조건 건너뜁니다.")

    # Every final flag needs all three conditions, so a file-wide failure of
    # any one of them means the answer is all zeros.
    if not has_history or "Q1" not in q_columns:
        return no_flags

    last_q = data.iloc[-1][q_columns]
    other_q_sum = last_q.drop("Q1").sum()
    # Written as `not (>=)` so a NaN difference counts as "condition not met".
    if not (last_q["Q1"] - other_q_sum >= _FLOW_GAP_LIMIT):
        return no_flags

    dropped_by_column = _pressure_drop_flags(data[pressure_columns], threshold)
    if not any(dropped_by_column):
        return no_flags

    # The rise check depends only on the Q column, so evaluate it once per
    # mapped Q column rather than once per pressure column.
    # NOTE(review): `other_q_sum` (last-row sum of the non-Q1 columns) is also
    # used when the mapped column is not Q1; kept as in the original logic.
    rising = {
        q_key
        for q_key in q_p_mappings
        if q_key in q_columns and _flow_rose(data[q_key], other_q_sum)
    }
    return [
        int(dropped and any(col in q_p_mappings[q_key] for q_key in rising))
        for col, dropped in zip(pressure_columns, dropped_by_column)
    ]


def _flag_folder(folder_path, threshold, q_p_mappings):
    """Run ``_flags_for_file`` over every ``*.csv`` in a folder, in name order.

    Returns a DataFrame with columns ``ID`` (file name without its trailing
    ``.csv``) and ``flag_list``; the columns exist even when no file matches.
    """
    suffix = ".csv"
    rows = []
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(suffix):
            continue
        data = pd.read_csv(os.path.join(folder_path, file_name))
        print(file_name)
        rows.append({
            # Strip only the extension; a ".csv" elsewhere in the name stays.
            "ID": file_name[:-len(suffix)],
            "flag_list": _flags_for_file(data, file_name, q_p_mappings, threshold),
        })
    return pd.DataFrame(rows, columns=["ID", "flag_list"])


def process_and_flag_anomalies(folder_path, threshold=0.05):
    """Flag anomalous ``P*`` columns for every CSV in ``folder_path`` (set C).

    Uses the C mapping: Q1 -> P1..P6, Q7 -> P7, Q8 -> P8.

    Args:
        folder_path: Directory containing the ``.csv`` files to scan.
        threshold: Minimum relative decrease that counts as a drop.

    Returns:
        DataFrame with columns ``ID`` and ``flag_list`` (list of 0/1 ints,
        one per ``P*`` column in file order), one row per CSV file.
    """
    return _flag_folder(folder_path, threshold, _Q_P_MAPPINGS_C)


def process_and_flag_anomalies_D(folder_path, threshold=0.05):
    """Flag anomalous ``P*`` columns for every CSV in ``folder_path`` (set D).

    Uses the D mapping: Q1 -> P1..P6, Q3 -> P4, Q4 -> P5, Q5 -> P6.

    Args:
        folder_path: Directory containing the ``.csv`` files to scan.
        threshold: Minimum relative decrease that counts as a drop.

    Returns:
        DataFrame with columns ``ID`` and ``flag_list`` (list of 0/1 ints,
        one per ``P*`` column in file order), one row per CSV file.
    """
    return _flag_folder(folder_path, threshold, _Q_P_MAPPINGS_D)

# Input folders for the two test sets; each one is scanned by its own detector.
c_folder_path = 'data/test/C'
d_folder_path = 'data/test/D'

submission_c = process_and_flag_anomalies(c_folder_path)
submission_d = process_and_flag_anomalies_D(d_folder_path)

# Stack the C rows above the D rows under a single fresh 0..n-1 index.
submission = pd.concat([submission_c, submission_d], ignore_index=True)

# Store every flag list as its text representation before writing.
submission["flag_list"] = submission["flag_list"].map(str)

submission.to_csv("submission.csv", index=False)