In [1]:
# Code that checks how many sequences satisfy the conditions required for the calculation
import pandas as pd
import ast
import os

# Columns selected from the input file and written, in this order, to every output CSV.
_OUTPUT_COLUMNS = ['ID', 'Sequence', 'Sequence_TPSA', 'TPSA', 'Permeability']

# Residue codes a sequence may contain to be kept by in_csv.
# A frozenset gives constant-time membership tests inside the per-row filter.
_ALLOWED_RESIDUES = frozenset([
    'G', 'A', 'V', 'L', 'I', 'C', 'M', 'S', 'T', 'D', 'E', 'N', 'Q', 'R',
    'K', 'H', 'F', 'Y', 'W', 'P', 'dA', 'dV', 'dL', 'dI', 'dC', 'dM',
    'dS', 'dT', 'dD', 'dE', 'dN', 'dQ', 'dR', 'dK', 'dH', 'dF', 'dY', 'dW', 'dP',
    'Abu', 'Sar', 'Me_Bmt(E)', 'meL', 'meV', 'meA', 'meI', 'meM', 'meS', 'meT',
    'meD', 'meN', 'meQ', 'meK', 'meH', 'meF', 'meY', 'meW',
])


def _load_existing_rows(output_file):
    """Return the rows already saved in ``output_file``, or None if there are none to merge."""
    if not os.path.exists(output_file):
        return None
    try:
        return pd.read_csv(output_file)
    except pd.errors.EmptyDataError:
        # A file without even a header row (e.g. written from a column-less
        # DataFrame) holds nothing to merge; treat it as absent so it gets
        # overwritten instead of blocking every later run.
        return None


def in_csv(file_path,
           output_dir="/Users/teraimao/experiment/data/csv_data11",
           positions=("in11", "sur11", "out11")):
    """Keep the rows whose sequence uses only supported residues and save them.

    The input CSV must provide the columns ``ID``, ``Sequence``,
    ``Sequence_TPSA``, ``TPSA`` and ``Permeability``. ``Sequence`` is parsed
    with ``ast.literal_eval``; a row is kept when every element of the parsed
    sequence is one of the supported residue codes. Rows that fail to parse are
    reported on stdout and skipped.

    The kept rows are merged into ``{output_dir}/experiment_data_{pos}.csv``
    for each ``pos`` in ``positions``. Rows are unique by ``ID``; when an ID
    is present both in the existing file and in the new data, the new row wins.
    The header row is always written, even when no row passes the filter, so
    the file stays readable on the next run.

    Parameters
    ----------
    file_path : str
        Path of the input CSV file.
    output_dir : str, optional
        Directory that receives the output files. It must already exist.
    positions : iterable of str, optional
        Suffixes used to build the output file names.

    Returns
    -------
    None
        Results are written to disk; progress and errors are printed.
    """
    # Load the input and keep only the columns that are carried to the output.
    try:
        data = pd.read_csv(file_path)
        csv_list = data[_OUTPUT_COLUMNS]
    except Exception as e:
        print(f"Error reading input CSV file: {e}")
        return

    result_data = []
    for row in csv_list.itertuples(index=False):
        try:
            # Sequence is stored as the text form of a Python list.
            seq_list = ast.literal_eval(row.Sequence)

            if all(acid in _ALLOWED_RESIDUES for acid in seq_list):
                result_data.append({
                    'ID': row.ID,
                    'Sequence': seq_list,
                    'Sequence_TPSA': row.Sequence_TPSA,
                    'TPSA': row.TPSA,
                    'Permeability': row.Permeability
                })

        except Exception as e:
            print(f"Error processing row {row}: {e}")

    # Passing the columns explicitly keeps the header even when nothing passed.
    result_df = pd.DataFrame(result_data, columns=_OUTPUT_COLUMNS)

    for pos in positions:
        output_file = f"{output_dir}/experiment_data_{pos}.csv"
        try:
            existing_df = _load_existing_rows(output_file)

            # Empty frames are left out of the concat so they cannot influence
            # the resulting column dtypes.
            frames = [df for df in (existing_df, result_df)
                      if df is not None and not df.empty]
            if frames:
                # De-duplicate on every run (including the first one) so the
                # file content does not depend on whether it existed before.
                combined_df = pd.concat(frames).drop_duplicates(subset='ID', keep='last')
            else:
                combined_df = result_df

            combined_df.to_csv(output_file, index=False)
            print(f"Data successfully written to {output_file}")

        except Exception as e:
            print(f"Error writing to {output_file}: {e}")


In [16]:
# Location of the input CSV file that is handed to in_csv.
file_path = "/Users/teraimao/experiment/data/asid_csv_data/CycPeptMPDB_Peptide_Length_11.csv"

In [17]:
# Filter the rows of the CSV at file_path and write the kept rows to the
# experiment_data_{in11,sur11,out11}.csv output files.
in_csv(file_path)

Data successfully written to /Users/teraimao/experiment/data/csv_data11/experiment_data_in11.csv
Data successfully written to /Users/teraimao/experiment/data/csv_data11/experiment_data_sur11.csv
Data successfully written to /Users/teraimao/experiment/data/csv_data11/experiment_data_out11.csv
