各 DB から、sentence の type が 'ab' で、かつ predication の sub_ty または obj_ty が 'cell' であるレコードと、それに関連する情報をすべて抽出する

In [18]:
import sqlite3

# DDL for the six tables created in every filtered output database.
NEW_DB_SCHEMA = (
    """
    CREATE TABLE sentence (
        sent_id INT PRIMARY KEY,
        type TEXT,
        num INT,
        start INT,
        end INT,
        sentence TEXT
    )""",
    """
    CREATE TABLE predication (
        pred_id INT,
        predicate TEXT,
        sub_name TEXT,
        sub_ty TEXT,
        obj_name TEXT,
        obj_ty TEXT
    )""",
    """
    CREATE TABLE aux (
        pred_aux_id INT,
        sub_text TEXT,
        sub_dist INT,
        sub_maxdist INT,
        sub_start INT,
        sub_end INT,
        sub_score INT,
        ind_ty TEXT,
        pred_start INT,
        pred_end INT,
        obj_text TEXT,
        obj_dist INT,
        obj_maxdist INT,
        obj_start INT,
        obj_end INT,
        obj_score INT
    )""",
    """
    CREATE TABLE id (
        sent_id INTEGER PRIMARY KEY,
        pmid INTEGER
    )""",
    """
    CREATE TABLE pred_sent (
        sent_id INTEGER,
        pred_id INTEGER
    )""",
    """
    CREATE TABLE pred_aux (
        pred_id INTEGER,
        pred_aux_id INTEGER
    )""",
)


def _copy_sentence(conn_original, conn_new, sent_id):
    """Copy the ``sentence`` and ``id`` rows of *sent_id* into the new DB."""
    # NOTE(review): SELECT * relies on the source tables having the same
    # column order as the target schema -- confirm against the source DB.
    row_sent = conn_original.execute(
        "SELECT * FROM sentence WHERE sent_id = ?", (sent_id,)
    ).fetchone()
    if row_sent is not None:
        # OR IGNORE: one sentence can carry several matching predications.
        conn_new.execute(
            """
            INSERT OR IGNORE INTO sentence (sent_id, type, num, start, end, sentence)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            row_sent,
        )

    row_id = conn_original.execute(
        "SELECT * FROM id WHERE sent_id = ?", (sent_id,)
    ).fetchone()
    if row_id is not None:
        conn_new.execute(
            "INSERT OR IGNORE INTO id (sent_id, pmid) VALUES (?, ?)", row_id
        )


def _copy_predication(conn_original, conn_new, pred_id):
    """Copy the ``predication``, ``pred_aux`` and ``aux`` rows of *pred_id*."""
    row_pred = conn_original.execute(
        "SELECT * FROM predication WHERE pred_id = ?", (pred_id,)
    ).fetchone()
    if row_pred is not None:
        conn_new.execute(
            """
            INSERT INTO predication (pred_id, predicate, sub_name, sub_ty, obj_name, obj_ty)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            row_pred,
        )

    # Select the link columns by name so the values cannot end up in swapped
    # columns, and copy every link (not just the first) so that each aux row
    # copied below stays reachable from its predication.
    links = conn_original.execute(
        "SELECT pred_id, pred_aux_id FROM pred_aux WHERE pred_id = ?", (pred_id,)
    ).fetchall()
    conn_new.executemany(
        "INSERT INTO pred_aux (pred_id, pred_aux_id) VALUES (?, ?)", links
    )

    aux_rows = conn_original.execute(
        """
        SELECT * FROM aux WHERE pred_aux_id IN (
            SELECT pred_aux_id FROM pred_aux WHERE pred_id = ?
        )
        """,
        (pred_id,),
    ).fetchall()
    conn_new.executemany(
        """
        INSERT INTO aux (pred_aux_id, sub_text, sub_dist, sub_maxdist, sub_start, sub_end,
                        sub_score, ind_ty, pred_start, pred_end, obj_text, obj_dist,
                        obj_maxdist, obj_start, obj_end, obj_score)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        aux_rows,
    )


def extract_cell_predications(original_db_path, new_db_path):
    """Copy every 'ab' sentence / 'cell' predication pair into a new database.

    A ``pred_sent`` pair qualifies when its sentence has ``type = 'ab'``
    (presumably abstract sentences -- confirm) and its predication has
    ``sub_ty = 'cell'`` or ``obj_ty = 'cell'``.  For each qualifying pair the
    related ``sentence``, ``id``, ``pred_sent``, ``predication``, ``pred_aux``
    and ``aux`` rows are written to the new database.

    Args:
        original_db_path: Path of the source SQLite database.
        new_db_path: Path of the output database.  Its tables are created
            here, so it must not already contain them.

    Returns:
        The number of qualifying ``(sent_id, pred_id)`` pairs.

    Raises:
        sqlite3.Error: If a table is missing in the source, already exists in
            the target, or any statement fails.  Both connections are closed
            in every case.
    """
    conn_original = sqlite3.connect(original_db_path)
    try:
        conn_new = sqlite3.connect(new_db_path)
        try:
            with conn_new:
                for ddl in NEW_DB_SCHEMA:
                    conn_new.execute(ddl)

            # Filter inside SQLite instead of pulling every id into Python and
            # splicing them into a (potentially huge) literal IN (...) list.
            pairs = conn_original.execute(
                """
                SELECT sent_id, pred_id FROM pred_sent
                WHERE sent_id IN (SELECT sent_id FROM sentence WHERE type = 'ab')
                AND pred_id IN (
                    SELECT pred_id FROM predication
                    WHERE sub_ty = 'cell' OR obj_ty = 'cell'
                )
                """
            ).fetchall()

            copied_pred_ids = set()
            # One transaction for the whole file: commits once on success and
            # rolls the inserts back if anything fails part-way.
            with conn_new:
                for sent_id, pred_id in pairs:
                    _copy_sentence(conn_original, conn_new, sent_id)
                    # Store exactly the pair that matched the filter.
                    conn_new.execute(
                        "INSERT INTO pred_sent (sent_id, pred_id) VALUES (?, ?)",
                        (sent_id, pred_id),
                    )
                    # predication / pred_aux / aux have no primary key, so copy
                    # them once per predication to avoid duplicate rows when a
                    # pred_id shows up in several pairs.
                    if pred_id not in copied_pred_ids:
                        copied_pred_ids.add(pred_id)
                        _copy_predication(conn_original, conn_new, pred_id)
            return len(pairs)
        finally:
            conn_new.close()
    finally:
        conn_original.close()


# Guarded so the extraction logic can be imported without touching the disk;
# in a notebook cell or when run as a script the loop executes as before.
if __name__ == "__main__":
    for i in range(1, 5265):
        original_db_path = f"/workspace/ssd4t/yoshikawa/0_DATA/SemMed/01_semmed_db/{i:04}_semmed.db"
        new_db_path = f"/workspace/ssd4t/yoshikawa/241206_cell_NER/02_semmed_ext_cells/semmed_db/{i:04}_semmed.db"
        extract_cell_predications(original_db_path, new_db_path)