In [None]:
!pip install rdflib

In [None]:

import os
import shutil
from pathlib import Path

# Parent folder whose immediate sub-folders contain the files to collect.
# NOTE: this must be a path on the local filesystem (for example inside a clone
# of the repository); pathlib cannot read from a GitHub URL, so replace the
# placeholder below before running.
SOURCE_PARENT = Path('https://github.com/naoki-kokaze/British_Warship_Career/tree/main/api_out')   # <- edit this

# Destination folder: every entry is gathered here (same caveat: local path).
TARGET_PARENT = Path('https://github.com/naoki-kokaze/British_Warship_Career/tree/main/out_all')   # <- edit this


def unique_destination(directory, name):
    """Return a path inside ``directory`` for ``name`` that does not exist yet.

    The plain name is used when it is free. On a collision the first
    alternative is ``<stem>_moved<suffix>``; if that is taken as well a
    counter is appended (``<stem>_moved2<suffix>``, ``<stem>_moved3<suffix>``,
    ...) until an unused name is found, so an earlier file is never
    overwritten.
    """
    candidate = directory / name
    if not candidate.exists():
        return candidate

    original = Path(name)
    stem, suffix = original.stem, original.suffix
    candidate = directory / f"{stem}_moved{suffix}"
    counter = 2
    while candidate.exists():
        candidate = directory / f"{stem}_moved{counter}{suffix}"
        counter += 1
    return candidate


def flatten_child_dirs(source_parent, target_parent):
    """Move every entry found in the sub-folders of ``source_parent`` into ``target_parent``.

    Args:
        source_parent: Folder whose immediate sub-folders are emptied.
        target_parent: Folder receiving the entries; created if missing.

    Returns:
        List of destination paths, in the order the entries were moved.

    Raises:
        FileNotFoundError: If ``source_parent`` is not an existing directory.
            Raised before anything is created or moved.
    """
    source_parent = Path(source_parent)
    target_parent = Path(target_parent)

    # Validate first so a wrong/placeholder path does not leave a stray target folder behind.
    if not source_parent.is_dir():
        raise FileNotFoundError(f"Source directory not found: {source_parent}")

    target_parent.mkdir(parents=True, exist_ok=True)
    target_resolved = target_parent.resolve()

    moved = []
    # sorted() materialises the listings (we modify the folders while looping)
    # and makes the collision suffixes deterministic.
    for child in sorted(source_parent.iterdir()):
        if not child.is_dir():
            continue
        # If the target lives inside the source, do not move the target onto itself.
        if child.resolve() == target_resolved:
            continue
        for item in sorted(child.iterdir()):
            dest = unique_destination(target_parent, item.name)
            shutil.move(str(item), str(dest))
            moved.append(dest)
    return moved


if __name__ == "__main__":
    flatten_child_dirs(SOURCE_PARENT, TARGET_PARENT)
    print("✅ 完了：子ディレクトリ内のファイルを指定した別ディレクトリへ展開しました。")


In [None]:
import os
import logging

# --- Settings ---
# NOTE: BASE_DIR must be a directory on the local filesystem (for example a
# clone of the repository). The os/open calls below cannot read a GitHub URL,
# so replace this value before running.
BASE_DIR = 'https://github.com/naoki-kokaze/British_Warship_Career/tree/main/'
SOURCE_DIR = os.path.join(BASE_DIR, 'out_all') # directory holding the 269 TTL files generated by the API
ONTOLOGY_PATH = os.path.join(BASE_DIR, 'scheme', 'warship_career_ontology.ttl') # path of the ontology file
OUTPUT_FILE_PATH = os.path.join(BASE_DIR, 'data', 'ship_careers_combined.ttl') # combined output file
LOG_FILE = os.path.join(BASE_DIR, 'log', 'combination.log') # combination log

# Number of leading lines dropped from every source file. It matches the ten
# @prefix declarations of MASTER_PREFIXES, i.e. data is copied from line 11 on.
HEADER_LINE_COUNT = 10

# Master prefix block (same as in warship_career_ontology_v3.ttl)
MASTER_PREFIXES = """@prefix : <http://www.example.com/ontology/warship#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sealit: <http://www.sealitproject.eu/ontology/> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix myData: <http://www.example.com/myData/> .

"""


def configure_logging(log_file):
    """Send root-logger output (INFO and above) to ``log_file``, truncating it.

    The log directory is created first because the file handler opens the file
    immediately. ``force=True`` replaces handlers that are already installed on
    the root logger (e.g. after re-running the cell); without it
    ``basicConfig`` would silently do nothing in that situation.
    """
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(filename=log_file, level=logging.INFO, filemode='w',
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        force=True)


def list_ttl_files(source_dir):
    """Return the names of the ``.ttl`` files in ``source_dir``, sorted by name."""
    return sorted(name for name in os.listdir(source_dir) if name.endswith('.ttl'))


def _is_header_line(line):
    """Tell whether ``line`` is blank, a comment, or a prefix/base directive."""
    stripped = line.lstrip('\ufeff').strip()
    if not stripped or stripped.startswith('#'):
        return True
    return stripped.lower().startswith(('@prefix', '@base', 'prefix ', 'base '))


def combine_ttl_files(source_dir, ttl_files, output_path, header_line_count=HEADER_LINE_COUNT):
    """Concatenate the data part of several Turtle files into one file.

    The output starts with ``MASTER_PREFIXES``. For each file, the first
    ``header_line_count`` lines are dropped and the remainder is appended
    verbatim below a ``# --- Data from file: ... ---`` comment. Files that
    cannot be read, or that have no lines beyond the header, are skipped and
    get no section in the output.

    Args:
        source_dir: Directory containing the files named in ``ttl_files``.
        ttl_files: File names to combine, in the order they should appear.
        output_path: Path of the combined file; its directory is created if missing.
        header_line_count: Number of leading lines to drop from each file.

    Returns:
        ``(file_count, line_count)``: how many files were actually combined
        and how many data lines were copied from them.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    file_count = 0
    line_count = 0

    with open(output_path, 'w', encoding='utf-8') as outfile:
        # 1. Write the master prefix block, followed by blank lines before the data.
        outfile.write(MASTER_PREFIXES)
        outfile.write("\n\n")

        # 2. Process every file in the given (file-name) order.
        for filename in ttl_files:
            filepath = os.path.join(source_dir, filename)

            # Only the read is guarded here: a failure to write the output is
            # not a per-file problem and must reach the caller.
            try:
                with open(filepath, 'r', encoding='utf-8') as infile:
                    lines = infile.readlines()
            except (OSError, UnicodeError) as e:
                logging.error(f"Error reading {filename}: {e}. Skipping this file.")
                print(f"ERROR: Error reading {filename}: {e}. Skipping this file.")
                continue

            header = lines[:header_line_count]
            body = lines[header_line_count:]

            if not body:
                # Nothing beyond the header (empty file or prefixes only): skip.
                logging.warning(f"Skipping {filename}: File has {header_line_count} lines or fewer (likely empty or prefixes only).")
                print(f"Skipping {filename}: File has {header_line_count} lines or fewer.")
                continue

            # The header is dropped blindly by line count; warn when it holds
            # something other than directives, because that content is lost.
            if not all(_is_header_line(line) for line in header):
                note = (f"WARNING: {filename}: the first {header_line_count} lines contain "
                        f"non-prefix content that was dropped.")
                logging.warning(note)
                print(note)

            outfile.write(f"# --- Data from file: {filename} ---\n\n")
            outfile.writelines(body)
            outfile.write("\n\n") # blank lines between files

            file_count += 1
            line_count += len(body)

            if file_count % 50 == 0:
                print(f"Combined {file_count}/{len(ttl_files)} files...")

    return file_count, line_count


def main():
    """Combine all TTL files of SOURCE_DIR into OUTPUT_FILE_PATH, reporting to stdout and LOG_FILE.

    Every early-exit path returns instead of calling ``exit()`` so that the
    notebook kernel keeps running.
    """
    if '://' in BASE_DIR:
        # Stop before any directory is created from a URL-shaped path.
        print(f"ERROR: BASE_DIR must be a local directory path, not a URL: {BASE_DIR}")
        return

    configure_logging(LOG_FILE)
    print(f"Log will be saved to: {LOG_FILE}")

    print(f"--- Starting String-based Combination ---")
    print(f"Source Directory: {SOURCE_DIR}")
    print(f"Output File: {OUTPUT_FILE_PATH}")
    logging.info(f"Starting String-based Combination. Source: {SOURCE_DIR}, Output: {OUTPUT_FILE_PATH}")

    if not os.path.isdir(SOURCE_DIR):
        msg = f"ERROR: Source directory not found: {SOURCE_DIR}"
        print(msg)
        logging.error(msg)
        return

    # Collect the files to read, sorted by file name.
    try:
        ttl_files = list_ttl_files(SOURCE_DIR)
    except OSError:
        msg = f"ERROR: Source directory not found or inaccessible: {SOURCE_DIR}"
        print(msg)
        logging.error(msg)
        return

    if not ttl_files:
        msg = f"No .ttl files found in {SOURCE_DIR}"
        print(msg)
        logging.warning(msg)
        return

    print(f"Found {len(ttl_files)} .ttl files to combine.")
    logging.info(f"Found {len(ttl_files)} .ttl files to combine.")

    try:
        file_count, line_count = combine_ttl_files(SOURCE_DIR, ttl_files, OUTPUT_FILE_PATH)

        msg = f"Successfully combined {file_count} files ({line_count} data lines) into {OUTPUT_FILE_PATH}"
        print(msg)
        logging.info(msg)

    except IOError as e:
        msg = f"ERROR: Could not write to output file {OUTPUT_FILE_PATH}: {e}"
        print(msg)
        logging.error(msg)
    except Exception as e:
        msg = f"An unexpected error occurred during combination: {e}"
        print(msg)
        logging.exception(msg) # records the traceback in the log

    print("--- Combination Process Finished ---")
    logging.info("--- Combination Process Finished ---")


# --- Main ---
if __name__ == "__main__":
    main()
