In [25]:
from pathlib import Path
import pandas as pd

# Locate the repository root by walking upward from the working directory.
# The notebook may be launched from the repo root, from `jupyter/`, from
# `datasets/`, or from a deeper sub-directory, so the first directory
# (starting with the working directory itself, then each ancestor) that
# actually contains the dataset file under `datasets/` is used as the root.
dataset_name = 'FreedomIntelligence_medical_o1_reasoning_SFT_train_1000_20250918_195818.csv'
current_dir = Path.cwd()
repo_root = next(
    (
        candidate
        for candidate in (current_dir, *current_dir.parents)
        if (candidate / 'datasets' / dataset_name).is_file()
    ),
    None,
)
if repo_root is None:
    # Fail right here with an actionable message instead of silently falling
    # back to the working directory and letting read_csv fail on a path that
    # was never going to exist.
    raise FileNotFoundError(
        f"Could not find 'datasets/{dataset_name}' in {current_dir} "
        "or any of its parent directories"
    )

medical_path = repo_root / 'datasets' / dataset_name

# Load the CSV at medical_path into a DataFrame.
medical_df = pd.read_csv(medical_path)
print("列名一覧:")
print(medical_df.columns.tolist())

# First row with all of its columns (printed as a Series).
print(medical_df.iloc[0])

# Rich display of the first three rows.
display(medical_df.head(3))


列名一覧:
['Question', 'Complex_CoT', 'Response']
Question       Given the symptoms of sudden weakness in the l...
Complex_CoT    Okay, let's see what's going on here. We've go...
Response       The specific cardiac abnormality most likely t...
Name: 0, dtype: object


Unnamed: 0,Question,Complex_CoT,Response
0,Given the symptoms of sudden weakness in the l...,"Okay, let's see what's going on here. We've go...",The specific cardiac abnormality most likely t...
1,A 33-year-old woman is brought to the emergenc...,"Okay, let's figure out what's going on here. A...","In this scenario, the most likely anatomical s..."
2,A 61-year-old woman with a long history of inv...,"Okay, let's think about this step by step. The...",Cystometry in this case of stress urinary inco...


In [27]:
# Recommended approach: fetch rows one at a time with itertuples (fast).
print("===== itertuples で1行ずつ取得（推奨） =====")
preview_rows = medical_df.head(3)  # only the first three rows are previewed
for row in preview_rows.itertuples():
    # itertuples yields one namedtuple per row: `Index` carries the row label
    # and every column is available as an attribute.
    print(f"\n行番号 {row.Index}:")
    # Each text column is cut to its first 80 characters to keep output short.
    print(f"  Question: {row.Question[:80]}...")
    print(f"  Complex_CoT: {row.Complex_CoT[:80]}...")
    print(f"  Response: {row.Response[:80]}...")

===== itertuples で1行ずつ取得（推奨） =====

行番号 0:
  Question: Given the symptoms of sudden weakness in the left arm and leg, recent long-dista...
  Complex_CoT: Okay, let's see what's going on here. We've got sudden weakness in the person's ...
  Response: The specific cardiac abnormality most likely to be found in this scenario is a p...

行番号 1:
  Question: A 33-year-old woman is brought to the emergency department 15 minutes after bein...
  Complex_CoT: Okay, let's figure out what's going on here. A woman comes in with a stab wound ...
  Response: In this scenario, the most likely anatomical structure to be injured is the lowe...

行番号 2:
  Question: A 61-year-old woman with a long history of involuntary urine loss during activit...
  Complex_CoT: Okay, let's think about this step by step. There's a 61-year-old woman here who'...
  Response: Cystometry in this case of stress urinary incontinence would most likely reveal ...


In [None]:
# Report the size of the DataFrame (rows first, then columns).
row_count, column_count = medical_df.shape
print(f"データフレームの総行数: {row_count} 行")
print(f"データフレームの列数: {column_count} 列")
print()

# Recommended approach: fetch rows one at a time with itertuples (fast).
print("===== itertuples で1行ずつ取得（推奨） =====")
# iloc[:3] selects the same first three rows that head(3) returns.
for row in medical_df.iloc[:3].itertuples():
    print(f"\n行番号 {row.Index}:")
    # Only the first 80 characters of each text column are printed.
    print(f"  Question: {row.Question[:80]}...")
    print(f"  Complex_CoT: {row.Complex_CoT[:80]}...")
    print(f"  Response: {row.Response[:80]}...")

In [None]:
# Guidance on which row-access method to use when.
# The lines are emitted with a single print; empty strings produce the blank
# separator lines, so the text written to stdout is the same as printing each
# line individually.
usage_guide_lines = (
    "【推奨される使い分け】",
    "",
    "1. ⭐ itertuples() - 最速、大量データ処理に最適",
    "   - パフォーマンス重視の場合はこれ一択",
    "   - 1000行以上の処理では特に有効",
    "",
    "2. iterrows() - 可読性が高い、少量データ向け",
    "   - 各列に名前でアクセスできて直感的",
    "   - 100行以下の処理なら問題なし",
    "",
    "3. iloc[i] - 特定の行だけ取得したい場合",
    "   - ランダムアクセスが必要な場合",
    "   - 全行処理には不向き",
    "",
    # Practical example (recommended: itertuples)
    "===== 実用例：itertuples()で全データ処理 =====",
)
print("\n".join(usage_guide_lines))

# Collect the character count of every question across the whole frame.
question_lengths = []
for row in medical_df.itertuples():
    question_lengths.append(len(row.Question))

# Mean question length in characters, shown with one decimal place.
average_question_length = sum(question_lengths) / len(question_lengths)
print(f"質問の平均文字数: {average_question_length:.1f} 文字")