# Troppo 방법론 벤치마크 튜토리얼
# Troppo Method Benchmark Tutorial

이 튜토리얼은 Troppo의 벤치마크 프레임워크를 사용하여 여러 오믹스 통합 방법론을 비교하는 방법을 보여줍니다.

This tutorial demonstrates how to use Troppo's benchmark framework to compare multiple omics integration methods.

## 목차 (Contents)

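1. [빠른 벤치마크 (Quick Benchmark)](#basic)
2. [상세 벤치마크 및 검증 (Detailed Benchmark with Validation)](#detailed)
3. [결과 시각화 (Visualization)](#visualization)
4. [커스텀 설정 (Custom Configuration)](#custom)
5. [종합 레포트 생성 (Generate Comprehensive Report)](#report)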

---

## 설정 (Setup)

In [None]:
import pandas as pd
import cobra
import re
import numpy as np

# Troppo imports
from troppo.omics.readers.generic import TabularReader
from troppo.methods_wrappers import ModelBasedWrapper
from troppo.methods.registry import MethodRegistry
from troppo.benchmark import BenchmarkRunner, quick_benchmark
from troppo.benchmark.visualization import (
    plot_performance_comparison,
    plot_overlap_heatmap,
    plot_pareto_front,
    plot_radar_chart,
    create_comparison_report
)

print("✓ Imports successful")

### 등록된 방법론 확인 (Check Registered Methods)

In [None]:
# Print one bullet per method name reported by the registry
print("Available methods:")
registered_methods = MethodRegistry.list_methods()
for method in registered_methods:
    print(f"  - {method}")

# Then let the registry print its own detailed listing
print("\nDetailed registry:")
MethodRegistry.print_registry()

### 데이터 로드 (Load Data)

In [None]:
# GPR parsing function
patt = re.compile('__COBAMPGPRDOT__[0-9]{1}')


def replace_alt_transcripts(x):
    """Strip alternative-transcript suffixes from a GPR rule string.

    Every occurrence of ``__COBAMPGPRDOT__`` followed by a single digit is
    removed; all other text is returned unchanged.

    Args:
        x: GPR rule text (or a single gene identifier) to clean.

    Returns:
        The input string with each matched suffix deleted.
    """
    return patt.sub('', x)

# Read the metabolic model from its SBML file
model_path = 'data/HumanGEM_Consistent_COVID19_HAM.xml'
model = cobra.io.read_sbml_model(model_path)
print(f"Model loaded: {len(model.reactions)} reactions, {len(model.metabolites)} metabolites")

# Read the omics table; the first CSV column is used as the row index
omics_data_path = 'data/Desai-GTEx_ensembl.csv'
omics_data = pd.read_csv(omics_data_path, index_col=0)
sample_count, gene_count = omics_data.shape
print(f"Omics data loaded: {sample_count} samples, {gene_count} genes")

### 데이터 전처리 (Preprocess Data)

In [None]:
# Wrap the omics table in a reader and keep the first container it produces
reader = TabularReader(
    path_or_df=omics_data,
    nomenclature='ensemble_gene_id',
    omics_type='transcriptomics',
)
containers = reader.to_containers()
omics_container = containers[0]
print(f"OmicsContainer created: {len(omics_container.get_Data())} genes")

# Wrap the model; gene identifiers in GPR rules are passed through
# replace_alt_transcripts before use
model_wrapper = ModelBasedWrapper(
    model=model,
    ttg_ratio=9999,
    gpr_gene_parse_function=replace_alt_transcripts,
)
print(f"ModelWrapper created: {len(model_wrapper.model_reader.r_ids)} reactions")

# Combine gene values into reaction scores: AND-clauses take the minimum,
# OR-clauses take the sum
data_map = omics_container.get_integrated_data_map(
    model_reader=model_wrapper.model_reader,
    and_func=min,
    or_func=sum,
)
print(f"Gene-Reaction mapping complete: {len(data_map.get_scores())} scores")

---

<a id="basic"></a>
## 1. 빠른 벤치마크 (Quick Benchmark)

가장 간단한 방법으로 모든 방법론을 비교합니다.

In [None]:
# One-call benchmark over four methods; the result is kept in summary_df
benchmark_methods = ['gimme', 'tinit', 'imat', 'fastcore']
summary_df = quick_benchmark(
    model_wrapper=model_wrapper,
    data_map=data_map,
    methods=benchmark_methods,
    biomass_reaction='biomass_human',
    verbose=True,
)

# Frame the summary with an 80-character rule above and below
banner = "=" * 80
print("\n" + banner)
print("BENCHMARK SUMMARY")
print(banner)
print(summary_df)
print(banner)

---

<a id="detailed"></a>
## 2. 상세 벤치마크 (Detailed Benchmark)

생물학적 검증과 일관성 체크를 포함한 상세 벤치마크입니다.

In [None]:
# Configure the runner for four methods against the wrapped model and score map
runner = BenchmarkRunner(
    model_wrapper=model_wrapper,
    data_map=data_map,
    methods=['gimme', 'tinit', 'imat', 'fastcore'],
    biomass_reaction='biomass_human',
    verbose=True,
)

# Run with both validation flags switched on
validation_flags = {'validate_biology': True, 'validate_consistency': True}
comparison = runner.run_benchmark(**validation_flags)

print("\nBenchmark completed!")

### 결과 분석 (Result Analysis)

In [None]:
# Summary table for the run
print("Performance Summary:")
summary_table = comparison.get_summary_dataframe()
print(summary_table)

# Pairwise overlap between methods
print("\nReaction Overlap Matrix (Jaccard Similarity):")
overlap_matrix = comparison.get_overlap_matrix()
print(overlap_matrix)

# Reactions shared by every method
common_rxns = comparison.get_common_reactions()
common_count = len(common_rxns)
print(f"\nReactions common to all methods: {common_count}")

# How many reactions each method reports as unique to itself
print("\nUnique reactions by method:")
for method in comparison.results.keys():
    unique = comparison.get_unique_reactions(method)
    unique_count = len(unique)
    print(f"  {method}: {unique_count} unique reactions")

### 방법론 순위 (Method Rankings)

In [None]:
# Metrics to rank on, reported in this order
metrics = ['execution_time', 'peak_memory', 'percentage_retained']

for metric in metrics:
    print(f"\nRanking by {metric}:")
    rankings = comparison.rank_methods(metric)
    # Each ranking entry unpacks into (method, value); positions start at 1
    for rank, entry in enumerate(rankings, start=1):
        method, value = entry
        print(f"  {rank}. {method}: {value:.3f}")

---

<a id="visualization"></a>
## 3. 결과 시각화 (Visualization)

### Performance Comparison Plot

In [None]:
# Metrics shown in the performance comparison figure
performance_metrics = ['execution_time', 'peak_memory', 'percentage_retained', 'biomass_flux']
plot_performance_comparison(comparison, metrics=performance_metrics, figsize=(15, 10))

### Overlap Heatmap

In [None]:
# Draw the overlap heatmap for the benchmark comparison with figsize=(10, 8)
plot_overlap_heatmap(comparison, figsize=(10, 8))

### Pareto Front (Trade-off Analysis)

In [None]:
# Both trade-off plots use the same figure size
pareto_figsize = (10, 6)

# Execution time against percentage of reactions retained
plot_pareto_front(
    comparison,
    x_metric='execution_time',
    y_metric='percentage_retained',
    figsize=pareto_figsize,
)

# Peak memory against biomass flux
plot_pareto_front(
    comparison,
    x_metric='peak_memory',
    y_metric='biomass_flux',
    figsize=pareto_figsize,
)

### Radar Chart (Multi-metric Comparison)

In [None]:
# Metrics placed on the radar axes; normalize=True is passed through to the plot
radar_metrics = ['execution_time', 'peak_memory', 'percentage_retained', 'biomass_flux']
plot_radar_chart(comparison, metrics=radar_metrics, normalize=True, figsize=(10, 10))

---

<a id="custom"></a>
## 4. 커스텀 설정 (Custom Configuration)

각 방법론에 대한 커스텀 파라미터를 지정할 수 있습니다.

In [None]:
# Collect the non-missing reaction scores once; every percentile-based
# threshold below is derived from this same list.
expression_scores = [
    score for score in data_map.get_scores().values() if score is not None
]

# Define method-specific configurations, keyed by method name
method_configs = {
    'gimme': {
        'obj_frac': 0.9,  # More strict objective fraction
        'flux_threshold': 0.9,
        'preprocess': True,
    },
    'tinit': {
        # 80th percentile of the scores: top 20% instead of 25%
        'threshold': np.percentile(expression_scores, 80),
    },
    'imat': {
        # Adjusted thresholds: upper and lower quartiles of the scores
        'high_threshold': np.percentile(expression_scores, 75),
        'low_threshold': np.percentile(expression_scores, 25),
        'epsilon': 0.5,
    },
}

# Run with custom configs (only the three methods configured above)
runner_custom = BenchmarkRunner(
    model_wrapper=model_wrapper,
    data_map=data_map,
    methods=['gimme', 'tinit', 'imat'],
    biomass_reaction='biomass_human',
    verbose=True,
)

comparison_custom = runner_custom.run_benchmark(
    method_configs=method_configs,
    validate_biology=True,
)

print("\nCustom configuration results:")
print(comparison_custom.get_summary_dataframe())

---

<a id="report"></a>
## 5. 종합 레포트 생성 (Generate Comprehensive Report)

모든 결과를 포함하는 종합 레포트를 생성합니다.

In [None]:
# Write the comparison report, plots included, into 'benchmark_results'
create_comparison_report(
    comparison,
    output_dir='benchmark_results',
    include_plots=True,
)

print("\n✓ Report generated in 'benchmark_results' directory")
print("\nGenerated files:")

# File-name patterns announced to the reader, one bullet each
report_file_patterns = (
    "benchmark_summary_*.csv",
    "overlap_matrix_*.csv",
    "benchmark_results_*.json",
    "performance_comparison_*.png",
    "overlap_heatmap_*.png",
    "pareto_front_*.png",
    "radar_chart_*.png",
    "benchmark_report_*.md",
)
for file_pattern in report_file_patterns:
    print(f"  - {file_pattern}")

### 결과 저장 및 불러오기 (Save and Load Results)

In [None]:
# Persist the comparison to a JSON file
results_path = 'my_benchmark_results.json'
comparison.save_to_json(results_path)
print(f"Results saved to {results_path}")

# Read the same file back into a new comparison object
from troppo.benchmark import BenchmarkComparison
loaded_comparison = BenchmarkComparison.load_from_json(results_path)
print("\nResults loaded successfully!")
loaded_summary = loaded_comparison.get_summary_dataframe()
print(loaded_summary)

---

## 추가: 개별 방법 상세 분석 (Individual Method Analysis)

In [None]:
# Method whose benchmark result is inspected below
method_name = 'gimme'

# Nothing is printed when the method has no entry in the comparison results
if method_name in comparison.results:
    result = comparison.results[method_name]

    # Header followed by the core metrics
    print(f"\nDetailed Analysis: {method_name.upper()}")
    print("=" * 60)
    print(f"Execution Time: {result.execution_time:.2f} seconds")
    print(f"Peak Memory: {result.peak_memory:.2f} MB")
    print(f"Reactions Selected: {result.num_reactions_selected}")
    print(f"Percentage Retained: {result.percentage_retained:.2f}%")

    # Optional metrics are reported only when they are not None
    biomass_flux = result.biomass_flux
    if biomass_flux is not None:
        print(f"Biomass Flux: {biomass_flux:.4f}")

    blocked_count = result.num_blocked_reactions
    if blocked_count is not None:
        print(f"Blocked Reactions: {blocked_count}")

    # First ten selected reaction identifiers
    print("\nSample of selected reactions (first 10):")
    for rxn_id in result.selected_reaction_ids[:10]:
        print(f"  - {rxn_id}")

    # Count of reactions reported as unique to this method
    unique = comparison.get_unique_reactions(method_name)
    print(f"\nReactions unique to {method_name}: {len(unique)}")

---

## 요약 (Summary)

이 튜토리얼에서 다룬 내용:

1. **빠른 벤치마크**: `quick_benchmark()` 함수로 간단하게 비교
2. **상세 벤치마크**: `BenchmarkRunner`로 생물학적 검증 포함
3. **시각화**: 다양한 차트로 결과 비교
4. **커스텀 설정**: 방법론별 파라미터 조정
5. **레포트 생성**: 종합 레포트 자동 생성

### 주요 기능:

- ✅ 여러 방법론 동시 비교
- ✅ 성능 지표 자동 수집 (시간, 메모리, 모델 크기)
- ✅ 생물학적 검증 (biomass flux, task completion)
- ✅ 네트워크 일관성 체크
- ✅ 반응 중복 분석
- ✅ 다양한 시각화
- ✅ 자동 레포트 생성

---

**For more information:**
- Troppo Documentation: http://troppo-bisbi.readthedocs.io/
- GitHub: https://github.com/BioSystemsUM/troppo