## 1. Setup and Imports {#setup}

Let's start by importing all necessary modules and setting up the environment.

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import sys
import os
# Make the local AbXtract checkout importable.
# The location can be overridden through the ABXTRACT_PATH environment
# variable; when it is unset or empty, the original developer path is used.
abxtract_path = os.environ.get("ABXTRACT_PATH") or "/home/HX46_FR5/repo_perso/AbXtract"
# Remove any earlier copies first so that re-running this cell leaves exactly
# one entry, at the front of sys.path, instead of stacking duplicates.
sys.path[:] = [entry for entry in sys.path if entry != abxtract_path]
sys.path.insert(0, abxtract_path)

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

from AbXtract import *
from AbXtract import AntibodyDescriptorCalculator, Config, load_config
from AbXtract.sequence import (
    SequenceLiabilityAnalyzer,
    BashourDescriptorCalculator,
    PeptideDescriptorCalculator,
    AntibodyNumbering
)
from AbXtract.structure import (
    SASACalculator,
    ChargeAnalyzer,
    DSSPAnalyzer,
    PropkaAnalyzer,
    ArpeggioAnalyzer
)
from AbXtract.utils import (
    read_fasta,
    write_fasta,
    parse_sequence,
    validate_sequence
)

# Build the configuration object with no overrides.
custom_config = Config()

# The bare string below is a disabled example of a custom configuration; it
# is evaluated and discarded, so it has no effect. It reads `tool_status`,
# which is only assigned further down, so it would have to be moved after
# the tool check before being re-enabled.
'''
# Test custom configuration
custom_config = Config.from_dict({
    'pH': 7.4,
    'numbering_scheme': 'kabat',
    'verbose': True,
    'calculate_dssp': tool_status.get('dssp', False),
    'calculate_propka': tool_status.get('propka', False),
    'calculate_arpeggio': tool_status.get('arpeggio', False)
})
'''

# Query the configuration for external tools and print one line per tool.
tool_status = custom_config.check_external_tools()
print("🛠️ External Tool Status:")
for tool, available in tool_status.items():
    if available:
        status = "✅"
    else:
        status = "❌"
    print(f"  {tool}: {status}")
    
# Directory layout, all relative to the current working directory:
#   <cwd>/data/test          -> DATA_DIR
#   <cwd>/data/test/results  -> RESULTS_DIR
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR.joinpath("data", "test")
RESULTS_DIR = DATA_DIR.joinpath("results")

# Create the directories if they do not exist yet. DATA_DIR is created
# first (with its parents), so RESULTS_DIR only needs its own level.
DATA_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_DIR.mkdir(exist_ok=True)


In [None]:
# Test antibody sequences (based on therapeutic antibodies).
# NOTE(review): each string below is only 49 residues long, so neither one
# includes a constant region; presumably these are truncated N-terminal
# variable-domain fragments — confirm before relying on full-length results.

# Heavy chain test sequence
HEAVY_SEQUENCE = "QVQLVQSGAEVKKPGASVKVSCKASGGTFSSYAISWVRQAPGQGLEWMG"

# Light chain test sequence
LIGHT_SEQUENCE = "DIQMTQSPSSLSASVGDRVTITCRASHSISSYLAWYQQKPGKAPKLLIY"

# Expected location of the input structure. Nothing here creates the file;
# the user has to place it in DATA_DIR.
PDB_FILE = DATA_DIR.joinpath("test.pdb")



In [None]:
# Instantiate the helper objects that the following cells reuse.
# AntibodyNumbering is built with its default arguments; the trailing note is
# a leftover hint that a scheme='imgt' argument was tried (unverified).
numbering = AntibodyNumbering()  # leftover hint: scheme='imgt'
# Peptide descriptor calculator, built with default arguments.
peptide_calc = PeptideDescriptorCalculator()
# Main calculator, driven by the configuration created earlier in the notebook.
calc = AntibodyDescriptorCalculator(config=custom_config)


In [None]:
# Validate both test sequences. Each call's result is unpacked into a
# validity value and an accompanying message.
# NOTE(review): none of the visible cells inspects heavy_valid / light_valid,
# so an invalid sequence would not stop the notebook here — confirm intent.
heavy_valid, heavy_msg = validate_sequence(HEAVY_SEQUENCE)
light_valid, light_msg = validate_sequence(LIGHT_SEQUENCE)


In [None]:
# Number each chain. The complete test string is passed in each case;
# 'H' / 'L' presumably selects the chain type — verify against AntibodyNumbering.
heavy_numbered = numbering.number_sequence(HEAVY_SEQUENCE, 'H')  # heavy chain
light_numbered = numbering.number_sequence(LIGHT_SEQUENCE, 'L')  # light chain

# From the numbered chains, get a pair per chain: an annotated form and the CDRs.
annotated_H, cdrs_H = numbering.get_cdr_sequences(heavy_numbered, 'H')
annotated_L, cdrs_L = numbering.get_cdr_sequences(light_numbered, 'L')

# Peptide profiles are requested from the raw sequences, not the numbered ones.
heavy_profiles = numbering.get_peptide_profiles(HEAVY_SEQUENCE)
light_profiles = numbering.get_peptide_profiles(LIGHT_SEQUENCE)

In [None]:
# Run PeptideDescriptorCalculator.calculate_all on both test chains and keep
# the result in peptide_results for later inspection.
peptide_results = peptide_calc.calculate_all(
    heavy_sequence=HEAVY_SEQUENCE,
    light_sequence=LIGHT_SEQUENCE
)


In [None]:
# Run the sequence-level analysis through the main calculator. The call
# returns two values, unpacked as sequence_results and liabilities;
# sequence_id labels this run.
sequence_results, liabilities = calc.calculate_sequence_descriptors(
    heavy_sequence=HEAVY_SEQUENCE,
    light_sequence=LIGHT_SEQUENCE,
    sequence_id="TestAb_Sequence"
)

In [None]:


# Run structure analysis only if the PDB file is actually present.
# PDB_FILE has to be supplied by the user, so it may be missing. In that case
# the analysis is skipped and the three result names are still defined (as
# None), so later cells can test for them instead of hitting a NameError.
if PDB_FILE.is_file():
    structure_results_seq, structure_results_comp, df_residues = calc.calculate_structure_descriptors(
        pdb_file=PDB_FILE,
        structure_id="TestAb_Structure"
    )
else:
    structure_results_seq = structure_results_comp = df_residues = None
    print(f"⚠️ PDB file not found, skipping structure analysis: {PDB_FILE}")


