## Imports and Setup

In [1]:
from pathlib import Path
import pandas as pd
import logging

# Root logger setup for the notebook: INFO and above, each record prefixed
# with its timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Imports from moremi_biokit.proteins
from moremi_biokit.proteins import (
    BatchProteinProcessor,
    ProteinValidatorV2,
    ProteinRanker,
    ScoringConfig,       # For custom scoring (optional)
    MetricCategory,      # For understanding metrics (optional)
)

from moremi_biokit.pdb_fetcher import(
    list_internal_pdb_ids
)

# Pandas display options: show up to 50 columns and use a 1000-character display width
for _option, _value in (('display.max_columns', 50), ('display.width', 1000)):
    pd.set_option(_option, _value)

2025-05-22 16:48:41,584 - INFO - Enabling RDKit 2025.03.2 jupyter extensions
  vars(torch.load(path, map_location=lambda storage, loc: storage)["args"]),
  state = torch.load(path, map_location=lambda storage, loc: storage)


Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.b

  state = torch.load(path, map_location=lambda storage, loc: storage)


Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.b

In [2]:
from moremi_biokit.proteins.analysis_tools.structure_predictor import predict_structure

In [2]:
# Predict a structure for the sequence; the returned summary is kept in `s`
# so that a later cell can display it.
s = predict_structure(
    "NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE"
)

status_data: {'project_id': '1518be', 'status': 'COMPLETED', 'models': [{'model_id': '01', 'status': 'COMPLETED', 'gmqe': 0.86, 'qmean_global': {'avg_local_score': 0.78}, 'coordinates_url': 'https://swissmodel.expasy.org/project/1518be/models/01.pdb.gz', 'modelcif_url': 'https://swissmodel.expasy.org/project/1518be/models/01.cif.gz'}, {'model_id': '02', 'status': 'COMPLETED', 'gmqe': 0.68, 'qmean_global': {'avg_local_score': 0.65}, 'coordinates_url': 'https://swissmodel.expasy.org/project/1518be/models/02.pdb.gz', 'modelcif_url': 'https://swissmodel.expasy.org/project/1518be/models/02.cif.gz'}], 'date_created': '2025-05-20T15:22:53.911207Z', 'project_title': 'moremi_biokit_prediction_c66cd977-c0fc-4f7c-bb7a-4d0bce53dde7', 'view_url': 'https://swissmodel.expasy.org/project/1518be/view'}
models: [{'model_id': '01', 'status': 'COMPLETED', 'gmqe': 0.86, 'qmean_global': {'avg_local_score': 0.78}, 'coordinates_url': 'https://swissmodel.expasy.org/project/1518be/models/01.pdb.gz', 'modelcif_u

In [17]:
# Display the value bound to `s` (the result of the predict_structure call in an earlier cell).
s

{'message': 'Structure prediction successful.',
 'project_id': '1518be',
 'gmqe': 0.86,
 'model_details': {'model_id': '01',
  'qmean_disco_global': None,
  'qmean_z_score': None,
  'seqid': None,
  'template': None,
  'method': None,
  'coverage': None,
  'ligands': None,
  'created_date': None},
 'pdb_file_path': './predicted_structure_20250522_073251.pdb'}

In [3]:
# Antigen structure produced by an earlier batch run. The path is built relative
# to the notebook's working directory (and then resolved to an absolute path)
# instead of hard-coding one user's home directory, so the cell is portable
# across checkouts.
antigen_pdb_path = (
    Path("protein_notebook_outputs")
    / "batch_processor_results"
    / "ranking_run_20250521_210846"
    / "validator_antigen_pdbs"
    / "antigen_structure_pred_20250521_210907.pdb"
).resolve()

# `pdb_files_path` is the directory the validator writes generated PDB files into.
vali = ProteinValidatorV2(pdb_files_path="zzztestzzz")
vali.set_antigen_context(target_antigen_pdb_file_path=str(antigen_pdb_path))

# Run the full metric pipeline on a single sequence; `res` is reused by a later cell.
res = vali.process_protein("NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE")
res

2025-05-22 09:01:17,308 - INFO - ProteinValidatorV2 initialized. Antibody PDB output path: zzztestzzz. Metrics to run: All. Antigen context will be set separately if needed.
2025-05-22 09:01:17,312 - INFO - 🔍 Attempting to set antigen context. Primary goal: obtain PDB path.
2025-05-22 09:01:17,314 - INFO - 🔍 Antigen PDBs will be managed in: zzztestzzz/antigen_pdbs
2025-05-22 09:01:17,315 - INFO - Priority 1: Checking local antigen PDB path: /home/mino_solo/moremi_toolkits/moremi-biokit/protein_notebook_outputs/batch_processor_results/ranking_run_20250521_210846/validator_antigen_pdbs/antigen_structure_pred_20250521_210907.pdb
2025-05-22 09:01:17,317 - INFO - ✅ Successfully set antigen PDB path from local file: /home/mino_solo/moremi_toolkits/moremi-biokit/protein_notebook_outputs/batch_processor_results/ranking_run_20250521_210846/validator_antigen_pdbs/antigen_structure_pred_20250521_210907.pdb (Inferred ID: antigen_structure_pred_20250521_210907)
2025-05-22 09:01:17,317 - INFO - Loca

🧪 Processing protein sequence: NYMGNPWTEYMAKYDIEEVH...
├── Calculating basic properties...
├── 🧬 Running BLAST analysis...


2025-05-22 09:05:20,362 - INFO - │   └── ✓ BLAST complete
2025-05-22 09:05:20,365 - INFO - ├── 🔍 Calculating ProtParam properties...
2025-05-22 09:05:20,367 - INFO - │   └── ✓ ProtParam complete
2025-05-22 09:05:20,369 - INFO - ├── 🦠 Assessing immunogenicity...


│   └── ✓ BLAST complete
├── 🔍 Analyzing ProtParam properties...
│   └── ✓ ProtParam complete
├── 🦠 Assessing immunogenicity...


2025-05-22 09:05:21,092 - INFO - │   └── ✓ Immunogenicity assessment complete
2025-05-22 09:05:21,095 - INFO - ├── 🔥 Evaluating stability...
2025-05-22 09:05:21,097 - INFO - │   └── ✓ Stability evaluation complete
2025-05-22 09:05:21,099 - INFO - ├── 🧱 Predicting aggregation propensity...
2025-05-22 09:05:21,103 - INFO - │   └── ✓ Aggregation prediction complete
2025-05-22 09:05:21,104 - INFO - ├── 🍭  Identifying glycosylation sites...
2025-05-22 09:05:21,106 - INFO - │   └── ✓ Glycosylation sites identified
2025-05-22 09:05:21,110 - INFO - ├── 🧩 Generating structural model (antibody)...


API returned error: 
	

	



│   └── ✓ Immunogenicity assessment complete
├── 🔥 Evaluating stability...
│   └── ✓ Stability evaluation complete
├── 🧱 Predicting aggregation propensity...
│   └── ✓ Aggregation prediction complete
├── 🍭 Identifying glycosylation sites...
│   └── ✓ Glycosylation sites identified
├── 🧩 Generating structural model (antibody)...


2025-05-22 09:05:38,439 - INFO - │   └── ✓ Antibody structural model generated: zzztestzzz/antibody_C10H7N37O0S18_20250522_090538.pdb
2025-05-22 09:05:38,444 - INFO - ├── 🔗 Calculating binding affinity...


│   └── ✓ Antibody structural model generated
├── 🔗 Calculating binding affinity...
Processing files:
- /home/mino_solo/moremi_toolkits/moremi-biokit/protein_notebook_outputs/batch_processor_results/ranking_run_20250521_210846/validator_antigen_pdbs/antigen_structure_pred_20250521_210907.pdb
- zzztestzzz/antibody_C10H7N37O0S18_20250522_090538.pdb
Using system type: linux
Combining PDB files into complex.pdb
Running command: prodigy complex.pdb


2025-05-22 09:05:39,770 - INFO - │   └── ✓ Binding affinity calculated against antigen PDB antigen_structure_pred_20250521_210907
2025-05-22 09:05:39,771 - INFO - ├── 🎯 Predicting epitopes...


│   └── ✓ Binding affinity calculated against antigen PDB antigen_structure_pred_20250521_210907
├── 🎯 Predicting epitope regions...


2025-05-22 09:07:37,439 - INFO - │   └── ✓ Epitope prediction complete
2025-05-22 09:07:37,440 - INFO - ├── 🌐 Analyzing sequence conservancy...
2025-05-22 09:07:37,454 - INFO - │   └── ✓ Conservancy analysis complete
2025-05-22 09:07:37,455 - INFO - ├── 🔧 Assessing developability...


│   └── ✓ Epitope prediction complete
├── 🌐 Analyzing sequence conservancy...
│   └── ✓ Conservancy analysis complete
├── 🔧 Assessing developability...


2025-05-22 09:07:41,017 - INFO - │   └── ✓ Developability assessment complete
2025-05-22 09:07:41,018 - INFO - └── 📊 Collection of metrics complete...
2025-05-22 09:07:41,020 - INFO - 

✅ Successfully processed protein NYMGNPWTEYMAKYDIEEVH...




│   └── ✓ Developability assessment complete
└── 📊 Collection of metrics complete


✅ Successfully processed protein NYMGNPWTEYMAKYDIEEVH...




ProcessingResult(sequence='NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE', metrics=ProteinMetrics(sequence='NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE', antigen='Antigen context not set', antigen_id='antigen_structure_pred_20250521_210907', molecular_weight=38286.4351, molecular_formula='C10H7N37O0S18', blast=[{'sequence_title': 'gb|AGL76296.1| apical membrane antigen 1, partial [Plasmodium falciparum]', 'length':

In [4]:
# Show the metrics collected for the processed protein as a dictionary.
res.metrics.to_dict()

{'sequence': 'NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE',
 'antigen': 'Antigen context not set',
 'antigen_id': 'antigen_structure_pred_20250521_210907',
 'antigen_pdb_chain_id': None,
 'molecular_weight': 38286.4351,
 'molecular_formula': 'C10H7N37O0S18',
 'metrics': {'blast': [{'sequence_title': 'gb|AGL76296.1| apical membrane antigen 1, partial [Plasmodium falciparum]',
    'length': 545,
    'e_value': 0.0,
    'identity_percentage': 100.0,
    'matched_sequence_alignment': 'NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPH

In [11]:
from moremi_biokit.proteins.analysis_tools.conservancy import predict_conservancy

# Run the conservancy analysis for the listed epitopes against the protein sequence.
cc = predict_conservancy(
    protein_sequences="NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE",
    epitopes=[
        'PWTEYMAKYDIEEVHG',
        'GEDAEVAGTQYRLPSG',
        'TTFLKPVATGNQDLKDGGFAFPPTNPLISP',
        'DFYKNNEYVKNLDELTL',
        'NPDNDKNSNY',
        'NNGPRYCNKDQSKRNSMF',
        'RPAKDKLF',
        'VDNWEEVCPRKNLENA',
        'FGLWVDGNCEDIPHVNEFSAND',
        'ASDQPKQYEQHLTDYEKI',
        'GFKNKNAS',
        'LPTGAFKADRYKSHGKGYNWGNYNRETQKC',
    ],
)



In [12]:
# Pull the results table out of the conservancy output (annotated as a DataFrame).
ccd: pd.DataFrame = cc['results']

In [20]:
# Convert the results table into a list with one dict per row, then display it.
myd = ccd.to_dict(orient='records')
myd

[{'Epitope #': 1,
  'Epitope Sequence': 'PWTEYMAKYDIEEVHG',
  'Percent of protein sequence matches at identity >= 70%': '100.00% (1/1)',
  'Minimum Identity': '100.00%',
  'Maximum Identity': '100.00%'},
 {'Epitope #': 2,
  'Epitope Sequence': 'GEDAEVAGTQYRLPSG',
  'Percent of protein sequence matches at identity >= 70%': '100.00% (1/1)',
  'Minimum Identity': '100.00%',
  'Maximum Identity': '100.00%'},
 {'Epitope #': 3,
  'Epitope Sequence': 'TTFLKPVATGNQDLKDGGFAFPPTNPLISP',
  'Percent of protein sequence matches at identity >= 70%': '100.00% (1/1)',
  'Minimum Identity': '100.00%',
  'Maximum Identity': '100.00%'},
 {'Epitope #': 4,
  'Epitope Sequence': 'DFYKNNEYVKNLDELTL',
  'Percent of protein sequence matches at identity >= 70%': '100.00% (1/1)',
  'Minimum Identity': '100.00%',
  'Maximum Identity': '100.00%'},
 {'Epitope #': 5,
  'Epitope Sequence': 'NPDNDKNSNY',
  'Percent of protein sequence matches at identity >= 70%': '100.00% (1/1)',
  'Minimum Identity': '100.00%',
  'Ma

In [21]:
# Rebuild a DataFrame from the list of row dicts and display it.
df1 = pd.DataFrame(data=myd)
df1

Unnamed: 0,Epitope #,Epitope Sequence,Percent of protein sequence matches at identity >= 70%,Minimum Identity,Maximum Identity
0,1,PWTEYMAKYDIEEVHG,100.00% (1/1),100.00%,100.00%
1,2,GEDAEVAGTQYRLPSG,100.00% (1/1),100.00%,100.00%
2,3,TTFLKPVATGNQDLKDGGFAFPPTNPLISP,100.00% (1/1),100.00%,100.00%
3,4,DFYKNNEYVKNLDELTL,100.00% (1/1),100.00%,100.00%
4,5,NPDNDKNSNY,100.00% (1/1),100.00%,100.00%
5,6,NNGPRYCNKDQSKRNSMF,100.00% (1/1),100.00%,100.00%
6,7,RPAKDKLF,100.00% (1/1),100.00%,100.00%
7,8,VDNWEEVCPRKNLENA,100.00% (1/1),100.00%,100.00%
8,9,FGLWVDGNCEDIPHVNEFSAND,100.00% (1/1),100.00%,100.00%
9,10,ASDQPKQYEQHLTDYEKI,100.00% (1/1),100.00%,100.00%


In [4]:
import iedb
# Query the IEDB B-cell epitope endpoint (Bepipred-2.0 method, window size 9)
# for the sequence, then display whatever the call returned.
t = iedb.query_bcell_epitope(
    method="Bepipred-2.0",
    sequence="NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE",
    window_size=9,
)
t

API returned error: 
	

	





## Using BatchProteinProcessor

In [2]:
# --- Output locations ---------------------------------------------------------
# Everything is kept relative to the notebook's working directory so the cell
# runs on any checkout rather than on a single developer's machine.
NOTEBOOK_OUTPUT_DIR = Path("protein_notebook_outputs")
NOTEBOOK_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
print(f"Output directory for this notebook: {NOTEBOOK_OUTPUT_DIR.resolve()}\n")

batch_output_dir = NOTEBOOK_OUTPUT_DIR / "batch_processor_results"
batch_output_dir.mkdir(parents=True, exist_ok=True)

# --- Input --------------------------------------------------------------------
# The sequence file lives inside the notebook output directory. Resolving it
# gives the processor an absolute path without hard-coding a home directory.
batch_input_file = (NOTEBOOK_OUTPUT_DIR / "example_proteins.prot").resolve()
if not batch_input_file.is_file():
    # Fail before the long-running batch starts rather than somewhere inside it.
    raise FileNotFoundError(f"Input sequence file not found: {batch_input_file}")

print(f"Running BatchProteinProcessor... Output will be in: {batch_output_dir}\n")

antigen = "NYMGNPWTEYMAKYDIEEVHGSGIRVDLGEDAEVAGTQYRLPSGKCPVFGKGIIIENSNTTFLKPVATGNQDLKDGGFAFPPTNPLISPMTLNGMRDFYKNNEYVKNLDELTLCSRHAGNMNPDNDKNSNYKYPAVYDYNDKKCHILYIAAQENNGPRYCNKDQSKRNSMFCFRPAKDKLFENYTYLSKNVVDNWEEVCPRKNLENAKFGLWVDGNCEDIPHVNEFSANDLFECNKLVFELSASDQPKQYEQHLTDYEKIKEGFKNKNASMIKSAFLPTGAFKADRYKSHGKGYNWGNYNRETQKCEIFNVKPTCLINNSSYIATTALSHPIEVE"

# --- Report switches ----------------------------------------------------------
# Named once so the processor arguments and the summary printed below cannot
# drift apart.
GENERATE_PDF = False  # do NOT generate individual PDF reports for each protein
GENERATE_CSV = True   # generate individual CSV reports for each protein

# Instantiate the processor
processor = BatchProteinProcessor(
    input_file=str(batch_input_file),
    output_dir_base=str(batch_output_dir),
    generate_pdf=GENERATE_PDF,
    generate_csv=GENERATE_CSV,
    target_antigen_sequence=antigen
)

# Run the batch processing
processor.process_batch()

print(f"\nBatch processing complete. Check the directory: {batch_output_dir}")
print("This directory should contain:")
print("- Overall ranking CSV and PDF (if protein_report_generator is available).")
# Only mention the per-protein reports that were actually requested above.
if GENERATE_PDF:
    print("- `protein_reports/` subdirectory with individual PDF reports (because generate_pdf=True).")
if GENERATE_CSV:
    print("- Individual CSV reports for each protein (because generate_csv=True).")
print("- `pdbs/` subdirectory with downloaded/used PDB files.")
print("- Log files (`processing.log`, `failed_proteins.txt`).")

2025-05-22 16:48:50,834 - INFO - BatchProteinProcessor initialized. Base output directory: protein_notebook_outputs/batch_processor_results
2025-05-22 16:48:50,836 - INFO - 🚀 Starting batch processing for example_proteins.prot...
2025-05-22 16:48:50,840 - INFO - BatchProcessor.read_sequence_file read 4 sequences from example_proteins.prot
2025-05-22 16:48:50,842 - INFO - Calling rank_proteins_from_metrics. Output will be in a subdirectory of: protein_notebook_outputs/batch_processor_results
2025-05-22 16:48:50,846 - INFO - [protein_ranker.py:693] - Ranking process started. Output Dir: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850
2025-05-22 16:48:50,849 - INFO - [protein_ranker.py:694] - Enhanced report generator available: True
2025-05-22 16:48:50,851 - INFO - [protein_ranker.py:695] - Ranking report generator available: True
2025-05-22 16:48:50,856 - INFO - [protein_validator_v2.py:143] - ProteinValidatorV2 initialized. Antibody PDB output path: protein

Output directory for this notebook: /home/mino_solo/moremi_toolkits/moremi-biokit/protein_notebook_outputs

Running BatchProteinProcessor... Output will be in: protein_notebook_outputs/batch_processor_results



2025-05-22 16:49:09,593 - INFO - [protein_validator_v2.py:179] - ✅ Successfully predicted antigen structure. PDB file: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antigen_pdbs/antigen_structure_pred_20250522_164909.pdb
2025-05-22 16:49:09,600 - INFO - [protein_validator_v2.py:415] - ✅ Successfully predicted structure from user-provided/derived antigen sequence: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antigen_pdbs/antigen_structure_pred_20250522_164909.pdb
2025-05-22 16:49:09,608 - INFO - [protein_validator_v2.py:431] - ✅ Antigen context successfully set with PDB_Path='protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antigen_pdbs/antigen_structure_pred_20250522_164909.pdb'. PDB_ID='predicted_user_sequence', Chain_ID='N/A'. Sequence (len 335) also obtained.
2025-05-22 16:49:09,612 - INFO - [protein_ranker.py:759] - Antigen context set: True
2025-05-22 16:49:09,6


Starting validation of 4 proteins (from input_source)...

Validating protein 1/4: QVQLVESGGGVVQPGRSLRL...
🧪 Processing protein sequence: QVQLVESGGGVVQPGRSLRL...
├── Calculating basic properties...
├── 🧬 Running BLAST analysis...


2025-05-22 16:57:15,226 - INFO - [protein_validator_v2.py:507] - │   └── ✓ BLAST complete
2025-05-22 16:57:15,232 - INFO - [protein_validator_v2.py:524] - ├── 🔍 Calculating ProtParam properties...
2025-05-22 16:57:15,238 - INFO - [protein_validator_v2.py:529] - │   └── ✓ ProtParam complete
2025-05-22 16:57:15,241 - INFO - [protein_validator_v2.py:547] - ├── 🦠 Assessing immunogenicity...


│   └── ✓ BLAST complete
├── 🔍 Analyzing ProtParam properties...
│   └── ✓ ProtParam complete
├── 🦠 Assessing immunogenicity...


2025-05-22 16:57:20,192 - INFO - [protein_validator_v2.py:552] - │   └── ✓ Immunogenicity assessment complete
2025-05-22 16:57:20,197 - INFO - [protein_validator_v2.py:570] - ├── 🔥 Evaluating stability...
2025-05-22 16:57:20,206 - INFO - [protein_validator_v2.py:575] - │   └── ✓ Stability evaluation complete
2025-05-22 16:57:20,213 - INFO - [protein_validator_v2.py:592] - ├── 🧱 Predicting aggregation propensity...
2025-05-22 16:57:20,223 - INFO - [protein_validator_v2.py:597] - │   └── ✓ Aggregation prediction complete
2025-05-22 16:57:20,227 - INFO - [protein_validator_v2.py:614] - ├── 🍭  Identifying glycosylation sites...
2025-05-22 16:57:20,236 - INFO - [protein_validator_v2.py:619] - │   └── ✓ Glycosylation sites identified
2025-05-22 16:57:20,241 - INFO - [protein_validator_v2.py:637] - ├── 🧩 Generating structural model (antibody)...


│   └── ✓ Immunogenicity assessment complete
├── 🔥 Evaluating stability...
│   └── ✓ Stability evaluation complete
├── 🧱 Predicting aggregation propensity...
│   └── ✓ Aggregation prediction complete
├── 🍭 Identifying glycosylation sites...
│   └── ✓ Glycosylation sites identified
├── 🧩 Generating structural model (antibody)...


2025-05-22 16:57:38,955 - INFO - [protein_validator_v2.py:665] - ├── 🔗 Calculating binding affinity...
2025-05-22 16:57:38,959 - INFO - [protein_validator_v2.py:720] - ├── 🎯 Predicting epitopes...


│   └── ⚠️ Antibody structural model generation failed
├── 🔗 Calculating binding affinity...
│   └── ⚠️ Skipping binding affinity: missing antibody PDB structure (prediction failed or not available)
├── 🎯 Predicting epitope regions...


2025-05-22 16:58:38,330 - INFO - [protein_validator_v2.py:725] - │   └── ✓ Epitope prediction complete
2025-05-22 16:58:38,336 - INFO - [protein_validator_v2.py:742] - ├── 🌐 Analyzing sequence conservancy...
2025-05-22 16:58:38,373 - INFO - [protein_validator_v2.py:755] - │   └── ✓ Conservancy analysis complete
2025-05-22 16:58:38,379 - INFO - [protein_validator_v2.py:772] - ├── 🔧 Assessing developability...


│   └── ✓ Epitope prediction complete
├── 🌐 Analyzing sequence conservancy...
│   └── ✓ Conservancy analysis complete
├── 🔧 Assessing developability...


2025-05-22 16:58:41,550 - INFO - [protein_validator_v2.py:777] - │   └── ✓ Developability assessment complete
2025-05-22 16:58:41,552 - INFO - [protein_validator_v2.py:803] - └── 📊 Collection of metrics complete...
2025-05-22 16:58:41,555 - INFO - [protein_validator_v2.py:811] - 

✅ Successfully processed protein QVQLVESGGGVVQPGRSLRL...


2025-05-22 16:58:41,576 - INFO - [protein_validator_v2.py:1206] - Appended to realtime CSV backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_validation_attempts_backup.csv for protein 1
2025-05-22 16:58:41,627 - INFO - [protein_validator_v2.py:1219] - Updated realtime JSON backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_successful_metrics_for_ranking.json with 1 successful proteins.
2025-05-22 16:58:41,630 - INFO - [protein_validator_v2.py:502] - ├── 🧬 Running BLAST analysis....


│   └── ✓ Developability assessment complete
└── 📊 Collection of metrics complete


✅ Successfully processed protein QVQLVESGGGVVQPGRSLRL...



Validating protein 2/4: MTPVFLGTPSVTQGIPGAGG...
🧪 Processing protein sequence: MTPVFLGTPSVTQGIPGAGG...
├── Calculating basic properties...
├── 🧬 Running BLAST analysis...


2025-05-22 17:04:46,979 - INFO - [protein_validator_v2.py:507] - │   └── ✓ BLAST complete
2025-05-22 17:04:46,989 - INFO - [protein_validator_v2.py:524] - ├── 🔍 Calculating ProtParam properties...
2025-05-22 17:04:46,995 - INFO - [protein_validator_v2.py:529] - │   └── ✓ ProtParam complete
2025-05-22 17:04:47,001 - INFO - [protein_validator_v2.py:547] - ├── 🦠 Assessing immunogenicity...


│   └── ✓ BLAST complete
├── 🔍 Analyzing ProtParam properties...
│   └── ✓ ProtParam complete
├── 🦠 Assessing immunogenicity...


2025-05-22 17:04:54,912 - INFO - [protein_validator_v2.py:552] - │   └── ✓ Immunogenicity assessment complete
2025-05-22 17:04:54,916 - INFO - [protein_validator_v2.py:570] - ├── 🔥 Evaluating stability...
2025-05-22 17:04:54,923 - INFO - [protein_validator_v2.py:575] - │   └── ✓ Stability evaluation complete
2025-05-22 17:04:54,929 - INFO - [protein_validator_v2.py:592] - ├── 🧱 Predicting aggregation propensity...
2025-05-22 17:04:54,937 - INFO - [protein_validator_v2.py:597] - │   └── ✓ Aggregation prediction complete
2025-05-22 17:04:54,943 - INFO - [protein_validator_v2.py:614] - ├── 🍭  Identifying glycosylation sites...
2025-05-22 17:04:54,948 - INFO - [protein_validator_v2.py:619] - │   └── ✓ Glycosylation sites identified
2025-05-22 17:04:54,953 - INFO - [protein_validator_v2.py:637] - ├── 🧩 Generating structural model (antibody)...


│   └── ✓ Immunogenicity assessment complete
├── 🔥 Evaluating stability...
│   └── ✓ Stability evaluation complete
├── 🧱 Predicting aggregation propensity...
│   └── ✓ Aggregation prediction complete
├── 🍭 Identifying glycosylation sites...
│   └── ✓ Glycosylation sites identified
├── 🧩 Generating structural model (antibody)...


2025-05-22 17:05:13,132 - INFO - [protein_validator_v2.py:645] - │   └── ✓ Antibody structural model generated: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antibody_pdbs/antibody_C0H0N17O0S42_20250522_170513.pdb
2025-05-22 17:05:13,139 - INFO - [protein_validator_v2.py:665] - ├── 🔗 Calculating binding affinity...


│   └── ✓ Antibody structural model generated
├── 🔗 Calculating binding affinity...
Processing files:
- protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antigen_pdbs/antigen_structure_pred_20250522_164909.pdb
- protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antibody_pdbs/antibody_C0H0N17O0S42_20250522_170513.pdb
Using system type: linux
Combining PDB files into complex.pdb
Running command: prodigy complex.pdb


2025-05-22 17:05:14,454 - INFO - [protein_validator_v2.py:679] - │   └── ✓ Binding affinity calculated against antigen PDB predicted_user_sequence
2025-05-22 17:05:14,457 - INFO - [protein_validator_v2.py:720] - ├── 🎯 Predicting epitopes...



Errors:
Traceback (most recent call last):
  File "/home/mino_solo/anaconda3/envs/bio/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/cli.py", line 114, in main
    prodigy.predict(
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/modules/prodigy.py", line 149, in predict
    self.ic_network = calculate_ic(
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/modules/prodigy.py", line 42, in calculate_ic
    raise ValueError("No contacts found for selection")
ValueError: No contacts found for selection

Error during prediction: Traceback (most recent call last):
  File "/home/mino_solo/anaconda3/envs/bio/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/cli.py", line 114, in main
    prodigy.predict(
  File "/home/mino_solo/anaconda3/envs/

2025-05-22 17:07:15,004 - INFO - [protein_validator_v2.py:725] - │   └── ✓ Epitope prediction complete
2025-05-22 17:07:15,009 - INFO - [protein_validator_v2.py:742] - ├── 🌐 Analyzing sequence conservancy...
2025-05-22 17:07:15,023 - INFO - [protein_validator_v2.py:772] - ├── 🔧 Assessing developability...


API returned error: 
	

	



│   └── ✓ Epitope prediction complete
├── 🌐 Analyzing sequence conservancy...
│   └── ⚠️ Conservancy prediction failed: Skipped: dependent epitope prediction was skipped or failed.
├── 🔧 Assessing developability...


2025-05-22 17:07:22,697 - INFO - [protein_validator_v2.py:777] - │   └── ✓ Developability assessment complete
2025-05-22 17:07:22,701 - INFO - [protein_validator_v2.py:803] - └── 📊 Collection of metrics complete...
2025-05-22 17:07:22,715 - INFO - [protein_validator_v2.py:811] - 

✅ Successfully processed protein MTPVFLGTPSVTQGIPGAGG...


2025-05-22 17:07:22,750 - INFO - [protein_validator_v2.py:1206] - Appended to realtime CSV backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_validation_attempts_backup.csv for protein 2
2025-05-22 17:07:22,891 - INFO - [protein_validator_v2.py:1219] - Updated realtime JSON backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_successful_metrics_for_ranking.json with 2 successful proteins.
2025-05-22 17:07:22,895 - INFO - [protein_validator_v2.py:502] - ├── 🧬 Running BLAST analysis....


│   └── ✓ Developability assessment complete
└── 📊 Collection of metrics complete


✅ Successfully processed protein MTPVFLGTPSVTQGIPGAGG...



Validating protein 3/4: EVQLVESGGGVVQPGRSLRL...
🧪 Processing protein sequence: EVQLVESGGGVVQPGRSLRL...
├── Calculating basic properties...
├── 🧬 Running BLAST analysis...


2025-05-22 17:10:29,613 - INFO - [protein_validator_v2.py:507] - │   └── ✓ BLAST complete
2025-05-22 17:10:29,616 - INFO - [protein_validator_v2.py:524] - ├── 🔍 Calculating ProtParam properties...
2025-05-22 17:10:29,618 - INFO - [protein_validator_v2.py:529] - │   └── ✓ ProtParam complete
2025-05-22 17:10:29,621 - INFO - [protein_validator_v2.py:547] - ├── 🦠 Assessing immunogenicity...


│   └── ✓ BLAST complete
├── 🔍 Analyzing ProtParam properties...
│   └── ✓ ProtParam complete
├── 🦠 Assessing immunogenicity...


2025-05-22 17:10:36,424 - INFO - [protein_validator_v2.py:552] - │   └── ✓ Immunogenicity assessment complete
2025-05-22 17:10:36,427 - INFO - [protein_validator_v2.py:570] - ├── 🔥 Evaluating stability...
2025-05-22 17:10:36,430 - INFO - [protein_validator_v2.py:575] - │   └── ✓ Stability evaluation complete
2025-05-22 17:10:36,433 - INFO - [protein_validator_v2.py:592] - ├── 🧱 Predicting aggregation propensity...
2025-05-22 17:10:36,444 - INFO - [protein_validator_v2.py:597] - │   └── ✓ Aggregation prediction complete
2025-05-22 17:10:36,454 - INFO - [protein_validator_v2.py:614] - ├── 🍭  Identifying glycosylation sites...
2025-05-22 17:10:36,460 - INFO - [protein_validator_v2.py:619] - │   └── ✓ Glycosylation sites identified
2025-05-22 17:10:36,468 - INFO - [protein_validator_v2.py:637] - ├── 🧩 Generating structural model (antibody)...


│   └── ✓ Immunogenicity assessment complete
├── 🔥 Evaluating stability...
│   └── ✓ Stability evaluation complete
├── 🧱 Predicting aggregation propensity...
│   └── ✓ Aggregation prediction complete
├── 🍭 Identifying glycosylation sites...
│   └── ✓ Glycosylation sites identified
├── 🧩 Generating structural model (antibody)...


2025-05-22 17:10:53,751 - INFO - [protein_validator_v2.py:645] - │   └── ✓ Antibody structural model generated: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antibody_pdbs/antibody_C7H7N13O0S49_20250522_171053.pdb
2025-05-22 17:10:53,755 - INFO - [protein_validator_v2.py:665] - ├── 🔗 Calculating binding affinity...


│   └── ✓ Antibody structural model generated
├── 🔗 Calculating binding affinity...
Processing files:
- protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antigen_pdbs/antigen_structure_pred_20250522_164909.pdb
- protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validator_antibody_pdbs/antibody_C7H7N13O0S49_20250522_171053.pdb
Using system type: linux
Combining PDB files into complex.pdb
Running command: prodigy complex.pdb


2025-05-22 17:10:54,676 - INFO - [protein_validator_v2.py:679] - │   └── ✓ Binding affinity calculated against antigen PDB predicted_user_sequence
2025-05-22 17:10:54,678 - INFO - [protein_validator_v2.py:720] - ├── 🎯 Predicting epitopes...



Errors:
Traceback (most recent call last):
  File "/home/mino_solo/anaconda3/envs/bio/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/cli.py", line 114, in main
    prodigy.predict(
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/modules/prodigy.py", line 149, in predict
    self.ic_network = calculate_ic(
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/modules/prodigy.py", line 42, in calculate_ic
    raise ValueError("No contacts found for selection")
ValueError: No contacts found for selection

Error during prediction: Traceback (most recent call last):
  File "/home/mino_solo/anaconda3/envs/bio/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/cli.py", line 114, in main
    prodigy.predict(
  File "/home/mino_solo/anaconda3/envs/

2025-05-22 17:12:15,680 - INFO - [protein_validator_v2.py:725] - │   └── ✓ Epitope prediction complete
2025-05-22 17:12:15,686 - INFO - [protein_validator_v2.py:742] - ├── 🌐 Analyzing sequence conservancy...
2025-05-22 17:12:15,741 - INFO - [protein_validator_v2.py:755] - │   └── ✓ Conservancy analysis complete
2025-05-22 17:12:15,746 - INFO - [protein_validator_v2.py:772] - ├── 🔧 Assessing developability...


│   └── ✓ Epitope prediction complete
├── 🌐 Analyzing sequence conservancy...
│   └── ✓ Conservancy analysis complete
├── 🔧 Assessing developability...


2025-05-22 17:12:19,618 - INFO - [protein_validator_v2.py:777] - │   └── ✓ Developability assessment complete
2025-05-22 17:12:19,623 - INFO - [protein_validator_v2.py:803] - └── 📊 Collection of metrics complete...
2025-05-22 17:12:19,626 - INFO - [protein_validator_v2.py:811] - 

✅ Successfully processed protein EVQLVESGGGVVQPGRSLRL...


2025-05-22 17:12:19,660 - INFO - [protein_validator_v2.py:1206] - Appended to realtime CSV backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_validation_attempts_backup.csv for protein 3
2025-05-22 17:12:19,762 - INFO - [protein_validator_v2.py:1219] - Updated realtime JSON backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_successful_metrics_for_ranking.json with 3 successful proteins.
2025-05-22 17:12:19,764 - INFO - [protein_validator_v2.py:502] - ├── 🧬 Running BLAST analysis....


│   └── ✓ Developability assessment complete
└── 📊 Collection of metrics complete


✅ Successfully processed protein EVQLVESGGGVVQPGRSLRL...



Validating protein 4/4: MNTTLTTPSFGGGGSGGGGS...
🧪 Processing protein sequence: MNTTLTTPSFGGGGSGGGGS...
├── Calculating basic properties...
├── 🧬 Running BLAST analysis...


2025-05-22 17:17:24,941 - INFO - [protein_validator_v2.py:507] - │   └── ✓ BLAST complete
2025-05-22 17:17:24,951 - INFO - [protein_validator_v2.py:524] - ├── 🔍 Calculating ProtParam properties...
2025-05-22 17:17:24,958 - INFO - [protein_validator_v2.py:529] - │   └── ✓ ProtParam complete
2025-05-22 17:17:24,961 - INFO - [protein_validator_v2.py:547] - ├── 🦠 Assessing immunogenicity...


│   └── ✓ BLAST complete
├── 🔍 Analyzing ProtParam properties...
│   └── ✓ ProtParam complete
├── 🦠 Assessing immunogenicity...


2025-05-22 17:17:29,162 - INFO - [protein_validator_v2.py:552] - │   └── ✓ Immunogenicity assessment complete
2025-05-22 17:17:29,167 - INFO - [protein_validator_v2.py:570] - ├── 🔥 Evaluating stability...
2025-05-22 17:17:29,171 - INFO - [protein_validator_v2.py:575] - │   └── ✓ Stability evaluation complete
2025-05-22 17:17:29,181 - INFO - [protein_validator_v2.py:592] - ├── 🧱 Predicting aggregation propensity...
2025-05-22 17:17:29,188 - INFO - [protein_validator_v2.py:597] - │   └── ✓ Aggregation prediction complete
2025-05-22 17:17:29,196 - INFO - [protein_validator_v2.py:614] - ├── 🍭  Identifying glycosylation sites...
2025-05-22 17:17:29,203 - INFO - [protein_validator_v2.py:619] - │   └── ✓ Glycosylation sites identified
2025-05-22 17:17:29,210 - INFO - [protein_validator_v2.py:637] - ├── 🧩 Generating structural model (antibody)...


│   └── ✓ Immunogenicity assessment complete
├── 🔥 Evaluating stability...
│   └── ✓ Stability evaluation complete
├── 🧱 Predicting aggregation propensity...
│   └── ✓ Aggregation prediction complete
├── 🍭 Identifying glycosylation sites...
│   └── ✓ Glycosylation sites identified
├── 🧩 Generating structural model (antibody)...


2025-05-22 17:17:48,018 - INFO - [protein_validator_v2.py:665] - ├── 🔗 Calculating binding affinity...
2025-05-22 17:17:48,028 - INFO - [protein_validator_v2.py:720] - ├── 🎯 Predicting epitopes...


│   └── ⚠️ Antibody structural model generation failed
├── 🔗 Calculating binding affinity...
│   └── ⚠️ Skipping binding affinity: missing antibody PDB structure (prediction failed or not available)
├── 🎯 Predicting epitope regions...


2025-05-22 17:18:24,452 - INFO - [protein_validator_v2.py:725] - │   └── ✓ Epitope prediction complete
2025-05-22 17:18:24,457 - INFO - [protein_validator_v2.py:742] - ├── 🌐 Analyzing sequence conservancy...
2025-05-22 17:18:24,478 - INFO - [protein_validator_v2.py:755] - │   └── ✓ Conservancy analysis complete
2025-05-22 17:18:24,484 - INFO - [protein_validator_v2.py:772] - ├── 🔧 Assessing developability...


│   └── ✓ Epitope prediction complete
├── 🌐 Analyzing sequence conservancy...
│   └── ✓ Conservancy analysis complete
├── 🔧 Assessing developability...


2025-05-22 17:18:27,381 - INFO - [protein_validator_v2.py:777] - │   └── ✓ Developability assessment complete
2025-05-22 17:18:27,383 - INFO - [protein_validator_v2.py:803] - └── 📊 Collection of metrics complete...
2025-05-22 17:18:27,385 - INFO - [protein_validator_v2.py:811] - 

✅ Successfully processed protein MNTTLTTPSFGGGGSGGGGS...


2025-05-22 17:18:27,393 - INFO - [protein_validator_v2.py:1206] - Appended to realtime CSV backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_validation_attempts_backup.csv for protein 4
2025-05-22 17:18:27,519 - INFO - [protein_validator_v2.py:1219] - Updated realtime JSON backup: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/realtime_successful_metrics_for_ranking.json with 4 successful proteins.
2025-05-22 17:18:27,522 - INFO - [protein_validator_v2.py:1233] - Saving validation results to CSV: protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/valida

│   └── ✓ Developability assessment complete
└── 📊 Collection of metrics complete


✅ Successfully processed protein MNTTLTTPSFGGGGSGGGGS...



Validation via validate_protein_list complete:
├── Total proteins attempted: 4
├── Successfully validated (metrics collected): 4
└── Failed validation or no metrics: 0

INFO: Validation results for 4 proteins saved to protein_notebook_outputs/batch_processor_results/ranking_run_20250522_164850/validation_attempts_summary.csv


2025-05-22 17:18:27,606 - INFO - [protein_ranker.py:792] - Validation Phase: Total Attempted: 4, Successful for Ranking: 4, Failed/Skipped Validation: 0
2025-05-22 17:18:27,609 - INFO - [protein_ranker.py:354] - 🎯 Ranking 4 proteins (Ranker V2 logic)...
📊 Calculating scores (Ranker V2):   0%|          | 0/4 [00:00<?, ?protein/s]2025-05-22 17:18:27,619 - INFO - [protein_ranker.py:234] - Category 'Structure' for QVQLVESGGGVVQPGRSLRL was 'An unexpected error occurred in predict_structure: 'NoneType' object has no attribute 'get''. Score set to 0.
2025-05-22 17:18:27,624 - INFO - [protein_ranker.py:234] - Category 'Binding Affinity' for MTPVFLGTPSVTQGIPGAGG was 'Error during prediction: Traceback (most recent call last):
  File "/home/mino_solo/anaconda3/envs/bio/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/mino_solo/anaconda3/envs/bio/lib/python3.10/site-packages/prodigy_prot/cli.py", line 114, in main
    prodigy.predict(
  File "/home/mino_solo/anaconda3/envs/bio

📊 Successfully processed and ranked 4 proteins.
🏆 Top 5 ranked proteins (from example_proteins.prot):
  1. Name: SeqFromFile_L3 - Score: 0.3701
  2. Name: SeqFromFile_L4 - Score: 0.2757
  3. Name: SeqFromFile_L1 - Score: 0.2405
  4. Name: SeqFromFile_L2 - Score: 0.0745
✨ Batch processing complete!
📁 Results and detailed logs from the run are in a subdirectory within: protein_notebook_outputs/batch_processor_results
📋 Batch processor log: protein_notebook_outputs/batch_processor_results/batch_processor_20250522_164850.log

Batch processing complete. Check the directory: protein_notebook_outputs/batch_processor_results
This directory should contain:
- Overall ranking CSV and PDF (if protein_report_generator is available).
- `protein_reports/` subdirectory with individual PDF reports (because generate_pdf=True).
- `pdbs/` subdirectory with downloaded/used PDB files.
- Log files (`processing.log`, `failed_proteins.txt`).


## Define Output Directory and Example Sequences

In [2]:
# Everything this notebook produces is written beneath this folder.
NOTEBOOK_OUTPUT_DIR = Path("protein_notebook_outputs")
NOTEBOOK_OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
print("Output directory for this notebook: {}\n".format(NOTEBOOK_OUTPUT_DIR.resolve()))

# Demo inputs: one record per protein.
#   name       - label used as the record header when the input file is written
#   sequence   - amino-acid sequence (one-letter codes)
#   antigen_id - optional antigen PDB ID for this particular protein
protein_sequences_list = [
    dict(
        name="AB_001_Heavy",
        sequence="EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCAKAGISGSGGSYFDYWGQGTLVTVSS",
        antigen_id="1FBI",  # per-protein antigen override (optional)
    ),
    dict(
        name="AB_002_Light",
        sequence="DIQMTQSPSSLSASVGDRVTITCRASQGISRWLAWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQYNSYPPTFGQGTKVEIK",
        # antigen_id deliberately omitted: falls back to the default / the
        # validator's target_antigen_pdb_id
    ),
    dict(
        name="AB_003_Heavy_Problematic",
        sequence="EVHLLESGGGLVQPGGSMKLSCVASGFTFSDAYYMNWVRQSPEKGLEWVAEIRNKPYNYETYYSDSVKGRFTISRDDSKNAVYLQMNGLRAEDTGIYYCARYDYDILTGYYYYMDVWGKGTTVTVSS",
        antigen_id="2VIR",
    ),
    dict(
        name="AB_004_Short_Invalid",
        sequence="ACDEFGHIKLMNPQRSTVWY",  # too short on purpose; expected to fail validation
    ),
]

# Write the example records to a FASTA-like file that BatchProteinProcessor can read.
# Each record is a ">name" header line followed by the sequence line; entries with
# an empty sequence are left out. The optional antigen_id is not written (it could be
# appended to the header as [antigen_id=...] if BatchProcessor supports it).
dummy_sequence_file = NOTEBOOK_OUTPUT_DIR.joinpath("example_proteins.fasta")
with dummy_sequence_file.open("w") as fasta_handle:
    fasta_handle.writelines(
        ">{}\n{}\n".format(entry["name"], entry["sequence"])
        for entry in protein_sequences_list
        if entry["sequence"]
    )
print("Created dummy sequence file: {}".format(dummy_sequence_file))

# Show a small sample (at most five) of the internal PDB IDs usable as antigens.
print("\nSome available internal antigen PDB IDs:")
internal_antigens = list_internal_pdb_ids()
print(internal_antigens[:5] if internal_antigens else "No internal antigen PDBs found.")

Output directory for this notebook: /home/mino_solo/moremi_toolkits/moremi-biokit/protein_notebook_outputs

Created dummy sequence file: protein_notebook_outputs/example_proteins.fasta

Some available internal antigen PDB IDs:
['6mpv', 'ANZ81575.1 PstS', 'CAA48354.1 HBV', 'CAA87404.1', 'CCE35749.1 mmpL3']


## Antibody Validation

In [None]:
# Folder layout for the step-by-step (validate -> rank) walkthrough.
step_by_step_output_dir = NOTEBOOK_OUTPUT_DIR / "step_by_step_results"
step_by_step_output_dir.mkdir(parents=True, exist_ok=True)
validator_pdb_path = step_by_step_output_dir / "validator_pdbs"
validator_pdb_path.mkdir(parents=True, exist_ok=True)

# Instantiate the validator.
# The imports cell only brings `ProteinValidatorV2` into scope; the bare name
# `ProteinValidator` is never defined, so using it raised NameError on a fresh kernel
# (and left `successful_metrics_list` undefined for the ranking cells below).
# NOTE(review): confirm ProteinValidatorV2 accepts `pdb_files_path` /
# `target_antigen_pdb_id` and exposes `process_proteins` / `get_successful_metrics`.
# We can specify a target_antigen_pdb_id, or it will pick one randomly from internal PDBs if available.
chosen_antigen_for_validator = internal_antigens[0] if internal_antigens else None
validator = ProteinValidatorV2(
    pdb_files_path=str(validator_pdb_path),
    target_antigen_pdb_id=chosen_antigen_for_validator
)

print(f"Using target antigen for Validator: {validator.target_antigen_pdb_id}\n")

# Using process_proteins (which takes a file path)
# For this example, let's re-use the dummy_sequence_file for simplicity with process_proteins
print(f"Validating sequences from file: {dummy_sequence_file}")
validation_results = validator.process_proteins(str(dummy_sequence_file), str(step_by_step_output_dir / "validation_run"))

# Alternatively, to process a list of sequences one by one
# (`sequences_to_validate` is only needed for this alternative):
sequences_to_validate = [ab['sequence'] for ab in protein_sequences_list if ab['sequence']]
# validated_metrics_list = validator.validate_proteins(sequences_to_validate)
# print(f"Validated {len(validated_metrics_list)} proteins successfully via validate_proteins method.")

# Keep only the results that produced metrics; the ranking cells consume this list.
successful_metrics_list = validator.get_successful_metrics(validation_results)

print(f"\nTotal sequences processed by validator: {len(validation_results)}")
print(f"Number of successfully validated proteins: {len(successful_metrics_list)}")
print(f"Number of failed validations: {len(validation_results) - len(successful_metrics_list)}")

if successful_metrics_list:
    print("\nExample metrics for the first successfully validated protein:")
    first_valid_metrics_dict = successful_metrics_list[0].to_dict()
    print(f"  Sequence: {first_valid_metrics_dict['sequence'][:30]}...")
    print(f"  Antigen Used: {first_valid_metrics_dict['antigen']}")
    print(f"  Molecular Weight: {first_valid_metrics_dict['molecular_weight']}")
    print(f"  Total Score (from validator internal scoring): {first_valid_metrics_dict['total_score']}")
    # print(first_valid_metrics_dict['metrics']['protparam'])
    # You can explore first_valid_metrics_dict['metrics'] for detailed results from each tool
else:
    print("\nNo proteins were successfully validated in the step-by-step approach.")

## Antibody Ranking

In [3]:
# Rank the proteins that passed validation; if there are none, just report that.
if not successful_metrics_list:
    print("\nNo successfully validated proteins to rank.")
else:
    ranker_output_dir = step_by_step_output_dir / "ranking_run"
    ranker_output_dir.mkdir(exist_ok=True, parents=True)

    # generate_pdf / generate_csv control the individual per-protein reports.
    ranker = ProteinRanker(generate_pdf=True, generate_csv=True)
    ranker.set_output_directory(str(ranker_output_dir))

    print(f"\nRanking {len(successful_metrics_list)} validated proteins...")
    print(f"Ranker outputs (including individual reports if enabled) will be in: {ranker_output_dir}\n")

    ranked_df = ranker.rank_proteins(successful_metrics_list)

    print("Top ranked proteins (DataFrame view):")
    display(ranked_df.head())

    # The same ranking, as a list of dictionaries.
    ranked_dicts = ranker.get_ranking_results_as_dict()
    if ranked_dicts:
        best_entry = ranked_dicts[0]
        print("\nExample of first ranked protein (dictionary view):")
        # Only a few fields are shown; the full dict might be too verbose.
        print(f"  Sequence: {best_entry['sequence'][:30]}...")
        print(f"  Total Score: {best_entry['total_score']}")
        print(f"  Antigen: {best_entry['antigen']}")
        print(f"  Molecular Formula: {best_entry['molecular_formula']}")

    print(f"\nRanking complete. Check {ranker_output_dir} for:")
    for expected_artifact in (
        "- Overall ranking CSV (`rankings/rankings_*.csv`).",
        "- Overall ranking PDF (`rankings/ranking_report_*.pdf`) if protein_report_generator is available.",
        "- `protein_reports/` subdirectory with individual PDF and CSV reports (as generate_pdf=True, generate_csv=True).",
    ):
        print(expected_artifact)

NameError: name 'successful_metrics_list' is not defined

## Custom Scoring (Optional)

In [2]:
# Re-rank the validated proteins with user-defined category weights.
if not successful_metrics_list:
    print("\nSkipping custom scoring example as no proteins were successfully validated.")
else:
    custom_scoring_output_dir = step_by_step_output_dir / "custom_scoring_run"
    custom_scoring_output_dir.mkdir(exist_ok=True, parents=True)

    # Per-category weights, listed as (category, weight) pairs.
    custom_weights = dict([
        (MetricCategory.BINDING_AFFINITY, 0.40),  # increased weight
        (MetricCategory.STRUCTURE, 0.15),         # decreased weight
        (MetricCategory.GLYCOSYLATION, 0.10),
        (MetricCategory.AGGREGATION, 0.10),
        (MetricCategory.PROTPARAM, 0.10),
        (MetricCategory.IMMUNOGENICITY, 0.05),
        (MetricCategory.CONSERVANCY, 0.05),
        (MetricCategory.STABILITY, 0.05),
        (MetricCategory.EPITOPE, 0.00),           # zero weight
        (MetricCategory.DEVELOPABILITY, 0.00),    # zero weight
    ])

    custom_config = ScoringConfig()
    custom_config.category_weights = custom_weights

    # Ranker driven by the custom config: individual PDFs are disabled for this run,
    # leaving only the overall ranking CSV and individual CSVs.
    custom_ranker = ProteinRanker(
        config=custom_config,
        generate_pdf=False,
        generate_csv=True,
    )
    custom_ranker.set_output_directory(str(custom_scoring_output_dir))

    print("\nRanking with custom scoring config...")
    print(f"Custom ranker outputs will be in: {custom_scoring_output_dir}\n")
    custom_ranked_df = custom_ranker.rank_proteins(successful_metrics_list)

    print("Top ranked proteins (custom scoring):")
    display(custom_ranked_df.head())

NameError: name 'successful_metrics_list' is not defined