# Peptide Cleavage and Reconstruction Tests

This notebook is used to test enzyme cleavage functions (e.g., trypsin and chymotrypsin) 
and to validate reconstruction logic. These experiments help verify enzyme rule behavior 
and fragment boundary correctness.

In [None]:
# Standard imports
import sys
import os
from importlib import reload
import pprint 

# Add parent directory to path so src can be imported
sys.path.append(os.path.abspath(".."))

# Project imports
from src.enzymes import trypsin_cleave, chymotrypsin_cleave
from src.sequence_tools import validate_ordered_fragments, reconstruct_from_ordered


In [None]:
# ---------------------------------------------------------------------
# Jupyter development helpers
# ---------------------------------------------------------------------
# Load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell is executed. Edits to the project code under
# src/ are then picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


## 1. Cleavage Function Tests

This section runs enzyme cleavage on example sequences to verify that the 
fragment outputs match expectations. Only trypsin is exercised so far; a 
chymotrypsin example (`chymotrypsin_cleave`) still needs to be added.

In [None]:
# Cleave a short example peptide with trypsin and show the resulting fragments.
test_seq_trypsin = "AKRMKYP"
frags_trypsin = trypsin_cleave(test_seq_trypsin)
print("Trypsin fragments: {}".format(frags_trypsin))

## 2. Fragment Validation
Check whether generated (or manually provided) fragments are valid according to enzyme cleavage rules.

In [None]:
# valid, details = validate_ordered_fragments(
#     fragments = ['Ak', 'R', 'mk', 'yP'], # or frags_trypsin if defined
#     cut_after={"R", "K"},
#     block_if_next={"P"}
# )
# print(f"Is valid: {valid}")
# pprint.pprint(details)

In [None]:
# Hand-written fragment list for the validation call in the next cell
# (swap in frags_trypsin to validate the cleavage output instead).
fragments = ["Ak", "R", "Xk", "yP"]

In [None]:
# Validate the fragment list and show the verdict followed by the detail report.
valid, details = validate_ordered_fragments(fragments)
print("Is valid: {}".format(valid))
pprint.pprint(details)

In [None]:
# One-letter codes of the 20 standard amino acids, in alphabetical order.
VALID_AMINO_ACIDS = set("ACDEFGHIKLMNPQRSTVWY")

In [None]:
def _validate_residues(fragments: list[str]):

    if not fragments:
        # The list is empty
        return False, {"error": "empty_fragment_list"}
    
    normalized = []
    
    for frag_index, raw_frag in enumerate(fragments):
        frag = raw_frag.strip().upper()
        if frag == "":
            # The fragment is empty
            return False, {
                "error": "empty fragment",
                "fragment_index": frag_index,
            }

        for char_index, aa in enumerate(frag):
            if aa not in VALID_AMINO_ACIDS:
                return False, {
                    "error": "invalid_residue",
                    "residue_index": char_index,
                    "residue": aa,
                    "fragment": frag
                }
        normalized.append(frag)
    
    return True, {"normalized": normalized, "total_fragments": len(normalized)}



In [None]:
# Exercise _validate_residues on edge cases.
# Each case pairs a fragment list with the validity flag it should produce, so
# a regression fails loudly instead of relying on eyeballing printed output.
# The case data lives in its own names; the notebook-level `fragments` list
# used by the validate_ordered_fragments cell is deliberately not reassigned.
residue_cases = {
    "empty list": ([], False),
    "empty fragment": (['aK', '', 'MXk', 'Yp'], False),
    "invalid residue X": (['aK', 'R', 'MXk', 'Yp'], False),
    "all valid, mixed case": (['aK', 'R', 'Mk', 'Yp'], True),
}

for case_name, (case_fragments, expected_valid) in residue_cases.items():
    case_result = _validate_residues(case_fragments)
    print(f"{case_name}: {case_result}")
    # Only the boolean flag is asserted; the details dict is printed for review.
    assert case_result[0] == expected_valid, (
        f"{case_name}: expected valid={expected_valid}, got {case_result}"
    )

In [None]:
# work on: valid_set: set[str]
#     if check_position == "start":
#             # Check that every fragment after the first begins with a valid amino acid
#             for i, frag in enumerate(fragments[1:]):
#                 if frag[0] not in VALID_AMINO_ACIDS:
#                     return False, {
#                         "error": "invalid_character",
#                         "frag_index": i + 1,
#                         "residue": frag[0],
#                         "fragment": frag,
#                     }

#         if check_position == "end":
#             pass

#         return True

In [None]:
# # Check that every fragment except the last ends with a residue in cut_after
#     for i, frag in enumerate(fragments[:-1]):
#         if frag[-1] not in cut_after:
#             return False, {
#                 "error": "invalid_cutoff",
#                 "frag_index": i,
#                 "residue": frag[-1],
#                 "fragment": frag,
#             }
#     # Check that no fragment after the first begins with a residue in block_if_next
#     for i, frag in enumerate(fragments[1:]):
#         if frag[0] in block_if_next:
#             return False, {
#                 "error": "blocked_by_next",
#                 "frag_index": i + 1,
#                 "residue": frag[0],
#                 "fragment": frag,
#             }

#     return True, {
#         "errors": [],
#         "total_fragments": len(fragments),
#         "checked_boundaries": len(fragments) - 1,
#     }