# AlBi 3
### Übungsblatt 2
*Markus Apel, Mathias Husted*

### Aufgabe 1a)

Es handelt sich um das Protein "Immunoglobulin binding protein"

### Aufgabe 1b, 1c, 1d)



In [None]:
import numpy as np
from Bio.PDB import PDBParser
from Bio.PDB.Polypeptide import is_aa, three_to_index, index_to_one


class AtomReader:
    """Read a PDB file and derive backbone hydrogen bonds and helices from it.

    After construction, ``atom_positions`` maps each chain id to a list with
    one dict per standard amino acid.  Each dict has the keys ``"id"`` (the
    0-based position of the residue inside its chain), ``"Residue"`` (the
    three-letter residue name) and ``"C"``, ``"O"``, ``"N"``, ``"H"`` (the
    coordinates of those backbone atoms, or ``None`` when the atom is absent).
    """

    # Parameters of the electrostatic hydrogen-bond energy in get_energy().
    # The values match the Kabsch-Sander (DSSP) model: q1*q2 = 0.42e * 0.20e,
    # dimensional factor 332, bond cutoff -0.5 (kcal/mol in that model).
    _CHARGE_PRODUCT = 0.084
    _DIMENSIONAL_FACTOR = 332
    _HBOND_CUTOFF = -0.5

    # Parameters of the helix detection in sec_structure().
    _TURN_OFFSETS = (3, 4, 5)  # donor - acceptor distances that count as a turn
    _MIN_TURNS = 2             # consecutive turns needed to form a helix
    _MAX_MERGE_GAP = 3         # runs at most this far apart are merged

    def __init__(self, filepath):
        """Parse ``filepath`` and collect the backbone atoms of every chain.

        Only the first model of the structure is read.  Reading every model
        would mix the coordinates of several models within the same chain
        entry list.
        """
        self.parser = PDBParser(QUIET=True)
        self.structure = self.parser.get_structure("structure", filepath)
        self.atom_positions = {}

        first_model = next(iter(self.structure), None)
        if first_model is None:
            return

        for chain in first_model:
            entries = self.atom_positions.setdefault(chain.id, [])
            for index, residue in enumerate(chain):
                if not is_aa(residue, standard=True):
                    continue
                # Build the entry completely before appending it.  The list
                # position differs from ``index`` as soon as a residue has
                # been skipped, so the list must never be indexed by ``index``.
                entry = {"id": index, "Residue": residue.get_resname()}
                for atom_name in ("C", "O", "N", "H"):
                    if atom_name in residue:
                        entry[atom_name] = residue[atom_name].get_coord()
                    else:
                        entry[atom_name] = None
                entries.append(entry)

    def get_sequence(self):
        """Return ``{chain_id: one-letter sequence}`` of the standard amino acids."""
        sequences = {}
        for model in self.structure:
            for chain in model:
                sequences[chain.id] = "".join(
                    index_to_one(three_to_index(residue.get_resname()))
                    for residue in chain
                    if is_aa(residue, standard=True)
                )
        return sequences

    def get_positions(self):
        """Return the per-chain backbone atom table built by the constructor."""
        return self.atom_positions

    def get_energy(self):
        """Classify every donor/acceptor residue pair as hydrogen bonded or not.

        A residue acts as donor through its N-H group and as acceptor through
        its C=O group.  Residues lacking one of the required atoms are skipped,
        as are pairs whose list positions are fewer than two apart.

        Returns:
            ``{donor_id: {acceptor_id: bool}}`` where the flag is ``True`` when
            the electrostatic energy of the pair is below the bond cutoff, or
            ``None`` when no atom table is available.

        NOTE: the result is keyed by residue id only, so for structures with
        several chains the entries of a later chain overwrite those of an
        earlier chain that share the same ids.
        """
        if self.atom_positions is None:
            return None

        h_bond = {}
        for residues in self.atom_positions.values():
            for i, donor in enumerate(residues):
                if donor["N"] is None or donor["H"] is None:
                    continue
                for j, acceptor in enumerate(residues):
                    if acceptor["O"] is None or acceptor["C"] is None:
                        continue
                    if abs(i - j) < 2:
                        continue

                    r_on = np.linalg.norm(acceptor["O"] - donor["N"])
                    r_ch = np.linalg.norm(acceptor["C"] - donor["H"])
                    r_oh = np.linalg.norm(acceptor["O"] - donor["H"])
                    r_cn = np.linalg.norm(acceptor["C"] - donor["N"])

                    energy = (
                        self._CHARGE_PRODUCT
                        * ((1 / r_on) + (1 / r_ch) - (1 / r_oh) - (1 / r_cn))
                        * self._DIMENSIONAL_FACTOR
                    )
                    # bool() turns the numpy comparison result into a plain bool.
                    h_bond.setdefault(donor["id"], {})[acceptor["id"]] = bool(
                        energy < self._HBOND_CUTOFF
                    )

        return h_bond

    def sec_structure(self, h_bond):
        """Detect helices in a hydrogen-bond table produced by ``get_energy``.

        A bonded pair is a *turn* when the donor lies 3, 4 or 5 residues after
        the acceptor (the N-H of residue ``i + n`` binds the C=O of residue
        ``i``).  Two or more turns of the same length on consecutive residues
        form a helical run.  Runs whose hydrogen-bonded spans overlap, or are
        at most three residues apart, are merged into one helix.

        Args:
            h_bond: ``{donor_id: {acceptor_id: bool}}``.

        Returns:
            A list of helices ordered along the chain.  Each helix is a sorted
            list of ``(donor_id, acceptor_id)`` turn pairs.  The residues
            involved range from the smallest to the largest id in those pairs.
        """
        # Group turns by their length.  Runs are then searched per length, so
        # a donor with several acceptors cannot interrupt a run.
        donors_by_offset = {}
        for donor, acceptors in h_bond.items():
            for acceptor, bonded in acceptors.items():
                offset = donor - acceptor
                if bonded and offset in self._TURN_OFFSETS:
                    donors_by_offset.setdefault(offset, []).append(donor)

        # Split the donors of each turn length into runs of consecutive
        # residues and keep the runs that are long enough to be a helix.
        runs = []
        for offset, donors in donors_by_offset.items():
            run = []
            for donor in sorted(donors):
                if run and donor - run[-1][0] != 1:
                    if len(run) >= self._MIN_TURNS:
                        runs.append(run)
                    run = []
                run.append((donor, donor - offset))
            if len(run) >= self._MIN_TURNS:
                runs.append(run)

        # A run spans the residues from its first acceptor to its last donor.
        # Walk the runs in chain order and merge neighbours that overlap or
        # are separated by a small gap.
        runs.sort(key=lambda pairs: (pairs[0][1], pairs[-1][0]))
        merged_helices = []
        span_end = None
        for run in runs:
            start, end = run[0][1], run[-1][0]
            if merged_helices and start - span_end <= self._MAX_MERGE_GAP:
                merged_helices[-1].extend(run)
                span_end = max(span_end, end)
            else:
                merged_helices.append(list(run))
                span_end = end

        for helix in merged_helices:
            helix.sort()

        # TODO: beta sheets are not detected yet.
        return merged_helices



# Driver: read the structure, print the sequence of chain A and report the
# detected helices.
reader = AtomReader("1PGA.pdb")

print(f"Sequence: {reader.get_sequence()['A']}")

h_bond = reader.get_energy()

helices = reader.sec_structure(h_bond)

for helix in helices:
    # Each helix is a list of (donor, acceptor) pairs.  Its extent is the
    # smallest to the largest residue id over all pairs.  Printing the first
    # donor and the last acceptor can give a reversed range such as "36-34".
    # The ids are the 0-based residue positions stored by AtomReader.
    residue_ids = [residue_id for pair in helix for residue_id in pair]
    print(f"Alpha-helix detected in residues {min(residue_ids)}-{max(residue_ids)}")

Sequence: MTYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE
[(36, 33), (37, 34)]
Alpha-helix detected in residues 25-32
Alpha-helix detected in residues 36-34
