## Calculate the total hydrophobicity and total surface charge of proteins

In [None]:
# Amino acid sequences (one-letter code) for the PETases of interest.
# The text after ">" in each comment is the header the sequence was listed under.

# GuaPA
GuaPA = "VGQIDGVFTDENYGEYEVTIYYPAINSGVSQPPDKSGAPYPAIIFAHGWLTSKELYTWIGNYCAAQGYVTLLFSVPDPTSLEAFRQSVTGITKSIDYLLVQNQGGLLSGLINTSRIGVMGHSMGAMAVLIATTEDSRIKAAVSLAPGYFGSTTKKYVEACKSIRVPIQFQAGSLDKICPPSAVETYYNAVRIPPKEIIVINGADHIQFSDAPATLWANITLEEQHETSRKYFIAWFNYYLRDDFNYYAYLFGSEARKDMENGILSSLEYVERFDC"

# LCC: >LCC, leaf compost metagenome, highly similar to HRB29 locus GBD22443
LCC_PAZy = "MDGVLWRVRTAALMAALLALAAWALVWASPSVEAQSNPYQRGPNPTRSALTADGPFSVATYTVSRLSVSGFGGGVIYYPTGTSLTFGGIAMSPGYTADASSLAWLGRRLASHGFVVLVINTNSRFDYPDSRASQLSAALNYLRTSSPSAVRARLDANRLAVAGHSMGGGGTLRIAEQNPSLKAAVPLTPWHTDKTFNTSVPVLIVGAEADTVAPVSQHAIPFYQNLPSTTPKVYVELDNASHFAPNSNNAAISVYTISWMKLWVDNDTRYRQFLCNVNDPALSDFRTNNRHCQ"
# LCC construct as used in the ICCM paper (same N-terminus and LEHHHHHH tail as
# `iccm` below). This string previously began "M35SNPY...": the digits "35" are
# not residues (presumably a residue-position label copied along with the
# sequence) and inflated len() by 2, skewing every length-normalised property.
LCC_ICCMpaper = "MSNPYQRGPNPTRSALTADGPFSVATYTVSRLSVSGFGGGVIYYPTGTSLTFGGIAMSPGYTADASSLAWLGRRLASHGFVVLVINTNSRFDYPDSRASQLSAALNYLRTSSPSAVRARLDANRLAVAGHSMGGGGTLRIAEQNPSLKAAVPLTPWHTDKTFNTSVPVLIVGAEADTVAPVSQHAIPFYQNLPSTTPKVYVELDNASHFAPNSNNAAISVYTISWMKLWVDNDTRYRQFLCNVNDPALSDFRTNNRHCQLEHHHHHH"

# IsPETase >Ideonella sakaiensis 201-F6, ISPETase, ISF6_4831
IsPETase = "MNFPRASRLMQAAVLGGLMAVSAAATAQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS"

# PHL7 >Thermoanaerobacter sp. , PHL-7 (PES-H1; PES-H2)
phl7 = "MANPYERGPDPTESSIEAVRGPFAVAQTTVSRLQADGFGGGTIYYPTDTSQGTFGAVAISPGFTAGQESIAWLGPRIASQGFVVITIDTITRLDQPDSRGRQLQAALDHLRTNSVVRNRIDPNRMAVMGHSMGGGGALSAAANNTSLEAAIPLQGWHTRKNWSSVRTPTLVVGAQLDTIAPVSSHSEAFYNSLPSDLDKAYMELRGASHLVSNTPDTTTAKYSIAWLKRFVDDDLRYEQFLCPAPDDFAISEYRSTCPFLEHHHHHH"

# HiC >Humicola insolens , HiC
HiC = "QLGAIENGLESGSANACPDAILIFARGSTEPGNMGITVGPALANGLESHIRNIWIQGVGGPYDAALATNFLPRGTSQANIDEGKRLFALANQKCPNTPVVAGGYSQGAALIAAAVSELSGAVKEQVKGVALFGYTQNLQNRGGIPNYPRERTKVFCNVGDAVCTGTLIITPAHLSYTIEARGEAARFLRDRIRA"

# FAST PETase >FASTPETase||sp|A0A0K8P6T7|PETH_IDESA Poly(ethylene terephthalate) hydrolase OS=Ideonella sakaiensis (strain NBRC 110686 / TISTR 2288 / 201-F6) OX=1547922 GN=ISF6_4831 PE=1 SV=1
FastPETase = "MNFPRASRLMQAAVLGGLMAVSAAATAQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSQNAKQFLEIKGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCS"

# ICCM is LCC with F243I/D238C/S283C/N246M (ICCM)
iccm = "MSNPYQRGPNPTRSALTADGPFSVATYTVSRLSVSGFGGGVIYYPTGTSLTFGGIAMSPGYTADASSLAWLGRRLASHGFVVLVINTNSRFDYPDSRASQLSAALNYLRTSSPSAVRARLDANRLAVAGHSMGGGGTLRIAEQNPSLKAAVPLTPWHTDKTFNTSVPVLIVGAEADTVAPVSQHAIPFYQNLPSTTPKVYVELCNASHIAPMSNNAAISVYTISWMKLWVDNDTRYRQFLCNVNDPALCDFRTNNRHCQLEHHHHHH"

# Protein dictionary: display name -> sequence, in the order results are reported.
# NOTE(review): the key 'LCC_ISSMpaper' looks like a typo for 'LCC_ICCMpaper'; it
# is kept unchanged here because other cells may look entries up by this key.
protein_list = {
    'GuaPA': GuaPA,
    'LCC_PAZy': LCC_PAZy,
    'LCC_ISSMpaper': LCC_ICCMpaper,
    'ISPETase': IsPETase,
    'PHL-7': phl7,
    'HiC': HiC,
    'FastPETase': FastPETase,
    'ICCM': iccm,
}

In [None]:
from Bio import SeqIO
from Bio.SeqUtils import ProtParam

# Hydrophobicity scale: Kyte & Doolittle index of hydrophobicity.
# Keys are upper-case one-letter amino acid codes; larger values are more
# hydrophobic.
KD_SCALE = {
    'I': 4.5, 'V': 4.2, 'L': 3.8, 'F': 2.8, 'C': 2.5,
    'M': 1.9, 'A': 1.8, 'G': -0.4, 'T': -0.7, 'S': -0.8,
    'W': -0.9, 'Y': -1.3, 'P': -1.6, 'H': -3.2,
    'E': -3.5, 'Q': -3.5, 'D': -3.5, 'N': -3.5, 'K': -3.9,
    'R': -4.5
}


def calculate_hydrophobicity(sequence):
    """Return the mean KD_SCALE value over the residues of ``sequence``.

    Args:
        sequence: Amino acid sequence in one-letter code. Case is ignored.
            Characters that are not keys of ``KD_SCALE`` (digits, gaps,
            ambiguity codes, ...) are skipped and do not count towards the
            average.

    Returns:
        The sum of the per-residue scale values divided by the number of
        residues that were scored.

    Raises:
        ValueError: If ``sequence`` contains no residue present in
            ``KD_SCALE`` (this includes the empty string).
    """
    # Upper-case first: the scale is keyed by upper-case letters, so lower-case
    # input would otherwise be skipped entirely and reported as 0.0.
    scores = [KD_SCALE[residue] for residue in str(sequence).upper() if residue in KD_SCALE]
    if not scores:
        raise ValueError("sequence contains no residues with a hydrophobicity value")
    # Divide by the number of scored residues, not len(sequence), so skipped
    # characters do not drag the mean towards zero.
    return sum(scores) / len(scores)

In [None]:
# Calculate hydrophobicity for each protein and print one line per protein.
# prot_props maps each protein_list key to its hydrophobicity value.
prot_props = {}
for prot in protein_list:
    prot_props[prot] = calculate_hydrophobicity(str(protein_list[prot]))
    # calculate_hydrophobicity returns a single float, so it is formatted
    # directly; indexing it with [0] raised TypeError.
    print(f"{prot}: Hydrophobicity: {prot_props[prot]:.3}")

GuaPO: -0.0713
LCC_PAZy: -0.0478
LCC_ISSMpaper: -0.245
ISPETase: -0.167
PHL-7: -0.335
HiC: -0.0335
FastPETase: -0.152
ICCM: -0.185


In [None]:
#Calculating the isoelectric point (pI) of a protein sequence, charge, and aromaticity: 
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis 

# For every entry of protein_list, store the tuple
# (aromaticity, charge at pH 7.4, charge at pH 8.0, isoelectric point)
# under the protein's name and print it as one summary line.
prot_props = {}
for prot, prot_seq in protein_list.items():
    pp = ProteinAnalysis(str(prot_seq))
    prot_props[prot] = (
        pp.aromaticity(),
        pp.charge_at_pH(7.4),
        pp.charge_at_pH(8.0),
        pp.isoelectric_point(),
    )
    print(f"{prot}: Aromaticity: {prot_props[prot][0]:.2%}, total charge at pH=7.4: {prot_props[prot][1]:.3}, total charge at pH=8.0: {prot_props[prot][2]:.3}, Isoelectric Point: {prot_props[prot][3]:.3}")

GuaPO: Aromaticity: 13.09%, total charge at pH=7.4: -9.48, total charge at pH=8.0: -10.4, Isoelectric Point: 4.95
LCC_PAZy: Aromaticity: 9.56%, total charge at pH=7.4: 4.43, total charge at pH=8.0: 3.82, Isoelectric Point: 9.32
LCC_ISSMpaper: Aromaticity: 9.29%, total charge at pH=7.4: 3.65, total charge at pH=8.0: 2.88, Isoelectric Point: 9.14
ISPETase: Aromaticity: 7.93%, total charge at pH=7.4: 8.23, total charge at pH=8.0: 7.6, Isoelectric Point: 9.65
PHL-7: Aromaticity: 8.24%, total charge at pH=7.4: -6.37, total charge at pH=8.0: -7.1, Isoelectric Point: 5.97
HiC: Aromaticity: 6.19%, total charge at pH=7.4: 1.52, total charge at pH=8.0: 0.799, Isoelectric Point: 8.43
FastPETase: Aromaticity: 7.93%, total charge at pH=7.4: 7.27, total charge at pH=8.0: 6.6, Isoelectric Point: 9.51
ICCM: Aromaticity: 8.99%, total charge at pH=7.4: 4.6, total charge at pH=8.0: 3.69, Isoelectric Point: 9.12
