In [2]:
from pyrosetta import *
from pyrosetta.rosetta import *
from pyrosetta.teaching import *

from pyrosetta.rosetta.protocols.carbohydrates import *
from pyrosetta.rosetta.core.select.residue_selector import *
from pyrosetta.rosetta.core.simple_metrics.metrics import *
from pyrosetta.rosetta.core.simple_metrics.composite_metrics import *
from pyrosetta.rosetta.core.simple_metrics.per_residue_metrics import *

from scipy.spatial.transform import Rotation as R

from utils.carb_utils import *

options = """
-beta
-include_sugars
-alternate_3_letter_codes pdb_sugar

-write_pdb_link_records
-auto_detect_glycan_connections
-ignore_unrecognized_res
-out:level 100
"""

#-out:level 100

init(" ".join(options.split('\n')))

import os
import numpy as np
import pandas as pd
import copy

input_dir = "./"
os.chdir(input_dir)

┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.Release.python311.m1 2024.19+release.a34b73c40fe9c61558d566d6a63f803cfb15a4fc 2024-05-02T16:22:03] retrieved from: http://www.pyrosetta.org


In [3]:
sc = get_score_function()
fr = pyrosetta.rosetta.protocols.relax.FastRelax()
fr.set_scorefxn(sc)
fr.max_iter(100)

In [4]:
#Get list of proteins
pdb = [];

df = pd.read_csv('./carbbinders_pdblist.txt',header=None).values

print(len(df),df)

6559 [['3old']
 ['3ole']
 ['1pig']
 ...
 ['5DFM']
 ['4A34']
 ['5HQJ']]


In [5]:

        
#Determines if the protein is glycosylated or we have free carbohydrates
def is_glycosylated(pose):
    """
    Gets whether a pose contains glycosylated proteins
    Args:
        pose : pyrosetta pose 
    Returns:
        bool : True / False
    """
    tree_set = pose.glycan_tree_set()
    for start in tree_set.get_start_points():
        parent = tree_set.get_parent(start)
        if parent != 0:
            return True;
    return False
    

In [6]:
os.getcwd()

'/Users/scanner1/Downloads/capdock'

In [7]:
pose = pyrosetta.toolbox.pose_from_rcsb('1bag',ATOM=False)

In [8]:
tree_set = pose.glycan_tree_set()

In [9]:
s = []
for start in tree_set.get_start_points():
    print(start, pose.pdb_info().pose2pdb(start), pose.residue_type(start).name3(), pose.residue_type(start).name())
    s.append(start)

429 1 B  Glc ->4)-beta-D-Glcp:reducing_end


In [10]:
tree1 = tree_set.get_tree(s[0])

In [11]:
tree1.get_residues()

vector1_unsigned_long[429, 430, 431, 432, 433]

In [12]:
#single test

out = ""

#get the number of trees
tree_set = pose.glycan_tree_set()
s = []
for start in tree_set.get_start_points():
    #print(start, pose.pdb_info().pose2pdb(start), pose.residue_type(start).name3(), pose.residue_type(start).name())
    s.append(start)

prot = pose.clone()
    
ind = 0;
for i in s:
    tree = tree_set.get_tree(i)
    res = np.array(tree.get_residues())
    carb = pose.clone()
    for j in range(pose.size()-1,0,-1):
        if j not in res:
            carb.delete_residue_slow(j)
    #print(carb)
    
    
    #save the carb
    carb.dump_pdb('pdb_pre/1bag_carb' + str(ind) + ".pdb")
    
    #relax the carb and save rosetta relax
    #fr.apply(carb)
    #carb.dump_pdb('pdb_pre/1bag_carb' + str(ind) + "_rosRel.pdb")
    
    ind += 1;
    

#have a clean protein file
for j in range(prot.size(),0,-1):
    #print(j)
    if prot.residue(j).is_protein():
        continue;
    print(j)
    prot.delete_residue_slow(j)
    

#output the fasta
for ii in range(1,prot.num_chains()+1):
    out += ">1BAG_" + str(ii) + "\n"
    out += prot.chain_sequence(ii) + "\n"
    
prot.dump_pdb('pdb_pre/1bag_prot.pdb')
#print(out)
    

433
432
431
430
429
428
427
426


True

In [16]:
def output_pdbs(name,pose):

    out = ""

    #get the number of trees
    tree_set = pose.glycan_tree_set()
    s = []
    for start in tree_set.get_start_points():
        #print(start, pose.pdb_info().pose2pdb(start), pose.residue_type(start).name3(), pose.residue_type(start).name())
        s.append(start)

    #print(pose.size())
    prot = pose.clone()

    ind = 0;
    for i in s:
        tree = tree_set.get_tree(i)
        res = np.array(tree.get_residues())
        carb = pose.clone()
        for j in range(carb.size(),0,-1):
            if j == 0:
                break;
            #print(j,res)
            if j not in res:
                #print('\t',j)
                carb.delete_residue_slow(j)

        #print(carb)


        #save the carb
        carb.dump_pdb('pdb_pre/' + name + '_carb' + str(ind) + ".pdb")

        #relax the carb and save rosetta relax
        fr.apply(carb)
        carb.dump_pdb('pdb_pre/' + name + '_carb' + str(ind) + "_rosRel.pdb")

        ind += 1;

    #print(pose.size(),carb.size(),prot.size())
    #have a clean protein file
    for j in range(prot.size(),0,-1):
        #print(j)
        if j == 0:
            break;
        if prot.residue(j).is_protein():
            continue;
        #print(j)
        prot.delete_residue_slow(j)

    #print(pose.size(),carb.size(),prot.size())
    #output the fasta
    for ii in range(1,prot.num_chains()+1):
        out += ">" + name + "_" + str(ii) + "\n"
        out += prot.chain_sequence(ii) + "\n"

    prot.dump_pdb('pdb_pre/' + name + '_prot.pdb')
    #print(out)
    return out
    

In [45]:
output_pdbs('1bag',pyrosetta.toolbox.pose_from_rcsb('1bag'.upper(),ATOM=False))

433
432 [429 430 431 432 433]
431 [429 430 431 432 433]
430 [429 430 431 432 433]
429 [429 430 431 432 433]
428 [429 430 431 432 433]
	 428
427 [429 430 431 432 433]
	 427
426 [429 430 431 432 433]
	 426
425 [429 430 431 432 433]
	 425
424 [429 430 431 432 433]
	 424
423 [429 430 431 432 433]
	 423
422 [429 430 431 432 433]
	 422
421 [429 430 431 432 433]
	 421
420 [429 430 431 432 433]
	 420
419 [429 430 431 432 433]
	 419
418 [429 430 431 432 433]
	 418
417 [429 430 431 432 433]
	 417
416 [429 430 431 432 433]
	 416
415 [429 430 431 432 433]
	 415
414 [429 430 431 432 433]
	 414
413 [429 430 431 432 433]
	 413
412 [429 430 431 432 433]
	 412
411 [429 430 431 432 433]
	 411
410 [429 430 431 432 433]
	 410
409 [429 430 431 432 433]
	 409
408 [429 430 431 432 433]
	 408
407 [429 430 431 432 433]
	 407
406 [429 430 431 432 433]
	 406
405 [429 430 431 432 433]
	 405
404 [429 430 431 432 433]
	 404
403 [429 430 431 432 433]
	 403
402 [429 430 431 432 433]
	 402
401 [429 430 431 432 433]
	 

'>1bag_1\nLTAPSIKSGTILHAWNWSFNTLKHNMKDIHDAGYTAIQTSPINQVKEGNQGDKSMSNWYWLYQPTSYQIGNRYLGTEQEFKEMCAAAEEYGIKVIVDAVINHTTFDYAAISNEVKSIPNWTHGNTQIKNWSDRWDVTQNSLLGLYDWNTQNTQVQSYLKRFLERALNDGADGFRFDAAKHIELPDDGSYGSQFWPNITNTSAEFQYGQILQDSASRDAAYANYMDVTASNYGHSIRSALKNRNLGVSNISHYASDVSADKLVTWVESHDTYANDDEESTWMSDDDIRLGWAVIASRSGSTPLFFSRPEGGGNGVRFPGKSQIGDRGSALFEDQAITAVNRFHNVMAGQPEELSNPNGNNQIFMNQRGSHGVVLANAGSSSVSINTATKLPDGRYDNKAGAGSFQVNDGKLTGTINARSVAVLYPD\n'

In [22]:
fa = ''
for ii in df[18:21]:

    #print(ii[0])
    #try:
        pdb = ii[0]
        print(pdb)
        pose = pyrosetta.toolbox.pose_from_rcsb(pdb.upper(),ATOM=False)
        fa += output_pdbs(pdb,pose)

        f = open('pdb_pre/fasta.fa','w+')
        f.write(fa)
        f.close()
    #except:
    #    print("Unable: ",pdb)
    
    #break;

1e5j



ERROR: Error in core::conformation::Conformation::residue(): The sequence position requested was 0.  Pose numbering starts at 1.
ERROR:: Exit from: /Volumes/scratch/w/rosetta/commits/rosetta/source/build/PyRosetta/macOS-12.7-arm64-arm-64bit/clang-14.0.0/python-3.11/release/source/src/core/conformation/Conformation.hh line: 513


RuntimeError: 

File: /Volumes/scratch/w/rosetta/commits/rosetta/source/build/PyRosetta/macOS-12.7-arm64-arm-64bit/clang-14.0.0/python-3.11/release/source/src/core/conformation/Conformation.hh:513
[ ERROR ] UtilityExitException
ERROR: Error in core::conformation::Conformation::residue(): The sequence position requested was 0.  Pose numbering starts at 1.

