In [1]:
cd /home/mp6576/comp-class-2023/comp-lab-class-2023/Week7/Inputs/rcsb_pdb_1UBQ_AlphaFold Greene

/home/mp6576/comp-class-2023/comp-lab-class-2023/Week7/Inputs/rcsb_pdb_1UBQ_AlphaFold Greene


In [5]:
from Bio.PDB import PDBParser

def extract_plddt_scores(pdb_file):
    plddt_scores = []
    residue_numbers = []
    
    # Create a PDB parser object
    parser = PDBParser()
    
    # Parse the PDB file
    structure = parser.get_structure('AlphaFold', pdb_file)
    
    # Iterate through atoms and extract B-factor values (pLDDT scores) along with residue numbers
    for model in structure:
        for chain in model:
            for residue in chain:
                residue_numbers.append(residue.id[1])
                for atom in residue:
                    # Check if B-factor value exists (some PDB files might not have it)
                    if atom.bfactor:
                        plddt_scores.append(atom.bfactor)
    
    return plddt_scores, residue_numbers

def print_plddt_scores_as_column(pdb_file):
    # Extract pLDDT scores and residue numbers
    plddt_scores, residue_numbers = extract_plddt_scores(pdb_file)
    
    # Print scores and residue numbers as a vertical column
    print("Residue Number\tpLDDT Score")
    print("---------------------------")
    for residue_number, score in zip(residue_numbers, plddt_scores):
        print(f"{residue_number}\t\t{score}")

def save_plddt_scores_to_file(pdb_file, output_file):
    # Extract pLDDT scores and residue numbers
    plddt_scores, residue_numbers = extract_plddt_scores(pdb_file)
    
    # Write scores and residue numbers to a text file
    with open(output_file, 'w') as f:
        f.write("Residue Number\tpLDDT Score\n")
        f.write("---------------------------\n")
        for residue_number, score in zip(residue_numbers, plddt_scores):
            f.write(f"{residue_number}\t\t{score}\n")

# Example usage
pdb_file = 'unrelaxed_model_1_pred_0.pdb'
output_file = 'plddt_scores.txt'

# Print pLDDT scores as a vertical column
print_plddt_scores_as_column(pdb_file)

# Save pLDDT scores to a text file
save_plddt_scores_to_file(pdb_file, output_file)
print(f"pLDDT scores saved to {output_file}")

Residue Number	pLDDT Score
---------------------------
1		95.5
2		95.5
3		95.5
4		95.5
5		95.5
6		95.5
7		95.5
8		95.5
9		97.02
10		97.02
11		97.02
12		97.02
13		97.02
14		97.02
15		97.02
16		97.02
17		97.02
18		98.19
19		98.19
20		98.19
21		98.19
22		98.19
23		98.19
24		98.19
25		98.19
26		98.3
27		98.3
28		98.3
29		98.3
30		98.3
31		98.3
32		98.3
33		98.3
34		98.3
35		98.3
36		98.3
37		98.15
38		98.15
39		98.15
40		98.15
41		98.15
42		98.15
43		98.15
44		97.93
45		97.93
46		97.93
47		97.93
48		97.93
49		97.93
50		97.93
51		97.93
52		97.93
53		94.76
54		94.76
55		94.76
56		94.76
57		94.76
58		94.76
59		94.76
60		90.29
61		90.29
62		90.29
63		90.29
64		90.29
65		90.29
66		90.29
67		90.29
68		91.94
69		91.94
70		91.94
71		91.94
72		91.94
73		91.94
74		91.94
75		93.74
76		93.74
pLDDT scores saved to plddt_scores.txt


In [8]:
def generate_pdb_with_bfactor(input_file, output_file, template_file):
    # Read pLDDT scores from text file, skipping the header line
    scores = []
    with open(input_file, 'r') as f:
        header = f.readline()  # Read and discard the header line
        for line in f:
            # Split the line into residue number and pLDDT score
            parts = line.split()
            if len(parts) >= 2:
                residue_number = parts[0]
                plddt_score = float(parts[1])
                scores.append((residue_number, plddt_score))
    
    # Write pLDDT scores to a new PDB file
    with open(output_file, 'w') as f:
        with open(template_file, 'r') as template:
            for line in template:
                if line.startswith('ATOM') or line.startswith('HETATM'):
                    # Find the residue number in the line and replace the B-factor value with pLDDT score
                    residue_number = line[22:26].strip()
                    for (score_residue_number, plddt_score) in scores:
                        if residue_number == score_residue_number:
                            line = line[:60] + f"{plddt_score:6.2f}" + line[66:]
                            break
                f.write(line)

# Example usage
input_file = 'plddt_scores.txt'
output_file = 'protein_with_plddt_scores.pdb'
template_file = 'unrelaxed_model_1_pred_0.pdb'  # Your template PDB file
generate_pdb_with_bfactor(input_file, output_file, template_file)
print(f"New PDB file with pLDDT scores generated: {output_file}")

New PDB file with pLDDT scores generated: protein_with_plddt_scores.pdb
