In [None]:
!pip install biopython numpy



In [None]:
# Ask the user for the model .cif files through Colab's upload helper.
# Later cells read the uploaded file names from `uploaded.keys()`.
from google.colab import files
uploaded = files.upload()

Saving fold_candidate5_mcherry_g4s3_nanobody_his6_model_0.cif to fold_candidate5_mcherry_g4s3_nanobody_his6_model_0.cif
Saving fold_candidate5_mcherry_g4s3_nanobody_his6_model_1.cif to fold_candidate5_mcherry_g4s3_nanobody_his6_model_1.cif
Saving fold_candidate5_mcherry_g4s3_nanobody_his6_model_2.cif to fold_candidate5_mcherry_g4s3_nanobody_his6_model_2.cif
Saving fold_candidate5_mcherry_g4s3_nanobody_his6_model_3.cif to fold_candidate5_mcherry_g4s3_nanobody_his6_model_3.cif
Saving fold_candidate5_mcherry_g4s3_nanobody_his6_model_4.cif to fold_candidate5_mcherry_g4s3_nanobody_his6_model_4.cif


In [None]:
import numpy as np
from Bio.PDB import MMCIFParser, Selection
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
import io

def get_global_plddt(cif_file):
    """Return the global pLDDT recorded in an mmCIF file, or None if unavailable.

    The score is read from the first entry of the
    ``_ma_qa_metric_global.metric_value`` item.

    Args:
        cif_file: Passed straight to ``MMCIF2Dict``.

    Returns:
        The score as a ``float``. ``None`` is returned (after printing a
        warning) when the item is missing, has no entries, or its first entry
        is not numeric, such as the mmCIF placeholders ``?`` and ``.``.
    """
    cif_dict = MMCIF2Dict(cif_file)
    try:
        values = cif_dict['_ma_qa_metric_global.metric_value']
        # Defensive: if the item comes back as a bare string rather than a list,
        # indexing [0] would silently pick its first character only.
        if isinstance(values, str):
            values = [values]
        return float(values[0])
    except (KeyError, IndexError, ValueError, TypeError):
        # This function only reports the problem and returns None; any fallback
        # to per-residue averages is left to the caller.
        print(f"Warning: Could not find global pLDDT in {cif_file}. Using per-residue average instead.")
        return None

def get_per_residue_plddt(cif_file, chain_id='A'):
    """Map residue numbers of one chain to the B-factor of their CA atom.

    The B-factor column is read as the per-residue pLDDT.

    Args:
        cif_file: mmCIF file handed to ``MMCIFParser.get_structure``.
        chain_id: ID of the chain to read (default ``'A'``).

    Returns:
        A dict ``{residue number: CA B-factor}``. Residues whose id has a
        non-blank first field (Biopython's hetero flag) and residues without a
        CA atom are left out. If several models contain the chain, values from
        later models overwrite earlier ones for the same residue number.
    """
    structure = MMCIFParser(QUIET=True).get_structure('model', cif_file)
    scores_by_resnum = {}

    # Flatten model -> chain traversal and keep only the requested chain.
    selected_chains = (
        chain
        for model in structure
        for chain in model
        if chain.id == chain_id
    )

    for chain in selected_chains:
        for residue in chain:
            full_id = residue.get_id()
            if full_id[0] != ' ':
                # Non-blank first field: not a standard residue record, skip it.
                continue
            try:
                scores_by_resnum[full_id[1]] = residue['CA'].get_bfactor()
            except KeyError:
                # No CA atom in this residue; nothing to record.
                continue

    return scores_by_resnum

# Names of the uploaded files, in the order the upload result provides them.
# NOTE(review): the results section labels the first global score as the
# "top-ranked" model, which presumably relies on model_0 being first here —
# confirm the upload order.
uploaded_files = list(uploaded.keys())
print(f"Uploaded files: {uploaded_files}")

# Assuming we want chain A. Change this if your protein is in a different chain.
chain_id = 'A'

# {residue number: [pLDDT from each model that contains that residue]}
all_model_data = {}
# Global pLDDT of every model that reported one, in processing order.
global_scores = []

for i, filename in enumerate(uploaded_files):
    print(f"\nProcessing model {i} ({filename}):")

    # Global pLDDT for this model. Compare against None explicitly: a plain
    # truthiness test would also discard a legitimate score of 0.0.
    global_plddt = get_global_plddt(filename)
    if global_plddt is not None:
        global_scores.append(global_plddt)
        print(f"  Global pLDDT: {global_plddt:.2f}")

    # Per-residue pLDDT for this model.
    model_plddt = get_per_residue_plddt(filename, chain_id)
    if not model_plddt:
        # Make a wrong chain ID (or an empty file) visible instead of letting
        # the model silently contribute nothing to the averages.
        print(f"  Warning: no CA pLDDT values found for chain {chain_id!r} in {filename}.")

    # Append each residue's value to that residue's list across models.
    for res_num, plddt_val in model_plddt.items():
        all_model_data.setdefault(res_num, []).append(plddt_val)

# Calculate and print results
print("\n" + "="*50)
print("RESULTS")
print("="*50)

# 1. Global scores (only if at least one model reported one)
if global_scores:
    print(f"Mean Global pLDDT across all models: {np.mean(global_scores):.2f}")
    # NOTE(review): global_scores[0] is the first model that reported a global
    # score; it is the top-ranked model only if that model was processed first
    # and had a score — confirm.
    print(f"Global pLDDT of top-ranked model: {global_scores[0]:.2f}")

# 2. Mean per-residue pLDDT across all models, in residue-number order
all_per_residue_means = []
print(f"\n{'Residue':<10} {'Mean pLDDT':<12} {'Num Models':<10}")
print("-" * 35)

for res_num in sorted(all_model_data.keys()):
    residue_scores = all_model_data[res_num]
    mean_val = np.mean(residue_scores)
    all_per_residue_means.append(mean_val)
    print(f"{res_num:<10} {mean_val:<12.2f} {len(residue_scores):<10}")

# 3. The overall mean pLDDT (mean of the per-residue means)
if all_per_residue_means:
    final_mean_plddt = np.mean(all_per_residue_means)
else:
    # np.mean([]) would emit a RuntimeWarning and yield nan without saying why;
    # keep the nan result but explain it.
    final_mean_plddt = np.nan
    print("Warning: no per-residue pLDDT values were collected; check the chain ID and input files.")
print("="*35)
print(f"{'Final Mean pLDDT:':<20} {final_mean_plddt:.2f}")
print("="*35)

Uploaded files: ['fold_candidate5_mcherry_g4s3_nanobody_his6_model_0.cif', 'fold_candidate5_mcherry_g4s3_nanobody_his6_model_1.cif', 'fold_candidate5_mcherry_g4s3_nanobody_his6_model_2.cif', 'fold_candidate5_mcherry_g4s3_nanobody_his6_model_3.cif', 'fold_candidate5_mcherry_g4s3_nanobody_his6_model_4.cif']

Processing model 0 (fold_candidate5_mcherry_g4s3_nanobody_his6_model_0.cif):
  Global pLDDT: 67.83

Processing model 1 (fold_candidate5_mcherry_g4s3_nanobody_his6_model_1.cif):
  Global pLDDT: 65.84

Processing model 2 (fold_candidate5_mcherry_g4s3_nanobody_his6_model_2.cif):
  Global pLDDT: 68.68

Processing model 3 (fold_candidate5_mcherry_g4s3_nanobody_his6_model_3.cif):
  Global pLDDT: 63.21

Processing model 4 (fold_candidate5_mcherry_g4s3_nanobody_his6_model_4.cif):
  Global pLDDT: 62.14

RESULTS
Mean Global pLDDT across all models: 65.54
Global pLDDT of top-ranked model: 67.83

Residue    Mean pLDDT   Num Models
-----------------------------------
1          26.36        5    

**Reference:**

OpenAI. (2025, September 6). *ChatGPT* (Version 4). [Code generated in response to prompt: "Write a Python program to extract the mean pLDDT from a .cif file."]. https://chat.openai.com