In [None]:
import requests
import csv
import time

def fetch_pdb_ids(uniprot_id, max_results=338):
    """Return the PDB entry IDs that reference a UniProt accession.

    Sends an exact-match text query against the RCSB search service for
    entries whose polymer entities list ``uniprot_id`` as a reference
    sequence accession, and prints the raw response for inspection.

    Args:
        uniprot_id: UniProt accession to search for (e.g. ``"P61823"``).
        max_results: Maximum number of IDs to return; the hit list is
            truncated to this length. ``None`` disables the truncation.

    Returns:
        A list of PDB entry identifiers (possibly empty).

    Raises:
        requests.HTTPError: If the search service answers with a 4xx/5xx
            status.
        requests.RequestException: On connection problems or timeout.
    """
    print(f"Fetching PDB IDs for UniProt ID: {uniprot_id}")
    url = "https://search.rcsb.org/rcsbsearch/v2/query"
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_accession",
                "operator": "exact_match",
                "value": uniprot_id,
            },
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": True,
        },
    }

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.post(url, json=query, timeout=30)
    print("Raw API response:")
    print(response.text)

    # Surface HTTP failures instead of silently treating them as "no hits".
    response.raise_for_status()

    # The search API signals "no hits" with 204 No Content and an empty
    # body, which cannot be parsed as JSON.
    if response.status_code == 204 or not response.text.strip():
        pdb_ids = []
    else:
        data = response.json()
        pdb_ids = [result["identifier"] for result in data.get("result_set", [])][:max_results]

    print(f"Found {len(pdb_ids)} PDB IDs: {pdb_ids}")
    return pdb_ids


def fetch_data(pdb_id, entity_id=1):
    """Fetch the description and mutation count of one polymer entity.

    Queries the RCSB data API ``core/polymer_entity`` endpoint for the given
    entry and entity.

    Args:
        pdb_id: PDB entry identifier (e.g. ``"1A2W"``).
        entity_id: Polymer entity number within the entry. Defaults to 1,
            the entity this function has always queried.

    Returns:
        A ``(description, mutation_count)`` tuple taken from
        ``rcsb_polymer_entity.pdbx_description`` and
        ``entity_poly.rcsb_mutation_count``, or ``(None, None)`` when the
        endpoint does not answer with HTTP 200.

    Raises:
        KeyError: If a 200 response lacks one of the expected fields.
        requests.RequestException: On connection problems or timeout.
    """
    print(f"Fetching mutation count for PDB ID: {pdb_id}")
    url_mutation = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    # A timeout keeps a stalled connection from hanging the whole batch.
    mutation_response = requests.get(url_mutation, timeout=30)

    if mutation_response.status_code != 200:
        # Unknown entry/entity or service error: report "no data".
        return None, None

    mutation_data = mutation_response.json()
    mutation_count = mutation_data["entity_poly"]["rcsb_mutation_count"]
    pdb_desc = mutation_data["rcsb_polymer_entity"]["pdbx_description"]
    return pdb_desc, mutation_count

def main(uniprot_id):
    """Collect mutation counts for a UniProt accession and write them to CSV.

    Looks up the PDB entries for ``uniprot_id``, fetches the description and
    mutation count for each one, and writes the rows to
    ``mutation_counts.csv`` in the current working directory.

    Args:
        uniprot_id: UniProt accession to process (e.g. ``"P61823"``).
    """
    pdb_ids = fetch_pdb_ids(uniprot_id)
    mutation_data = []

    for pdb_id in pdb_ids:
        try:
            description, mutation_count = fetch_data(pdb_id)
        except Exception as e:
            # Best effort: one failing entry must not abort the whole batch.
            print(f"Error parsing mutation count for {pdb_id}: {e}")
            # Keep three columns so the row lines up with the CSV header.
            mutation_data.append((pdb_id, "Error", "Error"))
        else:
            mutation_data.append((pdb_id, description, mutation_count))

        time.sleep(1)  # Be gentle and nice to the API

    # Save results; explicit UTF-8 so non-ASCII descriptions do not depend
    # on the platform's default encoding.
    with open("mutation_counts.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["PDB_ID","PDB_Description", "Mutation_Count"])
        writer.writerows(mutation_data)

    print("Mutation data saved to mutation_counts.csv.")

# Example usage. Guarded so that importing this module does not trigger
# network requests; it still runs when executed as a script or notebook cell.
if __name__ == "__main__":
    main("P61823")

Fetching PDB IDs for UniProt ID: P61823
Raw API response:
{
  "query_id" : "7166e95c-2181-4457-921f-49affe467bab",
  "result_type" : "entry",
  "total_count" : 338,
  "result_set" : [ {
    "identifier" : "1A2W",
    "score" : 1.0
  }, {
    "identifier" : "1A5P",
    "score" : 1.0
  }, {
    "identifier" : "1A5Q",
    "score" : 1.0
  }, {
    "identifier" : "1AFK",
    "score" : 1.0
  }, {
    "identifier" : "1AFL",
    "score" : 1.0
  }, {
    "identifier" : "1AFU",
    "score" : 1.0
  }, {
    "identifier" : "1AQP",
    "score" : 1.0
  }, {
    "identifier" : "1B6V",
    "score" : 1.0
  }, {
    "identifier" : "1BEL",
    "score" : 1.0
  }, {
    "identifier" : "1BZQ",
    "score" : 1.0
  }, {
    "identifier" : "1C0B",
    "score" : 1.0
  }, {
    "identifier" : "1C0C",
    "score" : 1.0
  }, {
    "identifier" : "1C8W",
    "score" : 1.0
  }, {
    "identifier" : "1C9V",
    "score" : 1.0
  }, {
    "identifier" : "1C9X",
    "score" : 1.0
  }, {
    "identifier" : "1CJQ",
    "sc