In [7]:
import requests
import json

def fetch_pdb_ids(query: dict) -> list:
    """Run one RCSB Search API request and return the matched identifiers.

    Args:
        query: JSON-serializable request body (``query``, ``return_type``,
            ``request_options``) that is POSTed to the search endpoint as-is.

    Returns:
        The ``identifier`` value of every item in the response's
        ``result_set``. An empty list is returned when the server reports no
        content, when the HTTP request fails, or when the response body cannot
        be interpreted; failures are printed rather than raised.
    """
    url = "https://search.rcsb.org/rcsbsearch/v2/query"
    headers = {"Content-Type": "application/json"}

    # Keep the try body limited to the network call so that only transport and
    # HTTP-status problems are reported as a failed request.
    try:
        response = requests.post(url, json=query, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return []

    # raise_for_status() only rejects 4xx/5xx. Any other non-200 success code
    # (e.g. 204 No Content, which the search service uses for "no matches")
    # carries no JSON body to parse.
    if response.status_code != 200:
        return []

    try:
        data = response.json()
        ids = [item["identifier"] for item in data.get("result_set", [])]
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        print(f"Unexpected response format: {e}")
        return []

    # A single response holds at most one page. Say so when the server reports
    # more matches than were returned instead of silently dropping them.
    total = data.get("total_count")
    if isinstance(total, int) and total > len(ids):
        print(
            f"Note: received {len(ids)} of {total} total matches; "
            "adjust request_options.paginate to retrieve the rest."
        )

    return ids

# Protein-peptide complex search. Every filter below must hold (logical AND):
# the entry lists a "Protein" polymer entity type, it was deposited with more
# than one polymer entity instance, and its structure keywords contain the
# phrase "peptide". Only experimental entries are requested, first 10000 rows.
# NOTE(review): confirm that the first attribute name is a searchable field in
# the RCSB schema; an unknown attribute would make the service reject the query.
_peptide_filters = (
    ("rcsb_entry_container_identifiers.polymer_entity_types", "contains_phrase", "Protein"),
    ("rcsb_entry_info.deposited_polymer_entity_instance_count", "greater", 1),
    ("struct_keywords.pdbx_keywords", "contains_phrase", "peptide"),
)

peptide_query = {
    "query": {
        "type": "group",
        "logical_operator": "and",
        # One text-service terminal node per (attribute, operator, value) row.
        "nodes": [
            {
                "type": "terminal",
                "service": "text",
                "parameters": {
                    "attribute": attribute,
                    "operator": operator,
                    "value": value,
                },
            }
            for attribute, operator, value in _peptide_filters
        ],
    },
    "return_type": "entry",
    "request_options": {
        "results_content_type": ["experimental"],
        "paginate": {"start": 0, "rows": 10000},
    },
}

# Second search, used below as the "Protein-Protein Complexes" set.
# As written it matches entries whose title contains the word "Protein" and
# whose structure determination methodology is exactly "experimental".
# NOTE(review): neither filter restricts results to multi-protein complexes;
# confirm this is the intended selection.
_protein_title_node = {
    "type": "terminal",
    "service": "text",
    "parameters": {
        "attribute": "struct.title",
        "operator": "contains_words",
        "value": "Protein",
    },
}

_protein_methodology_node = {
    "type": "terminal",
    "service": "text",
    "parameters": {
        "attribute": "rcsb_entry_info.structure_determination_methodology",
        "operator": "exact_match",
        "value": "experimental",
    },
}

Protein_query = {
    "query": {
        "type": "group",
        "logical_operator": "and",  # both nodes must match
        "nodes": [_protein_title_node, _protein_methodology_node],
    },
    "return_type": "entry",
    "request_options": {"paginate": {"start": 0, "rows": 10000}},
}

# --- Search 1: protein-peptide entries ---
print("Fetching Protein-Peptide Complexes...")
peptide_ids = fetch_pdb_ids(peptide_query)
print(f"Found {len(peptide_ids)} peptide complexes")

# --- Search 2: entries selected by Protein_query ---
print("\nFetching Protein-Protein Complexes...")
Protein_ids = fetch_pdb_ids(Protein_query)
print(f"Found {len(Protein_ids)} Protein complexes")

# Merge both result lists, keeping each identifier once. Set order is
# arbitrary, so the listing below sorts before printing.
all_ids = list({*peptide_ids, *Protein_ids})

print("\nTotal unique PDB IDs found:", len(all_ids))
print("\nPDB IDs:")
# One identifier per line, in ascending order.
for pdb_id in sorted(all_ids):
    print(pdb_id)

Streaming output truncated to the last 5000 lines.
3P8A
3PBF
3PCQ
3PEO
3PES
3PF6
3PFQ
3PFS
3PFU
3PG0
3PGG
3PGP
3PGZ
3PH9
3PHY
3PIB
3PIK
3PJ5
3PJ7
3PJB
3PKN
3PL5
3PLU
3PLV
3PLW
3PM8
3PMR
3PMZ
3POM
3POO
3PQI
3PR1
3PR6
3PR9
3PRA
3PRB
3PRD
3PS0
3PS4
3PS5
3PTJ
3PTO
3PU6
3PVB
3PVH
3PVN
3PVO
3PW9
3PWX
3PYP
3PZY
3Q1C
3Q1J
3Q1S
3Q34
3Q4N
3Q62
3Q6K
3Q6P
3Q6T
3Q73
3Q75
3Q78
3Q79
3Q7A
3Q7E
3Q7F
3Q7L
3Q90
3Q98
3Q9N
3Q9U
3QA9
3QAC
3QAT
3QC1
3QCB
3QCN
3QDN
3QFE
3QFW
3QHQ
3QII
3QIJ
3QIS
3QJG
3QJJ
3QJL
3QJP
3QKP
3QKQ
3QOD
3QOE
3QOO
3QOP
3QOR
3QOY
3QQ5
3QQT
3QR8
3QSG
3QSQ
3QUA
3QUJ
3QVE
3QWE
3QY3
3QYF
3QZ0
3QZC
3R15
3R1O
3R26
3R3L
3R3R
3R3T
3R6A
3R6F
3R6U
3R72
3R79
3R87
3R8J
3R8K
3R90
3R9J
3RAP
3RAT
3RAY
3RAZ
3RBB
3RBH
3RBN
3RBY
3RD5
3RDI
3RDK
3REA
3REB
3REC
3RF2
3RG2
3RHI
3RHN
3RIG
3RIP
3RIR
3RIY
3RKC
3RKW
3RKX
3RKY
3RLS
3RMH
3RMQ
3RMS
3RNR
3RNS
3ROB
3ROF
3ROU
3RPF
3RPJ
3RPQ
3RPT
3RPX
3RQ9
3RRC
3RRI
3RRR
3RRT
3RRW
3RS1
3RSW
3RT3
3RTY
3RWT
3RX6
3RXY
3RY3
3RYD
3RYP
3RYR
3RZY
3S05
3S0A
3S0R
