# File download using Biopython

In [1]:
from Bio.PDB import PDBList

In [2]:
# Create the PDBList helper that is used below to download structure files,
# then confirm which class the object belongs to.
pdb_list = PDBList()
type(pdb_list)

Bio.PDB.PDBList.PDBList

In [3]:
# A bare last expression is echoed by the notebook using the object's repr.
pdb_list

<Bio.PDB.PDBList.PDBList at 0x29247d25c10>

In [4]:
# print() writes the str() form of the same object to stdout.
print(pdb_list)

<Bio.PDB.PDBList.PDBList object at 0x0000029247D25C10>


In [5]:
# Download the mmCIF file for entry 4hhb into Data/PDB_folder.
# retrieve_pdb_file returns the path of the saved file, which is printed.
pdb_id = '4hhb'
pdb_filename = pdb_list.retrieve_pdb_file(
    pdb_id,
    pdir="Data/PDB_folder",
    file_format="mmCif",
)
print(pdb_filename)

Downloading PDB structure '4hhb'...
Data/PDB_folder\4hhb.cif


Programmatic Access of APIs
The Requests library is used to retrieve information from websites and URLs.

In [1]:
import requests

# PDB Data API

To get information from a URL, we use the requests.get() method.
The argument to this function is the URL we'd like to retrieve information from.

In [2]:
# Request the core entry record for PDB ID 4hhb from the RCSB Data API.
# The timeout makes a stalled connection raise requests.exceptions.Timeout
# instead of blocking the kernel indefinitely (requests has no default timeout).
data = requests.get("https://data.rcsb.org/rest/v1/core/entry/4hhb", timeout=30)

In [3]:
# HTTP status code of the response; 200 means the request succeeded.
data.status_code

200

In [4]:
# Request an entry ID that does not exist, to see how the API reports a miss.
# The timeout makes a stalled connection raise requests.exceptions.Timeout
# instead of blocking the kernel indefinitely (requests has no default timeout).
data_2 = requests.get("https://data.rcsb.org/rest/v1/core/entry/4hhbkldjkldsg", timeout=30)

In [5]:
# HTTP status code for the invalid ID; 404 means the entry was not found.
data_2.status_code

404

To see the JSON associated with our request, we call the .json() method and save the result in a variable.
Our variable is now a Python dictionary, a data type that stores key-value pairs.

In [6]:
# Decode the JSON body of the entry response into Python objects and
# confirm the type of the top-level object.
info_4hhb = data.json()
print(type(info_4hhb))

<class 'dict'>


In [7]:
# Display the whole entry dictionary. The output is long; the cells below
# index individual keys instead.
info_4hhb

{'audit_author': [{'name': 'Fermi, G.', 'pdbx_ordinal': 1},
  {'name': 'Perutz, M.F.', 'pdbx_ordinal': 2}],
 'cell': {'angle_alpha': 90.0,
  'angle_beta': 99.34,
  'angle_gamma': 90.0,
  'length_a': 63.15,
  'length_b': 83.59,
  'length_c': 53.8,
  'zpdb': 4},
 'citation': [{'country': 'UK',
   'id': 'primary',
   'journal_abbrev': 'J.Mol.Biol.',
   'journal_id_astm': 'JMOBAK',
   'journal_id_csd': '0070',
   'journal_id_issn': '0022-2836',
   'journal_volume': '175',
   'page_first': '159',
   'page_last': '174',
   'pdbx_database_id_doi': '10.1016/0022-2836(84)90472-8',
   'pdbx_database_id_pub_med': 6726807,
   'rcsb_authors': ['Fermi, G.', 'Perutz, M.F.', 'Shaanan, B.', 'Fourme, R.'],
   'rcsb_is_primary': 'Y',
   'rcsb_journal_abbrev': 'J Mol Biol',
   'title': 'The crystal structure of human deoxyhaemoglobin at 1.74 A resolution',
   'year': 1984},
  {'country': 'UK',
   'id': '1',
   'journal_abbrev': 'Nature',
   'journal_id_astm': 'NATUAS',
   'journal_id_csd': '0006',
   'jou

In [8]:
# List the top-level keys available in the entry record.
info_4hhb.keys()

dict_keys(['audit_author', 'cell', 'citation', 'diffrn', 'entry', 'exptl', 'exptl_crystal', 'pdbx_audit_revision_category', 'pdbx_audit_revision_details', 'pdbx_audit_revision_group', 'pdbx_audit_revision_history', 'pdbx_audit_revision_item', 'pdbx_database_pdbobs_spr', 'pdbx_database_related', 'pdbx_database_status', 'rcsb_accession_info', 'rcsb_entry_container_identifiers', 'rcsb_entry_info', 'rcsb_primary_citation', 'refine', 'refine_hist', 'struct', 'struct_keywords', 'symmetry', 'rcsb_id'])

In [9]:
# Show every top-level value at once. The output is very long, so indexing a
# single key (as in the following cells) is usually more readable.
info_4hhb.values()

dict_values([[{'name': 'Fermi, G.', 'pdbx_ordinal': 1}, {'name': 'Perutz, M.F.', 'pdbx_ordinal': 2}], {'angle_alpha': 90.0, 'angle_beta': 99.34, 'angle_gamma': 90.0, 'length_a': 63.15, 'length_b': 83.59, 'length_c': 53.8, 'zpdb': 4}, [{'country': 'UK', 'id': 'primary', 'journal_abbrev': 'J.Mol.Biol.', 'journal_id_astm': 'JMOBAK', 'journal_id_csd': '0070', 'journal_id_issn': '0022-2836', 'journal_volume': '175', 'page_first': '159', 'page_last': '174', 'pdbx_database_id_doi': '10.1016/0022-2836(84)90472-8', 'pdbx_database_id_pub_med': 6726807, 'rcsb_authors': ['Fermi, G.', 'Perutz, M.F.', 'Shaanan, B.', 'Fourme, R.'], 'rcsb_is_primary': 'Y', 'rcsb_journal_abbrev': 'J Mol Biol', 'title': 'The crystal structure of human deoxyhaemoglobin at 1.74 A resolution', 'year': 1984}, {'country': 'UK', 'id': '1', 'journal_abbrev': 'Nature', 'journal_id_astm': 'NATUAS', 'journal_id_csd': '0006', 'journal_id_issn': '0028-0836', 'journal_volume': '295', 'page_first': '535', 'rcsb_authors': ['Perutz, M.

# dictionary_name["key_name"] returns the value stored under that key

In [10]:
# Look up the 'cell' record: the angle_* and length_* fields plus 'zpdb'.
info_4hhb["cell"]

{'angle_alpha': 90.0,
 'angle_beta': 99.34,
 'angle_gamma': 90.0,
 'length_a': 63.15,
 'length_b': 83.59,
 'length_c': 53.8,
 'zpdb': 4}

In [11]:
# Look up the experimental method; 'exptl' is returned as a list of records,
# each carrying a 'method' field.
info_4hhb['exptl']

[{'method': 'X-RAY DIFFRACTION'}]

In [12]:
# Look up the structure keywords (fields 'pdbx_keywords' and 'text').
info_4hhb["struct_keywords"]

{'pdbx_keywords': 'OXYGEN TRANSPORT', 'text': 'OXYGEN TRANSPORT'}

In [13]:
# Look up the 'struct' record, which carries the title of the entry.
info_4hhb['struct']

{'title': 'THE CRYSTAL STRUCTURE OF HUMAN DEOXYHAEMOGLOBIN AT 1.74 ANGSTROMS RESOLUTION'}

In [14]:
# Look up the summary record for the entry: atom and entity counts,
# experimental method, molecular weight, and similar fields.
info_4hhb['rcsb_entry_info']

{'assembly_count': 1,
 'branched_entity_count': 0,
 'cis_peptide_count': 0,
 'deposited_atom_count': 4779,
 'deposited_deuterated_water_count': 0,
 'deposited_hydrogen_atom_count': 0,
 'deposited_model_count': 1,
 'deposited_modeled_polymer_monomer_count': 574,
 'deposited_nonpolymer_entity_instance_count': 6,
 'deposited_polymer_entity_instance_count': 4,
 'deposited_polymer_monomer_count': 574,
 'deposited_solvent_atom_count': 221,
 'deposited_unmodeled_polymer_monomer_count': 0,
 'disulfide_bond_count': 0,
 'entity_count': 5,
 'experimental_method': 'X-ray',
 'experimental_method_count': 1,
 'inter_mol_covalent_bond_count': 0,
 'inter_mol_metalic_bond_count': 4,
 'molecular_weight': 64.74,
 'na_polymer_entity_types': 'Other',
 'nonpolymer_bound_components': ['HEM'],
 'nonpolymer_entity_count': 2,
 'nonpolymer_molecular_weight_maximum': 0.62,
 'nonpolymer_molecular_weight_minimum': 0.09,
 'polymer_composition': 'heteromeric protein',
 'polymer_entity_count': 2,
 'polymer_entity_count

# The format for querying interfaces is
`https://data.rcsb.org/rest/v1/core/interface/<pdb_id>/<assembly_id>/<interface_id>`

In [2]:
# Request interface 1 of assembly 1 for entry 4hhb from the RCSB Data API.
# The timeout makes a stalled connection raise requests.exceptions.Timeout
# instead of blocking the kernel indefinitely (requests has no default timeout).
interface = requests.get("https://data.rcsb.org/rest/v1/core/interface/4hhb/1/1", timeout=30)

In [3]:
# HTTP status code of the interface request; 200 means it succeeded.
interface.status_code

200

In [4]:
# Decode the JSON body of the interface response into Python objects.
interface_info = interface.json()

In [5]:
# Display the whole interface record. The output is long because the partner
# features include lengthy 'values' arrays.
interface_info

{'rcsb_id': '4HHB-1.1',
 'rcsb_interface_container_identifiers': {'entry_id': '4HHB',
  'assembly_id': '1',
  'interface_id': '1',
  'interface_entity_id': '1',
  'rcsb_id': '4HHB-1.1'},
 'rcsb_interface_partner': [{'interface_partner_identifier': {'entity_id': '1',
    'asym_id': 'C'},
   'interface_partner_feature': [{'name': 'Unbound ASA',
     'provenance_source': 'biojava-7.1.1',
     'type': 'ASA_UNBOUND',
     'feature_positions': [{'beg_seq_id': 1,
       'end_seq_id': 141,
       'values': [175.40798204351609,
        22.47780012672553,
        43.88487510543222,
        112.83651548058329,
        63.067326750915974,
        15.092271426424315,
        43.88143317652094,
        90.06075414022068,
        33.838691374419845,
        1.4780803877650932,
        104.8485059641843,
        63.921976926132096,
        4.1861483348042325,
        24.028397269394915,
        54.9441282194477,
        150.11843261572352,
        1.0928985089678787,
        38.319779967365456,
      

In [6]:
# List the top-level keys available in the interface record.
interface_info.keys()

dict_keys(['rcsb_id', 'rcsb_interface_container_identifiers', 'rcsb_interface_partner', 'rcsb_interface_operator', 'rcsb_interface_info', 'rcsb_latest_revision'])

In [7]:
# Summary of the interface: polymer composition, character, area and
# residue counts.
interface_info["rcsb_interface_info"]

{'polymer_composition': 'Protein (only)',
 'interface_character': 'hetero',
 'interface_area': 847.773205021308,
 'num_interface_residues': 44,
 'num_core_interface_residues': 11}

In [8]:
# Identifier of this interface record.
interface_info['rcsb_id']

'4HHB-1.1'

In [9]:
# Request interface 2 of assembly 1 for entry 4hhb from the RCSB Data API.
# The timeout makes a stalled connection raise requests.exceptions.Timeout
# instead of blocking the kernel indefinitely (requests has no default timeout).
interface_2 = requests.get("https://data.rcsb.org/rest/v1/core/interface/4hhb/1/2", timeout=30)

In [10]:
# HTTP status code of the second interface request; 200 means it succeeded.
interface_2.status_code

200

In [11]:
# Decode the second interface response and show its summary record, which has
# the same fields as the summary of interface 1.
interface_info_2 = interface_2.json()
interface_info_2['rcsb_interface_info']

{'polymer_composition': 'Protein (only)',
 'interface_character': 'hetero',
 'interface_area': 824.2706996807091,
 'num_interface_residues': 43,
 'num_core_interface_residues': 11}

# PDB Search API

In [12]:
import json

In [13]:
# Full-text search for the exact phrase "oxygen storage" (the inner double
# quotes are part of the search value), returning matching entries.
search_request = {
    "query": {
        "type": "terminal",
        "service": "full_text",
        "parameters": {"value": '"oxygen storage"'},
    },
    "return_type": "entry",
}

# The Search API takes the query as a JSON string, so serialize the dict.
my_query = json.dumps(search_request)

In [14]:
# Send the serialized query to the RCSB Search API.
# The JSON text is passed through `params` so that requests percent-encodes it;
# interpolating it into the URL leaves quotes, spaces and any '&', '#' or '+'
# in the search value unescaped, which can corrupt the query string.
# The timeout makes a stalled connection raise requests.exceptions.Timeout
# instead of blocking the kernel indefinitely.
data = requests.get(
    "https://search.rcsb.org/rcsbsearch/v2/query",
    params={"json": my_query},
    timeout=30,
)

In [15]:
# Decode the search response. It holds the total hit count and a
# 'result_set' list of {identifier, score} records.
results = data.json()
results

{'query_id': 'f3a4066f-6e14-4226-9235-88a9559a8965',
 'result_type': 'entry',
 'total_count': 668,
 'result_set': [{'identifier': '2BMM', 'score': 1.0},
  {'identifier': '1UVY', 'score': 0.9859882453159046},
  {'identifier': '1UVX', 'score': 0.9723636888318609},
  {'identifier': '1UX8', 'score': 0.959110474765721},
  {'identifier': '2AWC', 'score': 0.9347199214821668},
  {'identifier': '7DDS', 'score': 0.9347199214821668},
  {'identifier': '1D8U', 'score': 0.9336593282142941},
  {'identifier': '1JP6', 'score': 0.9336593282142941},
  {'identifier': '1JP8', 'score': 0.9336593282142941},
  {'identifier': '2EB8', 'score': 0.9336593282142941}],
 'facets': []}

In [16]:
# Confirm that the decoded search response is a plain dict.
type(results)

dict

In [17]:
# Drill down: first record of 'result_set', then its 'identifier' field.
results['result_set'][0]['identifier']

'2BMM'

In [18]:
# Store the identifier of the first hit in a variable and print it.
first_result = results["result_set"][0]["identifier"]
print(first_result)

2BMM


In [21]:
# Same lookup for the second hit (index 1 of 'result_set').
second_result = results['result_set'][1]['identifier']
print(second_result)

1UVY


In [36]:
# Fetch an ordinary web page to demonstrate status-code handling.
# The timeout makes a stalled connection raise requests.exceptions.Timeout
# instead of blocking the kernel indefinitely (requests has no default timeout).
x = requests.get("https://w3schools.com", timeout=30)

In [37]:
# Print the HTTP status code of the response.
print(x.status_code)

200


In [38]:
# Map the status codes of interest to a message; any other code prints nothing.
status_messages = {200: "Successful!", 404: "Not Found."}
if x.status_code in status_messages:
    print(status_messages[x.status_code])

Successful!


# Or

In [39]:
# Guard clause: a falsy response object is treated as a failure and raises;
# otherwise report success.
if not x:
    raise Exception(f"Non-success status code: {x.status_code}")
print("Success!")

Success!
