In [None]:
import requests
import json

def fetch_protein_summary(pdb_id, timeout=30):
    """Fetch the PDBe entry-summary JSON for one PDB entry.

    Args:
        pdb_id: PDB entry identifier (e.g. '5G6U'); inserted into the request
            URL exactly as given.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (after printing a failure message).

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    url = f"https://www.ebi.ac.uk/pdbe/api/pdb/entry/summary/{pdb_id}"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print("Failed to retrieve summary data")
        return None
    return response.json()

def fetch_protein_sequences(pdb_id, timeout=30):
    """Fetch the PDBe entity (molecule) listing JSON for one PDB entry.

    Args:
        pdb_id: PDB entry identifier (e.g. '5G6U'); inserted into the request
            URL exactly as given.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (after printing a failure message).

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    url = f"https://www.ebi.ac.uk/pdbe/api/pdb/entry/entities/{pdb_id}"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print("Failed to retrieve sequence data")
        return None
    return response.json()

# Example usage
pdb_id = '5G6U'
protein_summary = fetch_protein_summary(pdb_id)
protein_sequences = fetch_protein_sequences(pdb_id)

# print("Summary Data:")
# print(json.dumps(protein_summary, indent=4))

print("\nSequence Data:")
print(json.dumps(protein_sequences, indent=4))

# fetch_protein_sequences returns None when the request fails; indexing None
# would raise TypeError, so the per-entity report is skipped in that case.
if protein_sequences is not None:
    # The entity list is looked up under the lower-cased entry id; fall back
    # to an empty list if that key is absent instead of raising KeyError.
    for entity in protein_sequences.get(pdb_id.lower(), []):
        if entity['molecule_type'] == 'polypeptide(L)':
            # Only the first element of 'in_chains' is reported per entity.
            print(f"Chain ID: {entity['in_chains'][0]}, Sequence: {entity['sequence']}, Length: {len(entity['sequence'])}")


In [3]:
import requests
import os

def download_pdb_file(pdb_id, directory='results', timeout=30):
    """Download the PDB-format file for an entry from RCSB and save it locally.

    Args:
        pdb_id: PDB entry identifier (e.g. '5G6U'); used both in the download
            URL and as the saved file's base name.
        directory: Folder the file is written into; created if missing.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        Path of the saved ``<pdb_id>.pdb`` file on HTTP 200, otherwise
        ``None`` (after printing a failure message).

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
        OSError: If the directory or file cannot be created.
    """
    # Use HTTPS so the downloaded structure cannot be tampered with in transit.
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print("Failed to download PDB file")
        return None

    os.makedirs(directory, exist_ok=True)
    file_path = os.path.join(directory, f"{pdb_id}.pdb")
    # Write the raw bytes: going through response.text would decode with a
    # guessed charset and re-encode with the platform default (plus newline
    # translation in text mode), which can alter or fail on the payload.
    with open(file_path, 'wb') as file:
        file.write(response.content)
    return file_path

# Example usage: download the coordinate file for a single entry
pdb_id = '5G6U'
pdb_file_path = download_pdb_file(pdb_id)

# download_pdb_file returns None on failure, so only report a successful download.
if pdb_file_path:
    print(f"PDB file downloaded to {pdb_file_path}")
    # Placeholder: DSSP (or another analysis tool) would be run on the
    # downloaded PDB file at this point.


PDB file downloaded to results/5G6U.pdb


In [9]:
from Bio.PDB import PDBParser, DSSP, PPBuilder

def run_dssp(pdb_file_path, dssp_executable="/usr/bin/dssp"):
    """Run DSSP on the first model of a PDB file and collect per-residue data.

    Args:
        pdb_file_path: Path to the PDB file to parse and hand to DSSP.
        dssp_executable: Path to the DSSP binary. Defaults to the location
            that was previously hard-coded.

    Returns:
        A 5-tuple of parallel lists, one entry per DSSP record:
        ``(amino_acids, ss3, asa, phi, psi)``.
    """
    parser = PDBParser()
    structure = parser.get_structure("protein", pdb_file_path)
    model = structure[0]  # Analyze the first model in the PDB file

    dssp = DSSP(model, pdb_file_path, dssp=dssp_executable)
    amino_acids = []
    ss3 = []
    asa = []
    phi = []
    psi = []
    for record in dssp:
        # Biopython's DSSP record layout is
        # (DSSP index, amino acid, secondary structure, relative ASA, phi, psi, ...).
        # Index 0 is the running DSSP index, so the residue letter is at index 1.
        amino_acids.append(record[1])
        # DSSP's own secondary-structure code, stored as-is (no reduction to
        # three states is performed here despite the list name).
        ss3.append(record[2])
        asa.append(record[3])  # relative accessible surface area
        phi.append(record[4])
        psi.append(record[5])
    # end for

    return amino_acids, ss3, asa, phi, psi
# end def
amino_acids, ss3, asa, phi, psi = run_dssp(pdb_file_path)



In [22]:
from Bio.PDB import PDBParser, DSSP, PPBuilder

def run_dssp(pdb_file_path, dssp_executable="/usr/bin/dssp"):
    """Print each chain's peptide sequences, then return DSSP per-residue data.

    The returned lists are computed inside the function. Previously the
    ``return`` statement referenced names that were never assigned here, so
    the function only worked when same-named globals had been left behind by
    an earlier cell, and raised ``NameError`` on a fresh kernel.

    NOTE(review): this cell redefines ``run_dssp`` from an earlier cell;
    consider giving one of them a distinct name.

    Args:
        pdb_file_path: Path to the PDB file to parse.
        dssp_executable: Path to the DSSP binary.

    Returns:
        A 5-tuple of parallel lists, one entry per DSSP record:
        ``(amino_acids, ss3, asa, phi, psi)``.
    """
    parser = PDBParser()
    structure = parser.get_structure("protein", pdb_file_path)
    first_model = structure[0]  # Analyze the first model in the PDB file
    print(first_model)

    # Report the polypeptide sequences PPBuilder finds in every chain of
    # every model.
    ppb = PPBuilder()
    for model in structure:
        print('model')
        for chain in model:
            for pp in ppb.build_peptides(chain):
                sequence = pp.get_sequence()
                print(f"Chain {chain.id} Sequence: {sequence}, Length: {len(sequence)}")

    dssp = DSSP(first_model, pdb_file_path, dssp=dssp_executable)
    amino_acids = []
    ss3 = []
    asa = []
    phi = []
    psi = []
    for record in dssp:
        # Biopython's DSSP record layout is
        # (DSSP index, amino acid, secondary structure, relative ASA, phi, psi, ...).
        amino_acids.append(record[1])
        ss3.append(record[2])  # DSSP secondary-structure code, stored as-is
        asa.append(record[3])  # relative accessible surface area
        phi.append(record[4])
        psi.append(record[5])
    # end for

    return amino_acids, ss3, asa, phi, psi
# end def
amino_acids, ss3, asa, phi, psi = run_dssp(pdb_file_path)

<Model id=0>
model
Chain A Sequence: SQGWKYFKGNFYYFSLIPKTWYSAEQFCVSRNSHLTSVTSESEQEFLYKTAGGLIYWIGLTKAGMEGDWSWVDDTPFNKVQSVRFWIPGEPNNAGNNEHCGNIKAPSLQAWNDAPCDKTFLFICKRPYVPS, Length: 131
Chain B Sequence: GWKYFKGNFYYFSLIPKTWYSAEQFCVSRNSHLTSVTSESEQEFLYKTAGGLIYWIGLTKAGMEGDWSWVDDTPFNKVQSVRFWIPGEPNNAGNNEHCGNIKAPSLQAWNDAPCDKTFLFICKRPYVP, Length: 128
Chain C Sequence: GWKYFKGNFYYFSLIPKTWYSAEQFCVSRNSHLTSVTSESEQEFLYKTAGGLIYWIGLTKAGMEGDWSWVDDTPFNKVQSVRFWIPGEPNNAGNNEHCGNIKAPSLQAWNDAPCDKTFLFICKRPYVP, Length: 128
Chain D Sequence: GWKYFKGNFYYFSLIPKTWYSAEQFCVSRNSHLTSVTSESEQEFLYKTAGGLIYWIGLTKAGMEGDWSWVDDTPFNKVQSVRFWIPGEPNNAGNNEHCGNIKAPSLQAWNDAPCDKTFLFICKRPYV, Length: 127




In [10]:
# Number of entries in the amino_acids list returned by run_dssp.
print(len(amino_acids))

514
