In [1]:
import pandas as pd
import numpy as np
from prody import *
import random
import csv
import os
import urllib.request
print("Imports done")

Imports done


In [None]:
tm_helices = pd.read_csv("tm_helices.csv")
samples=len(tm_helices.axes[0])
print("TM helices:", samples)

transmembrane_pdbids = tm_helices["PDB ID"].unique()

In [None]:
# Download Helices
filenames = []
print("Download pdbs ...")
print(len(transmembrane_pdbids))                
transmembrane_pdbids = list(transmembrane_pdbids)
filenames = fetchPDB(transmembrane_pdbids)
print(filenames)

In [93]:
# Parse helices
import sys

d = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
     'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N', 
     'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W', 
     'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}

helix_sequences = []
def parse_helices_pdb(pdb):
    atoms, header = parsePDB(pdb, header=True)
    helix_ranges = header.get('helix_range')
    helix_indices = []
    for helix_range in helix_ranges:
        helix_chain = helix_range[1]
        helix_indices.append([helix_chain, helix_range[4],helix_range[5]])

    sequence = {}
    for atom in atoms:
        if atom.getChid() in sequence.keys():
            sequence.get(atom.getChid()).update({atom.getResnum() : d.get(atom.getResname())})
        else:
            sequence[atom.getChid()] = {atom.getResnum() : d.get(atom.getResname())}
    for helix_index in helix_indices:
        helix_start_coords = []
        helix_end_coords = []
        helix_sequence = ""
        helix_start = helix_index[1]
        helix_end = helix_index[2]
        for key in range(helix_start,helix_end+1):
            try:
                helix_sequence = helix_sequence+sequence.get(helix_index[0]).get(key)
            except:
                continue
        helix_start_coords = list(atoms[helix_chain, helix_start]["CA"].getCoords())
        helix_end_coords = list(atoms[helix_chain, helix_end]["CA"].getCoords())
        helix = [header.get("identifier"), helix_chain, helix_start, helix_end, helix_start_coords, helix_end_coords, helix_end-helix_start, helix_sequence, "NaN"]    
        helix_sequences.append(helix)
    return helix_sequences

issues = []
path="./pdbtm_pdb/"
#path="./"
for file in os.listdir(path):
    if file.endswith(".gz"):
        try:
            parse_helices_pdb(path+file)
        except:
            issues.append(file)

columns = ["PDB ID", "Chain", "Helix Start", "Helix End", "Helix Start CA", "Helix End CA", "Helix Length", "Helix Sequence", "Is Transmembrane"]
print(pd.DataFrame(helix_sequences, columns=columns))
print("FINISHED!")

@> 2402 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 10342 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 9734 atoms and 1 coordinate set(s) were parsed in 0.18s.
@> 1959 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 4341 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 1802 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 1638 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 3015 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 3751 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 313 atoms and 30 coordinate set(s) were parsed in 0.03s.
@> 1188 atoms and 20 coordinate set(s) were parsed in 0.09s.
@> 1668 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 4032 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 4578 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 9489 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 1683 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 2849 atoms and 1 coordinate set(s) 

@> 53833 atoms and 1 coordinate set(s) were parsed in 0.95s.
@> 9201 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 3030 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 25021 atoms and 1 coordinate set(s) were parsed in 0.42s.
@> 7663 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 7200 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 53198 atoms and 1 coordinate set(s) were parsed in 0.67s.
@> 1669 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 5228 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 2549 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 13692 atoms and 1 coordinate set(s) were parsed in 0.16s.
@> 7794 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 7890 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 1994 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 3423 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 5543 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 7580 atoms and 1 coordinate set(s

@> 5948 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 2048 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 17071 atoms and 1 coordinate set(s) were parsed in 0.19s.
@> 4074 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 4696 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 4213 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 22034 atoms and 1 coordinate set(s) were parsed in 0.25s.
@> 19636 atoms and 1 coordinate set(s) were parsed in 0.21s.
@> 8025 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 32450 atoms and 1 coordinate set(s) were parsed in 0.37s.
@> 23812 atoms and 1 coordinate set(s) were parsed in 0.27s.
@> 19532 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> 18545 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> 10234 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 2037 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 8319 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 1649 atoms and 1 coordinate s

@> 9248 atoms and 1 coordinate set(s) were parsed in 0.16s.
@> 4083 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 16845 atoms and 1 coordinate set(s) were parsed in 0.28s.
@> 4149 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 479 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 2512 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 8657 atoms and 1 coordinate set(s) were parsed in 0.14s.
@> 13221 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 461 atoms and 20 coordinate set(s) were parsed in 0.03s.
@> 23361 atoms and 1 coordinate set(s) were parsed in 0.28s.
@> 9844 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 3912 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 4077 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 7663 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 10232 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 6063 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 12950 atoms and 1 coordinate set(s

@> 60818 atoms and 1 coordinate set(s) were parsed in 0.97s.
@> 3885 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 427 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 3834 atoms and 30 coordinate set(s) were parsed in 0.34s.
@> 3572 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 7699 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 12830 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 8038 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 2078 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 11096 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 13017 atoms and 1 coordinate set(s) were parsed in 0.16s.
@> 17274 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 7205 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 7220 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 4494 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 757 atoms and 6 coordinate set(s) were parsed in 0.02s.
@> 3686 atoms and 1 coordinate set(s

@> 30113 atoms and 1 coordinate set(s) were parsed in 0.46s.
@> 23355 atoms and 1 coordinate set(s) were parsed in 0.26s.
@> 2766 atoms and 8 coordinate set(s) were parsed in 0.08s.
@> 8669 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 30116 atoms and 1 coordinate set(s) were parsed in 0.34s.
@> 55690 atoms and 1 coordinate set(s) were parsed in 0.63s.
@> 3523 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 2886 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 9218 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 3531 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 13828 atoms and 1 coordinate set(s) were parsed in 0.19s.
@> 12891 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 3172 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 9940 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 6503 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 1438 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1916 atoms and 1 coordinate set

@> 15680 atoms and 1 coordinate set(s) were parsed in 0.18s.
@> 3456 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 823 atoms and 20 coordinate set(s) were parsed in 0.05s.
@> 14064 atoms and 1 coordinate set(s) were parsed in 0.23s.
@> 15972 atoms and 1 coordinate set(s) were parsed in 0.18s.
@> 3570 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 4481 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 6789 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 8679 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 7701 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 1456 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1413 atoms and 21 coordinate set(s) were parsed in 0.09s.
@> 52587 atoms and 1 coordinate set(s) were parsed in 0.83s.
@> 8615 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 3571 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 3276 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 31125 atoms and 1 coordinate set

@> 4094 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 3853 atoms and 1 coordinate set(s) were parsed in 0.18s.
@> 302 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 4400 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 3923 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 3461 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 1181 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 2800 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 8326 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 2330 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 3575 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 1224 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 4925 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 3039 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 4046 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 3594 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 57659 atoms and 1 coordinate set(s) we

@> 16897 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 12610 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 10234 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 3412 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 2706 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 6271 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 2271 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 32691 atoms and 1 coordinate set(s) were parsed in 0.37s.
@> 3442 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 5052 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 33890 atoms and 1 coordinate set(s) were parsed in 0.38s.
@> 9320 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 23568 atoms and 1 coordinate set(s) were parsed in 0.27s.
@> 42048 atoms and 1 coordinate set(s) were parsed in 0.47s.
@> 34335 atoms and 1 coordinate set(s) were parsed in 0.53s.
@> 7297 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 35215 atoms and 1 coordinate 

@> 4383 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 14924 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 5658 atoms and 16 coordinate set(s) were parsed in 0.31s.
@> 2305 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 24756 atoms and 1 coordinate set(s) were parsed in 0.29s.
@> 9123 atoms and 1 coordinate set(s) were parsed in 0.14s.
@> 13408 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 45945 atoms and 1 coordinate set(s) were parsed in 0.58s.
@> 1724 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 2256 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 2050 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 1438 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 7855 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 1641 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 25132 atoms and 1 coordinate set(s) were parsed in 0.34s.
@> 10506 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 2073 atoms and 2 coordinate se

@> 3775 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 32105 atoms and 1 coordinate set(s) were parsed in 0.36s.
@> 1812 atoms and 20 coordinate set(s) were parsed in 0.19s.
@> 3643 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 5059 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 825 atoms and 10 coordinate set(s) were parsed in 0.04s.
@> 12979 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 3985 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 2521 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 3522 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 13223 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 980 atoms and 15 coordinate set(s) were parsed in 0.05s.
@> 6371 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 6077 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 511 atoms and 12 coordinate set(s) were parsed in 0.03s.
@> 6448 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 954 atoms and 20 coordinate set(s

@> 1250 atoms and 17 coordinate set(s) were parsed in 0.09s.
@> 30576 atoms and 1 coordinate set(s) were parsed in 0.48s.
@> 4768 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 15418 atoms and 1 coordinate set(s) were parsed in 0.18s.
@> 10327 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 9852 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 25020 atoms and 1 coordinate set(s) were parsed in 0.39s.
@> 10234 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 55415 atoms and 1 coordinate set(s) were parsed in 0.63s.
@> 8046 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 32488 atoms and 1 coordinate set(s) were parsed in 0.38s.
@> 3618 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 19543 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> 24067 atoms and 1 coordinate set(s) were parsed in 0.27s.
@> 809 atoms and 20 coordinate set(s) were parsed in 0.05s.
@> 13270 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 25819 atoms and 1 coordina

@> 3652 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 16102 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> 7427 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 9927 atoms and 1 coordinate set(s) were parsed in 0.16s.
@> 8086 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 7869 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 8240 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 2707 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 3893 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 2742 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 27580 atoms and 1 coordinate set(s) were parsed in 0.33s.
@> 640 atoms and 20 coordinate set(s) were parsed in 0.04s.
@> 75994 atoms and 1 coordinate set(s) were parsed in 0.89s.
@> 1534 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 1639 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 2771 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 2412 atoms and 1 coordinate set(s)

@> 52370 atoms and 1 coordinate set(s) were parsed in 0.83s.
@> 3044 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 1594 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 16804 atoms and 1 coordinate set(s) were parsed in 0.28s.
@> 2403 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 1472 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 25960 atoms and 1 coordinate set(s) were parsed in 0.45s.
@> 1176 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1719 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1644 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 5757 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 10342 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 1927 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 10932 atoms and 1 coordinate set(s) were parsed in 0.13s.
@> 4541 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 11737 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 1650 atoms and 15 coordinate se

@> 3512 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 21368 atoms and 1 coordinate set(s) were parsed in 0.25s.
@> 5270 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 7091 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 5023 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 4494 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 1806 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 16887 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> 7494 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 4534 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 2099 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 2275 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 2764 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 60408 atoms and 1 coordinate set(s) were parsed in 1.01s.
@> 4074 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 3072 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 1358 atoms and 1 coordinate set(s)

@> 3526 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 1841 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 25281 atoms and 1 coordinate set(s) were parsed in 0.44s.
@> 3627 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 3661 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 3926 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 6849 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 12585 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 13229 atoms and 1 coordinate set(s) were parsed in 0.16s.
@> 2719 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 8727 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 4111 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 2772 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 13874 atoms and 1 coordinate set(s) were parsed in 0.23s.
@> 2726 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 22259 atoms and 1 coordinate set(s) were parsed in 0.37s.
@> 1547 atoms and 1 coordinate set(

@> 1756 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 6151 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 7948 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 3706 atoms and 12 coordinate set(s) were parsed in 0.15s.
@> 837 atoms and 20 coordinate set(s) were parsed in 0.06s.
@> 7248 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 13019 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 4798 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 2429 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 26164 atoms and 1 coordinate set(s) were parsed in 0.32s.
@> 8671 atoms and 1 coordinate set(s) were parsed in 0.14s.
@> 4925 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 5551 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 7743 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> 698 atoms and 21 coordinate set(s) were parsed in 0.05s.
@> 18262 atoms and 1 coordinate set(s) were parsed in 0.21s.
@> 2346 atoms and 1 coordinate set(s

@> 8662 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 7123 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 533 atoms and 20 coordinate set(s) were parsed in 0.04s.
@> 7013 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 5148 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 346 atoms and 20 coordinate set(s) were parsed in 0.02s.
@> 19103 atoms and 1 coordinate set(s) were parsed in 0.23s.
@> 9182 atoms and 1 coordinate set(s) were parsed in 0.16s.
@> 21807 atoms and 1 coordinate set(s) were parsed in 0.38s.
@> 22637 atoms and 1 coordinate set(s) were parsed in 0.28s.
@> 13206 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 1671 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 4909 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 2824 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 4175 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 34255 atoms and 1 coordinate set(s) were parsed in 0.40s.
@> 7532 atoms and 1 coordinate set(

@> 510 atoms and 12 coordinate set(s) were parsed in 0.06s.
@> 6271 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> 5267 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 8695 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 6858 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 3983 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 2824 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 7965 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 13456 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 3651 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 13663 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 3276 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 4019 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 19490 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> 2944 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 2747 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 9642 atoms and 1 coordinate set(s)

@> 1753 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 6055 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 8669 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 2153 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 25117 atoms and 1 coordinate set(s) were parsed in 0.30s.
@> 4108 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 1735 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 2211 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 15554 atoms and 1 coordinate set(s) were parsed in 0.25s.
@> 6701 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 8666 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 14529 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 1234 atoms and 10 coordinate set(s) were parsed in 0.05s.
@> 6930 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 3454 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 3608 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 28722 atoms and 1 coordinate set(

@> 47736 atoms and 1 coordinate set(s) were parsed in 0.53s.
@> 10657 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 4374 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 37084 atoms and 1 coordinate set(s) were parsed in 0.50s.
@> 7749 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> 2070 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 6590 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 8976 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 4177 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 1635 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1997 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 14876 atoms and 1 coordinate set(s) were parsed in 0.17s.
@> 5412 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 5831 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 8613 atoms and 1 coordinate set(s) were parsed in 0.14s.
@> 13223 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> 12438 atoms and 1 coordinate set

@> 8656 atoms and 1 coordinate set(s) were parsed in 0.11s.
@> 7600 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> 4437 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 1600 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 8642 atoms and 1 coordinate set(s) were parsed in 0.14s.
@> 2426 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 5029 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 4295 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 1923 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 2622 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 4109 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 32636 atoms and 1 coordinate set(s) were parsed in 0.39s.
@> 25022 atoms and 1 coordinate set(s) were parsed in 0.29s.
@> 2453 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 4317 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 4559 atoms and 15 coordinate set(s) were parsed in 0.23s.
@> 9265 atoms and 1 coordinate set(s)

      PDB ID Chain  Helix Start  Helix End              Helix Start CA  \
0       4X5M     C            2         18     [67.686, 7.268, -17.81]   
1       4X5M     C           19         30   [64.524, 19.012, -42.078]   
2       4X5M     C           36         58   [58.996, 33.566, -46.101]   
3       4X5M     C           59         87   [67.563, 20.929, -19.923]   
4       4X5M     C            2         19     [67.686, 7.268, -17.81]   
5       4X5M     C           19         30   [64.524, 19.012, -42.078]   
6       4X5M     C           36         58   [58.996, 33.566, -46.101]   
7       4X5M     C           59         93   [67.563, 20.929, -19.923]   
8       4X5M     C            3         19    [64.406, 6.824, -19.678]   
9       4X5M     C           19         30   [64.524, 19.012, -42.078]   
10      4X5M     C           36         58   [58.996, 33.566, -46.101]   
11      4X5M     C           59         91   [67.563, 20.929, -19.923]   
12      4CHW     D           12       

In [97]:
df_helices = pd.DataFrame(helix_sequences, columns=columns)
print(df_helices)

      PDB ID Chain  Helix Start  Helix End              Helix Start CA  \
0       4X5M     C            2         18     [67.686, 7.268, -17.81]   
1       4X5M     C           19         30   [64.524, 19.012, -42.078]   
2       4X5M     C           36         58   [58.996, 33.566, -46.101]   
3       4X5M     C           59         87   [67.563, 20.929, -19.923]   
4       4X5M     C            2         19     [67.686, 7.268, -17.81]   
5       4X5M     C           19         30   [64.524, 19.012, -42.078]   
6       4X5M     C           36         58   [58.996, 33.566, -46.101]   
7       4X5M     C           59         93   [67.563, 20.929, -19.923]   
8       4X5M     C            3         19    [64.406, 6.824, -19.678]   
9       4X5M     C           19         30   [64.524, 19.012, -42.078]   
10      4X5M     C           36         58   [58.996, 33.566, -46.101]   
11      4X5M     C           59         91   [67.563, 20.929, -19.923]   
12      4CHW     D           12       

In [96]:
df_helices = df_helices.drop_duplicates()
df_helices.to_csv("helices.csv", sep=',', encoding='utf-8', index=False)

      PDB ID Chain  Helix Start  Helix End  Helix Length  \
0       4kk5     B           37         60            23   
1       4kk5     B           84         97            13   
2       4kk5     B          129        142            13   
3       4kk5     B          144        156            12   
4       4kk5     B          216        232            16   
5       4kk5     B          253        277            24   
6       4kk5     B          290        304            14   
7       4kk5     B          338        351            13   
8       4kk5     B          353        368            15   
9       4kk5     B          423        439            16   
10      3b5x     B           30         44            14   
11      3b5x     B           68         82            14   
12      3b5x     B          145        161            16   
13      3b5x     B          173        188            15   
14      3b5x     B          245        259            14   
15      3b5x     B          288        3