<a href="https://colab.research.google.com/github/savpatpanda/protein/blob/master/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Fetch the project sources into ./protein (on Colab this lands in /content/protein).
!git clone https://github.com/savpatpanda/protein.git
# %cd is an IPython magic, so the directory change persists for the following cells.
%cd protein
!ls 
# Install the dependencies listed in the repository's requirements.txt.
!pip3 install -r requirements.txt
# to push to GitHub, File > Save a copy in GitHub

Cloning into 'protein'...
remote: Enumerating objects: 65, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (55/55), done.[K
remote: Total 65 (delta 29), reused 30 (delta 10), pack-reused 0[K
Unpacking objects: 100% (65/65), done.
/content/protein
data  notebook.ipynb  plotting.py  proteinModel.py  requirements.txt
Collecting gemmi==0.4.2
[?25l  Downloading https://files.pythonhosted.org/packages/da/6a/fe1a65806633ea2129507b0f8b4034afdbf383e474a12152839ee50a52eb/gemmi-0.4.2-cp36-cp36m-manylinux2010_x86_64.whl (1.8MB)
[K     |████████████████████████████████| 1.8MB 1.4MB/s 
Collecting matplotlib==3.3.2
[?25l  Downloading https://files.pythonhosted.org/packages/cd/d6/8c4dfb23151d5a494c66ebbfdb5c8c433b44ec07fae52da5939fcda0943f/matplotlib-3.3.2-cp36-cp36m-manylinux1_x86_64.whl (11.6MB)
[K     |████████████████████████████████| 11.6MB 357kB/s 
[?25hCollecting numpy==1.19.2
[?25l  Downloading https://files.pythonhosted.org/packages/63/97/af

In [None]:
import numpy as np
def getFractionalToCartesianRotationMatrix(a, b, c, alpha, beta, gamma):
  """Build the 3x3 fractional-to-Cartesian matrix from unit-cell parameters.

  Args:
    a, b, c: cell edge lengths.
    alpha, beta, gamma: cell angles in degrees (converted to radians here).

  Returns:
    A lower-triangular numpy array whose rows are built from a, b and c
    respectively: the first row lies on the x axis and the second row in the
    xy plane. Fractional coordinates stored as row vectors are converted by
    right-multiplying with this matrix.
  """
  # Degrees -> radians for all three cell angles.
  alpha, beta, gamma = (angle*np.pi/180 for angle in (alpha, beta, gamma))

  cosBeta, cosGamma, sinGamma = np.cos(beta), np.cos(gamma), np.sin(gamma)
  # y-component factor of the third row.
  n2 = (np.cos(alpha) - cosGamma * cosBeta) / sinGamma

  rowA = [a, 0, 0]
  rowB = [b*cosGamma, b*sinGamma, 0]
  rowC = [c*cosBeta, c*n2, c*np.sqrt(np.sin(beta)**2 - n2**2)]
  return np.array([rowA, rowB, rowC])


In [None]:
from gemmi import cif 
from plotting import plotSpheres
import matplotlib.pyplot as plt

def parseCIFNum(num):
  """Convert a CIF numeric string to float, dropping any trailing uncertainty.

  CIF values may carry a standard uncertainty in parentheses, e.g. "5.4307(12)"
  or "5.43(2)". Everything from the opening parenthesis onward is discarded, so
  the result does not depend on how many uncertainty digits are present.

  Args:
    num: the raw value string read from the CIF file.

  Returns:
    The numeric part of the value as a float.

  Raises:
    ValueError: if the remaining text is not a valid float.
  """
  # partition() returns the whole string unchanged when there is no "(".
  return float(num.partition('(')[0])

def parseCIFFile(cifFileName):
  """Read fractional atom coordinates and the cell matrix from a CIF file.

  Args:
    cifFileName: path of the CIF file to read. The file must contain exactly
      one data block (sole_block() is used).

  Returns:
    A tuple (coords, rotationMatrix): coords is a numpy array with one
    (x, y, z) fractional-coordinate row per atom site, and rotationMatrix is
    the result of getFractionalToCartesianRotationMatrix for the file's cell
    lengths and angles.
  """
  # Read the file the caller asked for (previously a hard-coded path was used
  # and the argument was silently ignored).
  doc = cif.read_file(cifFileName)
  block = doc.sole_block()

  cellTags = ("_cell_length_a",
              "_cell_length_b",
              "_cell_length_c",
              "_cell_angle_alpha",
              "_cell_angle_beta",
              "_cell_angle_gamma")
  # find_pair() yields a (tag, value) pair; only the value is needed.
  cellParameters = [parseCIFNum(block.find_pair(tag)[1]) for tag in cellTags]
  rotationMatrix = getFractionalToCartesianRotationMatrix(*cellParameters)

  coordinateTags = ("_atom_site_fract_x",
                    "_atom_site_fract_y",
                    "_atom_site_fract_z")
  fractionalColumns = [[parseCIFNum(value) for value in block.find_loop(tag)]
                       for tag in coordinateTags]
  # Transpose the three columns into one (x, y, z) row per atom.
  coords = np.array(list(zip(*fractionalColumns)))

  return coords, rotationMatrix
  
# Load the sample structure, convert fractional coordinates to Cartesian and
# draw the atoms in a 3D axes. Failures are reported by printing the
# exception message rather than aborting the cell.
try:
  coords, rotationMatrix = parseCIFFile("./data/185472.cif")
  # Row-vector coordinates times the cell matrix.
  cartesian = coords @ rotationMatrix
  fig = plt.figure()
  ax = fig.add_subplot(1, 1, 1, projection="3d")
  # Coordinates are scaled by 30 before being handed to plotSpheres.
  plotSpheres(30 * cartesian, ax)
  plt.show()

except Exception as e:
  print(e)

ImportError: ignored

<Figure size 432x288 with 1 Axes>

In [8]:
class Residue:
  """A group of atoms together with a single representative position.

  Attributes are set directly from the constructor arguments:
    atoms: the collection of atoms passed in (stored as given).
    position: the tuple (meanX, meanY, meanZ).
  """

  def __init__(self, atoms, meanX, meanY, meanZ):
    # Pack the three coordinates into one tuple.
    self.position = (meanX, meanY, meanZ)
    self.atoms = atoms


class Atom:
  """A single atom: a position plus two descriptive labels.

  Attributes are set directly from the constructor arguments:
    position: the tuple (x, y, z).
    atomType: stored as given.
    residueComponents: stored as given.
  """

  def __init__(self, x, y, z, atomType, residueComponents):
    self.residueComponents = residueComponents
    self.atomType = atomType
    # Pack the three coordinates into one tuple.
    self.position = (x, y, z)


In [13]:
import pandas as pd

# Parse the ATOM records of a PDB file, drop the backbone atoms, and build one
# Residue (remaining atoms + their mean position) per residue number.
#
# Token indexes kept from each whitespace-split ATOM line, in column order:
# residueCode, residueNumber, x, y, z, atomType -> 2, 5, 6, 7, 8, last token.
# NOTE(review): in the standard PDB ATOM layout token 2 is the atom name, so
# the "residueCode" column presumably holds atom names - confirm the intent.
# NOTE(review): grouping uses the residue number only; residues that share a
# number across chains would be merged - confirm the input has a single chain.
data = []
positions = [2,5,6,7,8,-1]
# Converters for the kept tokens; anything not listed stays a string.
fieldConverters = {5: int, 6: float, 7: float, 8: float}

# The context manager closes the file once all lines have been read.
with open('data/1m46.pdb') as pdb:
    for line in pdb:
        elements = line.split()
        if not elements:
            # Blank line: nothing to inspect (elements[0] would raise).
            continue
        if elements[0] == "ATOM" and elements[2] not in ["CA", "N", "O", "C"]:
            if len(elements) < 12:
                # A short record is treated as having token 2 fused with the
                # following field: its first three characters stay in token 2
                # and the remainder is re-inserted as token 3.
                fused = elements[2]
                elements[2] = fused[:3]
                elements.insert(3, fused[3:])

            data.append([fieldConverters.get(i, str)(elements[i]) for i in positions])

atomTable = pd.DataFrame(data = data, columns = ["residueCode", "residueNumber", "x", "y", "z", "atomType"])
# `df` is kept as the GroupBy object, as before, in case later cells use it.
df = atomTable.groupby(by = ['residueNumber'])

residues = []
# Iterating a GroupBy already yields each group's frame, so no get_group()
# lookup is needed.
for _, atomDf in df:
    atoms = [Atom(row.x, row.y, row.z, row.atomType, row.residueCode)
             for row in atomDf.itertuples(index=False)]
    residues.append(Residue(atoms, atomDf["x"].mean(), atomDf["y"].mean(), atomDf["z"].mean()))
for residue in residues:
    print(len(residue.atoms), residue.position)



2 (11.2025, 73.72, -4.494)
1 (8.433, 75.418, -7.783)
3 (10.886333333333333, 81.46133333333333, -7.092666666666666)
7 (4.244714285714285, 79.85657142857144, -12.630571428571429)
1 (4.794, 76.593, -5.315)
4 (8.94425, 78.15625, -2.65625)
5 (5.9037999999999995, 85.48920000000001, -6.8538)
4 (1.5470000000000002, 81.78425, -6.4342500000000005)
4 (2.7459999999999996, 78.67175, -0.19224999999999998)
7 (6.385571428571429, 86.38485714285714, 1.192)
3 (0.9973333333333333, 86.19666666666666, -2.3813333333333335)
4 (-2.4627499999999998, 82.52674999999999, 0.40800000000000003)
7 (2.426142857142857, 81.94771428571428, 5.735142857142857)
4 (2.6755, 90.48649999999999, 2.7129999999999996)
5 (-3.7954, 87.2314, 6.648999999999999)
5 (1.489, 93.8244, 9.096599999999999)
5 (4.8948, 95.2936, 6.447399999999999)
1 (8.16, 91.876, 5.212)
4 (6.072, 86.222, 6.2459999999999996)
1 (5.35, 87.953, 11.853)
5 (12.999, 87.0212, 13.319399999999998)
4 (7.745500000000001, 85.05425, 16.262999999999998)
2 (4.547000000000001, 84