In [40]:
import math
import numpy as np
import pickle
from neo4j import GraphDatabase
import os
from dotenv import load_dotenv

In [50]:
# Load environment variables from a .env file so os.getenv() can read the Neo4j settings.
load_dotenv()

True

In [51]:
# Sanity check that the environment is populated: show the configured Neo4j username.
print(os.getenv('NEO4J_USERNAME'))

neo4j


In [17]:
def three2one(path="123.txt"):
    """Build a lookup from three-letter residue codes to one-letter symbols.

    Each non-blank line of the mapping file is split on whitespace; the first
    field is the one-letter symbol and the second is the three-letter code.
    The three-letter code is upper-cased before being used as the key.

    Args:
        path: Location of the mapping file. Defaults to "123.txt" in the
            current working directory.

    Returns:
        dict mapping upper-cased three-letter code -> one-letter symbol.

    Raises:
        OSError: if the mapping file cannot be opened.
        IndexError: if a non-blank line has fewer than two fields.
    """
    three_2_one = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at the end of the file)
                # carry no mapping and would otherwise raise IndexError.
                continue
            one = fields[0]
            three = fields[1].upper()
            three_2_one[three] = one
    return three_2_one

In [30]:
class Extractor:
    """Extract alpha-carbon coordinates from a PDB file and their pairwise distances.

    Everything is computed eagerly in ``__init__``:

    * ``three_2_one``   - three-letter -> one-letter residue code lookup.
    * ``file_lines``    - raw lines of the PDB file.
    * ``acc``           - dict of residue name -> [x, y, z] of its alpha carbon.
    * ``distance_dict`` - dict of "name1<--->name2" -> Euclidean distance.
    """

    def __init__(self,pdb_filepath):
        """Read ``pdb_filepath`` and compute coordinates and pairwise distances."""
        self.filepath = pdb_filepath
        self.three_2_one = three2one()
        self.file_lines = self.read_pdb() # collect the lines
        self.acc = self.get_alpha_carbon_coordinates() # alpha-carbon coordinates
        self.distance_dict = self.compute_distances() # pairwise distances

    def read_pdb(self):
        '''Return the lines of the pdb file (newlines included).'''
        with open(self.filepath) as f:
            return f.readlines()

    def get_alpha_carbon_coordinates(self):
        '''Return a dictionary of alpha-carbon name -> [x, y, z] coordinates.

        The name is ``symbol + chain + seq + pdb_id`` where ``pdb_id`` is the
        last whitespace-separated token of the first line of the file.

        ATOM records are read by fixed column position rather than by
        whitespace splitting. In the PDB format adjacent fields are not
        guaranteed to be separated by a space: a four-digit residue number
        touches the chain identifier ("A1000"), and an alternate-location flag
        touches the residue name ("AALA"). Splitting on whitespace shifts
        every later field in those cases and silently yields wrong
        coordinates.

        If several records produce the same name, the last one wins.

        Raises:
            IndexError: if the file has no lines.
            KeyError: if a residue name is missing from ``three_2_one``.
            ValueError: if a coordinate field of a CA record is not a number.
        '''
        # extract the pdb id first
        pdb_id = self.file_lines[0].strip("\n").split()[-1]

        alpha_coord = {}
        for line in self.file_lines:
            # Record type lives in columns 1-6, atom name in columns 13-16.
            if line[0:6].strip() != "ATOM" or line[12:16].strip() != "CA":
                continue
            symbol = self.three_2_one[line[17:20].strip()]  # residue name, columns 18-20
            chain = line[21:22].strip()                     # chain id, column 22
            seq = line[22:26].strip()                       # residue number, columns 23-26
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            name = symbol + chain + seq + pdb_id
            alpha_coord[name] = [x,y,z]
        return alpha_coord

    def euclidean_distance(self,t1,t2):
        '''Return the Euclidean distance between two coordinate sequences.'''
        return np.linalg.norm(np.array(t1)-np.array(t2))

    def compute_distances(self):
        '''Return a dictionary of "name1<--->name2" -> distance for every unordered pair.

        Each pair appears once, keyed with the residue that comes first in
        ``self.acc`` on the left-hand side.
        '''
        distance_dict = {}
        alphas = list(self.acc.keys())
        for i, first in enumerate(alphas):
            # only consider the atoms ahead of this atom
            for second in alphas[i+1:]:
                pair = first+"<--->"+second
                distance_dict[pair] = self.euclidean_distance(self.acc[first],self.acc[second])
        return distance_dict

    def save_pairwise_distances(self,path_to_save):
        '''Pickle ``self.distance_dict`` to ``path_to_save``.'''
        with open(path_to_save,'wb') as f:
            pickle.dump(self.distance_dict,f)

In [31]:
# Parse the PDB file; the constructor also computes the alpha-carbon coordinates and all pairwise distances.
ex = Extractor("6wps.pdb")

In [33]:
# Persist the pairwise-distance dictionary as a pickle; GraphBuilder loads it from this file later.
ex.save_pairwise_distances("./oop_dist_dict.pkl")

In [59]:
# Neo4j connection settings, read from the process environment.
url = os.getenv("NEO4J_URI")
# The key must be spelled exactly "NEO4J_USERNAME": environment variable names
# are case-sensitive on POSIX systems, so the previous "NEO4j_USERNAME"
# returned None there.
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")
print(url)

class GraphBuilder:
    """Build a residue graph in Neo4j from a pickled pairwise-distance dictionary.

    Pairs whose distance is at most ``threshold`` become edges. Each edge is
    written to the database in both directions by ``make_graph``.

    Attributes:
        distance_dict: mapping "name1<--->name2" -> distance, loaded from disk.
        threshold: maximum distance for a pair to be kept as an edge.
        edges: list of (name1, name2, distance) tuples that passed the threshold.
        driver: Neo4j driver built from the module-level ``url``, ``username``
            and ``password``.
    """

    def __init__(self,path_of_distance_dict,threshold):
        """Load distances, select edges, then open the database driver.

        The driver is created last so that a bad pickle path does not leave an
        unclosed driver behind.
        """
        self.distance_dict = self.load_distance_dict(path_of_distance_dict)
        self.threshold = threshold
        self.edges = self.get_edges()
        self.driver = GraphDatabase.driver(url, auth=(username, password))

    def load_distance_dict(self,path_to_load):
        """Return the object unpickled from ``path_to_load``.

        Raises:
            FileNotFoundError: if the file cannot be opened or unpickled. The
                underlying error is chained as ``__cause__``.
        """
        try:
            with open(path_to_load, "rb") as f:
                # NOTE(security): pickle.load can execute arbitrary code; only
                # load files produced by a trusted source.
                return pickle.load(f)
        except Exception as exc:
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # are not turned into a misleading "file not found" error.
            raise FileNotFoundError("File path seems to be wrong. Please try again with a valid pickle file.") from exc

    def get_edges(self):
        """Return (name1, name2, distance) for every pair with distance <= threshold."""
        edges = []
        for key, value in self.distance_dict.items():
            if value <= self.threshold:
                alpha1, alpha2 = key.split("<--->")
                edges.append((alpha1, alpha2,value))
        return edges

    def make_graph(self):
        """Write every edge to Neo4j in both directions, then close the driver.

        Progress is printed after every 100 edges. The driver is closed even
        if the run fails or is interrupted, so the instance cannot be reused
        for another ``make_graph`` call afterwards.
        """
        try:
            with self.driver.session() as session:
                # Prefer execute_write where the driver provides it; fall back
                # to write_transaction on drivers that only have the older name.
                write = getattr(session, "execute_write", None) or session.write_transaction
                for count, (alpha1, alpha2, distance) in enumerate(self.edges, start=1):
                    write(create_edge, alpha1, alpha2, distance)
                    write(create_edge, alpha2, alpha1, distance)
                    if count % 100 == 0:
                        print(count, "records added.")
        finally:
            # Was `driver.close()`, an undefined global that raised NameError.
            self.driver.close()
        print("Connection closed, Done !")
            
            
def create_edge(tx, alpha1, alpha2,distance):
    """Merge two AMINO_ACID nodes and a directed CONNECTED_TO relationship.

    Residue names are built as symbol + chain + seq + pdb id, so the chain
    identifier is the second character of each name.

    Args:
        tx: transaction object exposing ``run(query, **parameters)``.
        alpha1: name of the source residue.
        alpha2: name of the target residue.
        distance: value stored on the relationship's ``distance`` property.
    """
    chains = {"ch1": alpha1[1], "ch2": alpha2[1]}
    cypher = (
        "MERGE (a1:AMINO_ACID {name:$alpha1, chain:$ch1}) "
        "MERGE (a2:AMINO_ACID {name:$alpha2, chain:$ch2}) "
        "MERGE (a1)-[:CONNECTED_TO {distance:$distance}]->(a2)"
    )
    tx.run(cypher, alpha1=alpha1, alpha2=alpha2, distance=distance, **chains)

neo4j+s://b35890ab.databases.neo4j.io


In [60]:
# Load the pickled distances and keep only pairs whose distance is <= 3.8
# (same units as the PDB coordinates; presumably angstroms, TODO confirm).
gb = GraphBuilder(path_of_distance_dict="oop_dist_dict.pkl",threshold=3.8)

In [61]:
# Write every retained edge to Neo4j in both directions; progress is printed every 100 edges.
gb.make_graph()

100 records added.
200 records added.
300 records added.
400 records added.
500 records added.
600 records added.
700 records added.
800 records added.


KeyboardInterrupt: 