In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# importing files

path_edges = 'ring_files/3OG7_edges.txt'
path_nodes = 'ring_files/3OG7_nodes.txt'

edges = pd.read_csv(path_edges, sep='\t')
nodes = pd.read_csv(path_nodes, sep='\t')


In [3]:
# veryfing current state of the data

edges.head()

Unnamed: 0,NodeId1,Interaction,NodeId2,Distance,Angle,Energy,Atom1,Atom2,Donor,Positive,Cation,Orientation
0,A:1:_:032,IAC:LIG_SC,A:463:_:ILE,3.032,-999.9,0.0,C11,HG23,,,,
1,A:1:_:032,IAC:LIG_SC,A:464:_:GLY,6.337,-999.9,0.0,C09,HA1,,,,
2,A:1:_:032,IAC:LIG_SC,A:468:_:PHE,5.607,-999.9,0.0,O19,HE1,,,,
3,A:1:_:032,IAC:LIG_MC,A:470:_:THR,6.406,-999.9,0.0,F25,O,,,,
4,A:1:_:032,IAC:LIG_SC,A:471:_:VAL,2.651,-999.9,0.0,O19,HG21,,,,


In [4]:
# veryfing current state of the data

nodes.head()

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,x,y,z,pdbFileName,Rapdf,Tap
0,A:450:_:TRP,A,450,TRP,,6,51.35,-10.992,8.142,-13.891,3OG7.pdb#450.A,-23.867,0.501
1,A:451:_:GLU,A,451,GLU,E,2,44.36,-10.452,10.25,-17.0,3OG7.pdb#451.A,-33.699,0.339
2,A:452:_:ILE,A,452,ILE,E,5,23.88,-7.156,12.13,-16.979,3OG7.pdb#452.A,-98.608,0.868
3,A:453:_:PRO,A,453,PRO,,2,36.64,-7.568,15.633,-18.501,3OG7.pdb#453.A,-51.057,0.194
4,A:454:_:ASP,A,454,ASP,T,2,51.35,-5.603,16.529,-21.648,3OG7.pdb#454.A,-44.321,0.074


In [5]:
#Dropping unwanted columns 

nodes.drop(columns = ['x', 'y', 'z', 'Rapdf', 'Tap'], inplace = True)
edges.drop(columns = ['Orientation', 'Cation', 'Positive', 'Donor', 'Angle', 'Energy'], inplace = True)

In [6]:
# Other Treatments 

# Casting pdbFileName column as string type, because it came as an object 
nodes = nodes.astype({'pdbFileName' : 'string'})

#Creating the PLDDT column that indicates if the Bfactor_CA is PLDDT
nodes['PLDDT'] = nodes['pdbFileName'].apply(lambda x: False if x[0:2] != "AF" else True)

#Changing the pdbFileName column to contain only the pbd name and its name to pdb
nodes['pdbFileName'] = nodes['pdbFileName'].apply(lambda x: x[0:4])
nodes.rename(columns={'pdbFileName' : 'pdb'},  inplace= True)

In [7]:
class node:

    def __init__(self, param):

        self.NodeId = param[0]
        self.Chain = param[1]
        self.Position = int(param[2])
        self.Residue = param[3]
        self.Dssp = param[4]
        self.Degree = int(param[5])
        self.Bfactor_CA = param[6]
        self.pdb = param[7]
        self.plddt = bool(param[8])

    
    def to_dict(self):

        atributes = {"NodeId" : self.NodeId,
                     "Chain" : self.Chain,
                     "Position" : self.Position, 
                     "Residue" : self.Residue,
                     "Dssp" :  self.Dssp,
                     "Degree": self.Degree,
                     "Bfactor_CA" :  self.Dssp,
                     "pdb": self.pdb,
                     "plddt" : self.plddt
                    }
        return (atributes)
        


In [8]:
class edge:

    def __init__(self, param):

        self.NodeId1 = param[0]
        self.NodeId2 = param[1]
        self.Interaction = param[2]
        self.Distance = param[3]
        self.Atom1 = param[4]
        self.Atom2 = param[5]
    
    def to_dict(self):

        atributes = {'NodeId1' : self.NodeId1,
                     "NodeId2" : self.NodeId2,
                     "Interaction" : self.Interaction, 
                     "Distance" : self.Distance,
                     "Atom1" :  self.Atom1,
                     "Atom2": self.Atom2
                    }
        
        return (atributes)


In [9]:
class pdb:

    def __init__(self, nodes, edges):

        self.Nodes = nodes
        self.Edges = edges
        self.Name = nodes[0].pdb

    def get_nodes(self):
        return([self.Nodes])
    
    def get_edges(self):
        return([self.Edges])


In [10]:
#load data into lists of edges and nodes

edges_list = []
nodes_list = []

for i in range(0, edges.shape[0]):
    edge_append = edge(list(edges.iloc[i, :]))
    edges_list.append(edge_append)

for i in range(0, nodes.shape[0]):
    node_append = node(list(nodes.iloc[i, :]))
    nodes_list.append(node_append)

In [11]:
#creating pdb object

pdb_3OG7 = pdb(nodes_list, edges_list)

In [12]:
#functions that take the nodes and edges objects and write them in a json file 

def nodes_to_json(nodes, file):
    
    file.write('"nodes" : [')

    for i in range(0,len(nodes)):
        if(i == len(nodes) - 1):
            json.dump((nodes[i].to_dict()), file, indent = 6)
            file.write("],\n")
        else:
            json.dump((nodes[i].to_dict()), file, indent = 6)
            file.write(",")

def edges_to_json(edges, file):

    file.write('"edges" : [')
    for i in range(0,len(edges)):
        if(i == len(edges) - 1):
            json.dump((edges[i].to_dict()), file, indent = 6)
            file.write("]")
        else:
            json.dump((edges[i].to_dict()), file, indent = 6)
            file.write(",")
    

In [15]:
#This function takes an pbd object, creates a json file and writes the pbd object in it

def to_json(pdb_obj):

    edges = pdb_obj.Edges
    nodes = pdb_obj.Nodes

    #declaring filename as the name of the name of the pdb object and creating the file
    filename = 'output_files/' + pdb_obj.Name + '.json'
    file = open(filename, 'w')
    
    
    file.write('{ "pdb" : "')
    file.write(pdb_obj.Name)
    file.write('", \n')

    #writing nodes and edges into the file
    nodes_to_json(nodes, file)
    edges_to_json(edges, file)

    
    file.write("}")
    
    file.close()

    return

In [16]:
#testing !!
to_json(pdb_3OG7)

In [68]:
import itertools

out_file = open("myfile.json", "w")


for i in range(1, len(lista_nodes[0].to_dict().keys()) + 1): 
    sliced_key = dict(itertools.islice(lista_nodes[0].to_dict().items(),i - 1 , i))
    json.dump(sliced_key, out_file)
    print(sliced_key)

{'NodeId': 'A:450:_:TRP'}
{'Chain': 'A'}
{'Position': 450}
{'Residue': 'TRP'}
{'Dssp': ' '}
{'Degree': 6}
{'Bfactor_CA': ' '}
{'pdb': '3OG7'}
{'plddt': False}


dict_keys(['NodeId', 'Chain', 'Position', 'Residue', 'Dssp', 'Degree', 'Bfactor_CA', 'pdb', 'plddt'])