In [1]:
import networkx as nx
import json
import numpy as np
import pandas as pd
import copy
import matplotlib.pyplot as plt
from networkx.readwrite import json_graph

Load networkx graphs

In [2]:
def load_data_from_file(filename):
    """Read a JSON file and turn its contents into a dictionary of graphs.

    Parameters
    ----------
    filename : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    dict
        Whatever ``_load_data_from_string_dict`` builds from the parsed JSON
        object (one graph per top-level key).
    """
    with open(filename, "r") as json_file:
        parsed_json = json.load(json_file)
    graphs_by_key = _load_data_from_string_dict(parsed_json)
    return graphs_by_key

def _load_data_from_string_dict(string_dict):
    """Convert a dict of node-link dictionaries into a dict of NetworkX graphs.

    Each value of ``string_dict`` is deep-copied before it is touched, so the
    caller's data is never modified. If a value stores its edge list under
    ``"edges"``, that entry is renamed to ``"links"`` so both spellings of the
    node-link format are accepted.

    Parameters
    ----------
    string_dict : dict
        Maps each key to a node-link dictionary.

    Returns
    -------
    dict
        Same keys as ``string_dict``; values are the graphs returned by
        ``nx.node_link_graph``.
    """
    result_dict = {}
    for key, raw_graph in string_dict.items():
        data = copy.deepcopy(raw_graph)
        if 'edges' in data:
            data["links"] = data.pop("edges")
        try:
            # Name the edge key explicitly. Newer NetworkX releases warn about
            # (and later change) the default edge key, which would otherwise
            # break the "links" normalisation done just above.
            graph = nx.node_link_graph(data, edges="links")
        except TypeError:
            # Older NetworkX releases do not accept the ``edges`` keyword;
            # their default edge key is already "links".
            graph = nx.node_link_graph(data)
        result_dict[key] = graph
    return result_dict

In [3]:
# Training set: a dictionary whose keys are SMILES and whose values are graphs
loaddir = "../data/graphs/"
train_data = load_data_from_file(f"{loaddir}cleaned_graph_data_10June.json")

Pick out isolated atoms in the training data

In [5]:
# Collect an (atom_type, orbital) pair for every orbital of every
# single-node graph in the training data.
atom_orb_train = []

for mol, graph in train_data.items():
    if graph.number_of_nodes() == 1:
        # Take the attributes of the only node without assuming its id is 0;
        # indexing ``nodes[0]`` would raise KeyError for any other node id.
        node_attrs = next(iter(graph.nodes(data=True)))[1]
        atom = node_attrs['atom_type']
        orbitals = node_attrs['orbitals']
        atom_orb_train.extend((atom, orb) for orb in orbitals)

Lookup table - exclude isolated atoms in training data

In [6]:
# LWNL lookup table, read from CSV
lwnl_df = pd.read_csv('../data/lookups/lwnl.csv')

# Pull the three columns used below out as plain Python lists
elem_list_lwnl, orb_list_lwnl, be_list_lwnl = (
    list(lwnl_df[column])
    for column in ('Element', 'Orbital', 'Binding Energy')
)

In [7]:
# Per-element lists of orbitals and binding energies, built in lockstep so
# that position i in one list corresponds to position i in the other.
orb_dict_lwnl = {}
be_dict_lwnl = {}

for atom, orb, be in zip(elem_list_lwnl, orb_list_lwnl, be_list_lwnl):
    # Skip (element, orbital) pairs already collected from the training data
    if (atom, orb) in atom_orb_train:
        continue
    orb_dict_lwnl.setdefault(atom, []).append(orb)
    be_dict_lwnl.setdefault(atom, []).append(be)

In [8]:
# Number of distinct elements (dictionary keys) left in the lookup table
len(orb_dict_lwnl)

89

In [9]:
# Total number of orbital entries kept, summed over all elements
S = sum(len(orbs) for orbs in orb_dict_lwnl.values())

S

940

Create NetworkX graph objects

In [10]:
# Build one single-node graph per element; node 0 carries that element's
# orbitals and binding energies from the lookup dictionaries.
graphs_lwnl = {}

for atom, atom_orbitals in orb_dict_lwnl.items():
    node_attrs = {
        "atom_type": atom,
        "formal_charge": 0,
        "orbitals": atom_orbitals,
        "binding_energies": be_dict_lwnl[atom],
        "e_neg_score": [0.0],
    }
    G = nx.Graph()
    G.add_node(0, **node_attrs)
    graphs_lwnl[atom] = G

Save as JSON file

In [11]:
# Convert every graph to its node-link dictionary so it can be written as JSON
serializable_dict = {}

for key, G in graphs_lwnl.items():
    # Store each node's identifier as an explicit "id" attribute on the node
    for n, attrs in G.nodes(data=True):
        attrs["id"] = n

    data = json_graph.node_link_data(G)
    serializable_dict[key] = data

In [12]:
# Write the node-link dictionaries to disk as indented JSON
with open("../data/graphs/lwnl.json", 'w') as json_out:
    json.dump(serializable_dict, json_out, indent=2)