This notebook addresses the question, "Can we represent a molecule as a graph via a 1D column vector or a 2D matrix of fixed length, with maximum number of atoms n_rows?" Then, can we use this representation to learn neural fingerprints? E.g., can we make an aromatic ring detector? 

Scheme:
feature_matrix = X
for each ligand:
    choose a central atom. this can be the atom (node) that minimizes distance to furthest heavy atom in graph.
    set first row of X to be this central atom
    set next four rows to be the atoms bonded to that central atom
        set zeros for rows where row ind > n_bonds of atom
    for each of those atoms:
        repeat. find their neighbors. add to matrix.

algorithm: breadth-first search:
1. create networkx graph based on molecule
2. find "central" atom (different strategies)
3. define atom matrix of size (1+4+4*3^(L-1)) x (n_features_per_atom)
4. start atom queue q
5. central_atom.layer = 0; central_atom.row_idx = 0;
6. q.enqueue(central_atom)
7. define adjacency matrix of size (1+4+4*3^(L-1)) x 4

def get_row_idx(curr_layer, prev_row_idx, curr_neighbor_idx):
    if curr_layer == 0:
        return(0)
    if curr_layer == 1:
        row_idx = 1 + curr_neighbor_idx
    if curr_layer == 2:
        last_max = 5
        row_idx = last_max + (3*(prev_row_idx-last_max)) + curr_neighbor_idx
    if curr_layer > 2:
        last_max = 5 + 4*3^(curr_layer-2) 
        row_idx = last_max + 3*(prev_row_idx-last_max) + curr_neighbor_idx
    return(row_idx)
    

while q.is_not_empty():
    a = q.dequeue()
    a.visited = True
    for n_idx, n in enumerate(a.neighbors()):
        if not n.visited:
            row_idx = c
            n.layer = a.layer + 1
            row_idx = get_row_idx(n.layer, a.row_idx, n_idx)
            n.row_idx = row_idx
            adj_matrix[a.row_idx][n_idx] = n.row_idx
            atom_matrix[row_idx][elem_to_idx[n.elem]] = 1

input_matrix = tf.concat([atom_matrix, atom_matrix[adj_matrix[:,0]], atom_matrix[adj_matrix[:,1]], atom_matrix[adj_matrix[:,2]], atom_matrix[adj_matrix[:,3]]], axis=1)

neural net:
h1 = relu(tf.zeros([n_features_per_atom, 4]) * input_matrix + bias)
h1_conc = tf.concat([h1, h1[adj_matrix[:,0]], ..., h1[adj_matrix[:,3]]])

repeat h1 to get h2


In [2]:
import numpy as np
np.cos( [-0.91292151,  3.09695411,  1.06725919])

array([ 0.61143659, -0.99900387,  0.48252666])

dihedral predictor pseudocode:

get bonds for molecule
create networkx graph out of molecule (use atom indices)

for each edge:
   for neighbor_i in atom_i.neighbors():
       if neighbor_i == atom_j: continue
       for neighbor_j in atom_j.neighbors():
           if neighbor_j == atom_i: continue
           dihedrals.append((neighbor_i, atom_i, atom_j, neighbor_j))
           check to make sure the reversed tuple (neighbor_j, atom_j, atom_i, neighbor_i) is not already in the list

for dihedral in dihedrals:
    angle =  rdMolTransforms.GetDihedralDeg(c, 0,1,2,3)


In [None]:
"""
#MolGAN

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os


def xavier_init(size):
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.cast(tf.random_normal(shape=size, stddev=xavier_stddev), tf.float32)

atom_dim = 75
hidden_dim = 50

X = tf.placeholder(tf.float32, shape=[None, 1])

D_W1 = tf.Variable(xavier_init([atom_dim, hidden_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[hidden_dim]))

D_W2 = tf.Variable(xavier_init([hidden_dim, hidden_dim]))
D_b2 = tf.Variable(tf.zeros(shape=[hidden_dim]))

theta_D = [D_W1, D_W2, D_b1, D_b2]


Z = tf.placeholder(tf.float32, shape=[1, 50])

G_W1 = tf.Variable(xavier_init([100, 128]))
G_b1 = tf.Variable(tf.zeros(shape=[128]))

G_W2 = tf.Variable(xavier_init([128, 1]))
G_b2 = tf.Variable(tf.zeros(shape=[1]))

theta_G = [G_W1, G_W2, G_b1, G_b2]


def sample_Z(m, n):
    return np.sort(np.random.uniform(-1., 1., size=[m, n]))


def generator(z, is_training=True):
    G_h1 = tf.nn.tanh(tf.matmul(z, G_W1) + G_b1)
    G_h1 = tf.contrib.layers.batch_norm(G_h1, 
                                        center=True, scale=True, 
                                        is_training=is_training)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob)

    return G_prob


def discriminator(x):
    D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    D_h1 = tf.contrib.layers.batch_norm(D_h1, 
                                        center=True, scale=True, 
                                        is_training=True)
    D_logit = tf.matmul(D_h1, D_W2) + D_b2
    D_prob = tf.nn.sigmoid(D_logit)

    return D_prob, D_logit

def sampler(z, is_training=False):
    G_h1 = tf.nn.tanh(tf.matmul(z, G_W1) + G_b1)
    G_h1 = tf.contrib.layers.batch_norm(G_h1, 
                                        center=True, scale=True, 
                                        is_training=is_training)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob)

    return G_log_prob

G_pred = sampler(Z)

G_sample = generator(Z)
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)

# D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
# G_loss = -tf.reduce_mean(tf.log(D_fake))

# Alternative losses:
# -------------------
D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)))
D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
D_loss = D_loss_real + D_loss_fake
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

D_solver = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(G_loss, var_list=theta_G)

mb_size = 128
Z_dim = 100

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

i = 0

for it in range(100000):
    if it % 1000 == 0:
        samples = sess.run(G_sample, feed_dict={Z: sample_Z(16, Z_dim)})

    X_mb = np.sort(np.random.normal(size=(mb_size,1)))

    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})
    #_, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})

    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'. format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()
    if it % 1000 == 0:
        plt.hist(sess.run(G_pred, feed_dict={Z: sample_Z(1000, 100)}))
        plt.show()
"""
        

In [None]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem, rdMolTransforms
import os
import fnmatch
import numpy as np
import deepchem as dc
from scipy.sparse import csr_matrix
from sklearn.preprocessing import OneHotEncoder


In [None]:
def get_torsions_angles(mol, max_torsions=250, max_atoms=200):
    """Enumerate the torsions of ``mol`` and measure them on conformer 0.

    A torsion is a 4-tuple of atom indices ``(a, b, c, d)`` where ``b-c`` is
    a bond of the molecule, ``a`` is a neighbour of ``b`` other than ``c``
    and ``d`` is a neighbour of ``c`` other than ``b``.  Bonds for which
    either end atom reports ``IsInRing()`` are skipped.  A torsion and its
    reverse ``(d, c, b, a)`` are recorded only once.

    Args:
        mol: RDKit molecule exposing ``GetBonds()`` and ``GetConformer(0)``.
        max_torsions: number of torsion slots in the fixed-size outputs
            (default 250, the value the rest of the notebook hard-codes).
        max_atoms: number of atom slots per torsion (default 200).

    Returns:
        A tuple ``(torsion_indices, torsion_matrix)``:

        * ``torsion_indices`` -- ``uint8`` array of shape
          ``(max_torsions, max_atoms, 4)``; entry ``[t, a, k]`` is 1 when
          atom ``a`` is the ``k``-th atom of torsion ``t``.
        * ``torsion_matrix`` -- ``scipy.sparse.csr_matrix`` of shape
          ``(max_torsions, 1)`` holding the absolute value of
          ``rdMolTransforms.GetDihedralRad`` for each torsion.

        Slots beyond the number of torsions found stay zero.

    Raises:
        IndexError: if the molecule has more than ``max_torsions`` torsions
            or a torsion references an atom index ``>= max_atoms``.
    """
    # `seen` gives O(1) duplicate checks; the list keeps discovery order,
    # which determines the row each torsion occupies in the outputs.
    seen = set()
    torsion_tuples = []
    for bond in mol.GetBonds():
        atom_i = bond.GetBeginAtom()
        atom_j = bond.GetEndAtom()
        if atom_i.IsInRing() or atom_j.IsInRing():
            continue
        idx_i = atom_i.GetIdx()
        idx_j = atom_j.GetIdx()
        outer_i = [n.GetIdx() for n in atom_i.GetNeighbors()
                   if n.GetIdx() != idx_j]
        outer_j = [n.GetIdx() for n in atom_j.GetNeighbors()
                   if n.GetIdx() != idx_i]
        for a in outer_i:
            for d in outer_j:
                torsion_tuple = (a, idx_i, idx_j, d)
                # The reversed tuple describes the same dihedral.
                if torsion_tuple in seen or torsion_tuple[::-1] in seen:
                    continue
                seen.add(torsion_tuple)
                torsion_tuples.append(torsion_tuple)

    # Fail up front with a readable message instead of part-way through
    # filling the fixed-size arrays.
    if len(torsion_tuples) > max_torsions:
        raise IndexError(
            "molecule has %d torsions but only %d slots are available"
            % (len(torsion_tuples), max_torsions))
    for torsion_tuple in torsion_tuples:
        if max(torsion_tuple) >= max_atoms:
            raise IndexError(
                "torsion %r references an atom index >= max_atoms (%d)"
                % (torsion_tuple, max_atoms))

    c = mol.GetConformer(0)
    torsion_matrix = np.zeros((max_torsions, 1))
    torsion_indices = np.zeros((max_torsions, max_atoms, 4), dtype=np.uint8)
    for i, torsion_tuple in enumerate(torsion_tuples):
        # Only the magnitude of the angle is kept; the sign is discarded.
        torsion_matrix[i] = np.abs(rdMolTransforms.GetDihedralRad(c, *torsion_tuple))
        for position, atom_idx in enumerate(torsion_tuple):
            torsion_indices[i, atom_idx, position] = 1
    return (torsion_indices, csr_matrix(torsion_matrix))
                

In [None]:
def featurize_mols(mol_files):
    """Featurize every mol2 file in ``mol_files``.

    Returns a list with one entry per input path, in input order.  Each
    entry is ``(mol_file, torsions, graph_feat)`` where ``torsions`` is the
    result of ``get_torsions_angles`` and ``graph_feat`` is the first item
    returned by ``AdjacencyFingerprint.featurize``.  Files that RDKit cannot
    parse (``MolFromMol2File`` returns ``None``) yield ``None`` instead.
    """
    featurizer = AdjacencyFingerprint(max_n_atoms=200)
    features = []
    for mol_file in mol_files:
        mol = Chem.MolFromMol2File(mol_file)
        if mol is not None:
            torsions = get_torsions_angles(mol)
            graph_feat = featurizer.featurize([mol])[0]
            record = (mol_file, torsions, graph_feat)
        else:
            # Keep a placeholder so positions still line up with mol_files.
            record = None
        features.append(record)
    return features

In [None]:
from deepchem.feat.graph_features import ConvMolFeaturizer
from deepchem.feat.adjacency_fingerprints import AdjacencyFingerprint

In [None]:
import pickle
# Featurization is cached on disk: load the pickle when it exists, otherwise
# featurize every "*ligand.mol2" file under the PDBbind directory and save.
feature_file = "./dihedral_features_pdbbind.pkl"
if os.path.exists(feature_file):
    # NOTE: pickle is only appropriate here because the cache is a local
    # file written by this same notebook; never load an untrusted pickle.
    with open(feature_file, "rb") as f:
        features = pickle.load(f)
else:
    pdbbind_dir = "/home/evan/Documents/deep_docking/datasets/v2015/"

    def find_files(directory, pattern):
        """Yield paths of all files below ``directory`` matching ``pattern``."""
        for root, dirs, files in os.walk(directory):
            for basename in fnmatch.filter(files, pattern):
                yield os.path.join(root, basename)

    ligand_files = list(find_files(pdbbind_dir, "*ligand.mol2"))
    features = featurize_mols(ligand_files)
    with open(feature_file, "wb") as f:
        pickle.dump(features, f, protocol=2)

In [None]:
# Drop molecules that failed to parse (None) and molecules whose torsion
# column has 250 or more zero entries.
features = [
    f for f in features
    if f is not None and np.count_nonzero(f[1][1].toarray() == 0) < 250
]

In [None]:
len(features)

In [None]:
import tensorflow as tf
# Hyper-parameters shared by the generator and the discriminator.
# Number of graph-convolution layers; only the first n_layers entries of
# L_list are used by the conv stacks, while L_list[-1] sizes the first dense
# layer of each network.
n_layers = 2
# Leading (per-batch) dimension of every molecule-level placeholder below.
S = 1
# Second dimension of x / adj_matrix.
# NOTE(review): presumably atoms per molecule, matching max_n_atoms=200 used
# when featurizing -- confirm.
B = 200
L_list = [50, 50, 50, 50]
# Last dimension of x.
# NOTE(review): presumably the per-atom feature width produced by
# AdjacencyFingerprint -- confirm.
p = 75

x = tf.placeholder(tf.float32, shape=[S, B, p])

# One row per selected torsion slot, S*250 columns; it is cast to float32 and
# left-multiplied onto the (S*250, features) torsion matrix in discriminator().
non_zero_inds = tf.placeholder(tf.int32, shape=[None, S*250])

adj_matrix = tf.placeholder(tf.float32, shape=[S, B, B])
# 250 is the number of torsion slots allocated by get_torsions_angles.
dihed_indices = tf.placeholder(tf.float32, shape=[S, 250, B, 4])

label_placeholder = tf.placeholder(
    dtype='float32', shape=[S*250], name="label_placeholder")

# Fed by construct_feed_dict; no op in the visible code consumes it.
phase = tf.placeholder(dtype='bool', name='phase')

# Noise input; it is added to the activations of every generator conv layer.
z = tf.placeholder(tf.float32,
                          [None, L_list[0]], name='z')


In [None]:
#DISCRIMINATOR

# Per-layer weights, biases and activations of the discriminator's
# graph-convolution stack.  D_h_list is filled in by discriminator().
D_W_list = [None] * n_layers
D_b_list = [None] * n_layers
D_h_list = [None] * n_layers

for layer_idx in range(n_layers):
    # The first layer consumes the raw atom features (width p); later layers
    # consume the previous layer's output width.
    if layer_idx == 0:
        L_in, L_out, atom_matrix = p, L_list[0], x
    else:
        L_in, L_out = L_list[layer_idx-1], L_list[layer_idx]
        atom_matrix = D_h_list[layer_idx-1]

    D_W_list[layer_idx] = tf.Variable(
        tf.truncated_normal([L_in, L_out], seed=2017),
        name="D_W_list%d" % layer_idx)
    D_b_list[layer_idx] = tf.Variable(tf.zeros([1, L_out]))

# Dense head.  Its input is four concatenated atom embeddings plus the
# torsion angle itself, hence L_list[-1]*4 + 1.
D_W2 = tf.Variable(tf.truncated_normal([L_list[-1]*4+1, 100]))
D_b2 = tf.Variable(tf.zeros([1, 100]))

D_W3 = tf.Variable(tf.truncated_normal([100, 1]))
D_b3 = tf.Variable(tf.zeros([1, 1]))

D_W4 = tf.Variable(tf.truncated_normal([1, 10]))
D_b4 = tf.Variable(tf.zeros([1, 10]))

D_W5 = tf.Variable(tf.truncated_normal([10, 1]))
D_b5 = tf.Variable(tf.zeros([1, 1]))

def adjacency_conv_layer(atom_matrix, W, b, L_in, L_out, layer_idx, is_training=True):
    """Apply one graph-convolution layer of the discriminator.

    Multiplies the global ``adj_matrix`` placeholder with ``atom_matrix``,
    applies the affine map ``W``/``b`` followed by a sigmoid, and
    batch-normalizes the result.

    Args:
        atom_matrix: tensor that is matrix-multiplied by ``adj_matrix``.
        W: weight matrix used after flattening to ``(S*B, L_in)``.
        b: bias added after the weight multiplication.
        L_in: feature width of ``atom_matrix``.
        L_out: feature width of the output.
        layer_idx: layer number; only used for the progress print.
        is_training: forwarded to ``tf.contrib.layers.batch_norm``.

    Returns:
        Tensor reshaped to ``(S, B, L_out)`` and batch-normalized.
    """
    print("layer_idx: %d" % layer_idx)

    aggregated = tf.matmul(adj_matrix, atom_matrix, name="adj_conv1")
    # Flatten so one 2-D matmul handles every row at once.
    flat = tf.reshape(aggregated, shape=(S*B, L_in))
    activated = tf.nn.sigmoid(tf.matmul(flat, W) + b)
    per_molecule = tf.reshape(activated, (S, B, L_out))

    return tf.contrib.layers.batch_norm(per_molecule,
                                        center=True, scale=True,
                                        is_training=is_training)

def discriminator(angles, is_training=True):
    """Build the discriminator graph for one set of torsion angles.

    The atom features in the global placeholder ``x`` go through
    ``n_layers`` graph-convolution layers.  For each of the 250 torsion
    slots the embeddings of its four atoms are gathered (via
    ``dihed_indices``), concatenated with the candidate angle, restricted to
    the rows selected by ``non_zero_inds`` and scored by a small dense
    network.

    Args:
        angles: tensor with one angle per torsion slot; it is reshaped to a
            single column of ``S*250`` rows.
        is_training: forwarded to every batch-norm layer, including the ones
            inside the graph-convolution layers.

    Returns:
        ``(D_prob, D_logit)``: ``D_logit`` is the output of the last layer
        and ``D_prob`` is its sigmoid.

    Side effects:
        Overwrites the entries of the module-level ``D_h_list``.
    """
    for layer_idx in range(n_layers):
        if layer_idx == 0:
            L_in = p
            atom_matrix = x
        else:
            L_in = L_list[layer_idx-1]
            atom_matrix = D_h_list[layer_idx-1]
        L_out = L_list[layer_idx]

        # Forward is_training so the conv-layer batch norms honour it too.
        D_h_list[layer_idx] = adjacency_conv_layer(
            atom_matrix, D_W_list[layer_idx], D_b_list[layer_idx],
            L_in, L_out, layer_idx, is_training=is_training)

    L_final = L_list[n_layers-1]
    # Index the last layer explicitly instead of relying on the loop
    # variable still being bound after the loop.
    h_final = tf.reshape(D_h_list[n_layers-1], (S, B, L_final))

    # Gather, for every torsion slot, the embeddings of its four atoms.
    d0 = []
    for i in range(0, S):
        entry = h_final[i]
        mol_tuple = []
        for j in range(0, 4):
            # indices[t, a] is 1 when atom a is the j-th atom of torsion t,
            # so the matmul selects that atom's embedding for every torsion.
            indices = dihed_indices[i][:, :, j]
            atom_list = tf.matmul(indices, entry, name="disc1")
            atom_list = tf.reshape(atom_list, (250, L_final))
            mol_tuple.append(atom_list)
        mol_tuple = tf.reshape(tf.stack(mol_tuple, axis=1), (250, L_final*4))
        d0.append(mol_tuple)

    d0 = tf.concat(d0, axis=0)
    # Append the candidate angle as one extra feature column.
    d0 = tf.concat([d0, tf.reshape(angles, (-1, 1))], axis=1)
    # Keep only the rows selected by the one-hot rows of non_zero_inds.
    d0 = tf.matmul(tf.cast(non_zero_inds, tf.float32), d0)

    d2 = tf.nn.tanh(tf.matmul(d0, D_W2, name="disc2") + D_b2)
    d2 = tf.contrib.layers.batch_norm(d2,
                                      center=True, scale=True,
                                      is_training=is_training)

    d3 = tf.nn.tanh(tf.matmul(d2, D_W3, name="disc3") + D_b3)
    d3 = tf.contrib.layers.batch_norm(d3,
                                      center=True, scale=True,
                                      is_training=is_training)

    d4 = tf.nn.tanh(tf.matmul(d3, D_W4, name="disc4") + D_b4)

    # NOTE(review): the tanh bounds the logit to [-1, 1] before the sigmoid
    # and the cross-entropy loss -- confirm this is intended.
    D_logit = tf.nn.tanh(tf.matmul(d4, D_W5, name="disc5") + D_b5)

    D_prob = tf.nn.sigmoid(D_logit)
    return (D_prob, D_logit)



In [None]:
def atan2(y, x, epsilon=1.0e-12):
    """Element-wise two-argument arctangent built from ``tf.atan``.

    Exact zeros in ``x`` and ``y`` are first shifted by ``epsilon``.  The
    quadrant is then resolved with a chain of ``tf.where`` selections on the
    signs of ``x`` and ``y``.

    Args:
        y: tensor of ordinates.
        x: tensor of abscissae.
        epsilon: value added to entries that are exactly zero.

    Returns:
        Tensor of angles with the same shape as ``x``.
    """
    # Nudge exact zeros away from zero so y/x stays finite.
    x = tf.where(tf.equal(x, 0.0), x + epsilon, x)
    y = tf.where(tf.equal(y, 0.0), y + epsilon, y)

    ratio_angle = tf.atan(y / x)
    zeros = tf.zeros_like(x)
    half_pi = 0.5 * np.pi * tf.ones_like(x)

    x_negative = tf.less(x, 0.0)
    x_zero = tf.equal(x, 0.0)

    # Right half-plane: atan(y/x) is already the answer.
    angle = tf.where(tf.greater(x, 0.0), ratio_angle, zeros)
    # Left half-plane: shift by +/- pi depending on the sign of y.
    angle = tf.where(tf.logical_and(x_negative, tf.greater_equal(y, 0.0)),
                     ratio_angle + np.pi, angle)
    angle = tf.where(tf.logical_and(x_negative, tf.less(y, 0.0)),
                     ratio_angle - np.pi, angle)
    # x exactly zero (only possible when epsilon is 0).
    angle = tf.where(tf.logical_and(x_zero, tf.greater(y, 0.0)), half_pi, angle)
    angle = tf.where(tf.logical_and(x_zero, tf.less(y, 0.0)), -half_pi, angle)
    angle = tf.where(tf.logical_and(x_zero, tf.equal(y, 0.0)), zeros, angle)
    return angle

#GENERATOR

# Per-layer weights, biases and activations of the generator's
# graph-convolution stack.  G_h_list is filled in by generator().
G_W_list = [None] * n_layers
G_b_list = [None] * n_layers
G_h_list = [None] * n_layers

for layer_idx in range(n_layers):
    # The first layer consumes the raw atom features (width p); later layers
    # consume the previous layer's output width.
    if layer_idx == 0:
        L_in, L_out, atom_matrix = p, L_list[0], x
    else:
        L_in, L_out = L_list[layer_idx-1], L_list[layer_idx]
        atom_matrix = G_h_list[layer_idx-1]

    G_W_list[layer_idx] = tf.Variable(
        tf.truncated_normal([L_in, L_out], seed=2017),
        name="G_W_list%d" % layer_idx)
    G_b_list[layer_idx] = tf.Variable(tf.zeros([1, L_out]))

# Dense head.  Its input is four concatenated atom embeddings per torsion,
# hence L_list[-1]*4.
G_W2 = tf.Variable(tf.truncated_normal([L_list[-1]*4, 100]))
G_b2 = tf.Variable(tf.zeros([1, 100]))

G_W3 = tf.Variable(tf.truncated_normal([100, 100]))
G_b3 = tf.Variable(tf.zeros([1, 100]))

G_W4 = tf.Variable(tf.truncated_normal([100, 1]))
G_b4 = tf.Variable(tf.zeros([1, 1]))


def gen_adjacency_conv_layer(atom_matrix, W, b, L_in, L_out, layer_idx, z, is_training):
    """Apply one graph-convolution layer of the generator.

    Same structure as the discriminator's layer, but with a tanh activation
    and with the noise tensor ``z`` added to the activations before batch
    normalization.

    Args:
        atom_matrix: tensor that is matrix-multiplied by ``adj_matrix``.
        W: weight matrix used after flattening to ``(S*B, L_in)``.
        b: bias added after the weight multiplication.
        L_in: feature width of ``atom_matrix``.
        L_out: feature width of the output.
        layer_idx: layer number; used in the progress print and the op name.
        z: noise tensor added to the ``(S*B, L_out)`` activations.
        is_training: forwarded to ``tf.contrib.layers.batch_norm``.

    Returns:
        Tensor reshaped to ``(S, B, L_out)`` and batch-normalized.
    """
    print("layer_idx: %d" % layer_idx)

    aggregated = tf.matmul(adj_matrix, atom_matrix, name="gen0_%d" % layer_idx)
    flat = tf.reshape(aggregated, shape=(S*B, L_in))
    activated = tf.nn.tanh(tf.matmul(flat, W, name="gen1") + b)

    # Inject the noise into the activations of this layer.
    noisy = tf.add(activated, z)
    per_molecule = tf.reshape(noisy, (S, B, L_out))

    return tf.contrib.layers.batch_norm(per_molecule,
                                        center=True, scale=True,
                                        is_training=is_training)

def generator(z, is_training=True):
    """Build the generator graph that proposes one angle per torsion slot.

    The atom features in the global placeholder ``x`` go through
    ``n_layers`` noisy graph-convolution layers.  For each of the 250
    torsion slots the embeddings of its four atoms are gathered (via
    ``dihed_indices``) and mapped to a single value by a dense network.
    That value is wrapped with ``atan2(sin, cos)`` and made non-negative
    with ``tf.abs``.

    Args:
        z: noise tensor passed to every graph-convolution layer.
        is_training: forwarded to every batch-norm layer.

    Returns:
        ``(output, G_logit)``: ``output`` is the wrapped, absolute angle per
        torsion slot and ``G_logit`` is ``sigmoid(output)``.

    Side effects:
        Overwrites the entries of the module-level ``G_h_list``.
    """
    hidden, width_in = x, p
    for layer_idx in range(n_layers):
        width_out = L_list[layer_idx]
        hidden = gen_adjacency_conv_layer(
            hidden, G_W_list[layer_idx], G_b_list[layer_idx],
            width_in, width_out, layer_idx, z, is_training)
        G_h_list[layer_idx] = hidden
        width_in = width_out

    L_final = L_list[n_layers-1]
    g_h_final = tf.reshape(G_h_list[layer_idx], (S, B, L_final))

    # Gather, for every torsion slot, the embeddings of its four atoms.
    per_molecule = []
    for mol_idx in range(S):
        atom_feats = g_h_final[mol_idx]
        gathered = []
        for slot in range(4):
            selector = dihed_indices[mol_idx][:, :, slot]
            picked = tf.matmul(selector, atom_feats, name='gen2')
            gathered.append(tf.reshape(picked, (250, L_final)))
        per_molecule.append(
            tf.reshape(tf.stack(gathered, axis=1), (250, L_final*4)))
    g_d0 = tf.concat(per_molecule, axis=0)

    # Dense regressor: two tanh + batch-norm layers, then a linear output.
    g_d2 = tf.contrib.layers.batch_norm(
        tf.nn.tanh(tf.matmul(g_d0, G_W2, name='gen3') + G_b2),
        center=True, scale=True, is_training=is_training)
    g_d3 = tf.contrib.layers.batch_norm(
        tf.nn.tanh(tf.matmul(g_d2, G_W3) + G_b3),
        center=True, scale=True, is_training=is_training)
    g_d4 = tf.matmul(g_d3, G_W4) + G_b4

    # Wrap the raw value through sin/cos and keep only its magnitude.
    wrapped = atan2(tf.sin(g_d4), tf.cos(g_d4))
    output = tf.abs(wrapped)

    return (output, tf.nn.sigmoid(output))



In [None]:
# Variables updated by each optimizer.  The per-layer lists are included in
# full so the parameter sets stay correct when n_layers is changed; with
# n_layers == 2 the contents and order match the previous explicit lists.
# NOTE(review): the variables created inside tf.contrib.layers.batch_norm
# are not part of either list -- confirm that is intended.
theta_D = (list(D_W_list) + [D_W2, D_W3, D_W4, D_W5]
           + list(D_b_list) + [D_b2, D_b3, D_b4, D_b5])

theta_G = (list(G_W_list) + [G_W2, G_W3, G_W4]
           + list(G_b_list) + [G_b2, G_b3, G_b4])

In [None]:
def construct_feed_dict(X, start=None,
                      stop=None, y=None,
                      keep_prob=1.0, train=False):
    """Assemble the feed dict for entries ``X[start:stop]`` of the dataset.

    Each entry of ``X`` is a ``(mol_file, torsions, graph_feat)`` record as
    produced by ``featurize_mols``: ``entry[1]`` is the
    ``(torsion_indices, torsion_matrix)`` pair and ``entry[2]`` holds two
    sparse matrices that are fed to ``adj_matrix`` and ``x`` respectively.

    Args:
        X: list of featurized molecules.
        start: index of the first entry to use (defaults to 0).
        stop: index one past the last entry to use (defaults to ``len(X)``).
        y: unused; kept for interface compatibility.
        keep_prob: unused; kept for interface compatibility.
        train: value fed to the ``phase`` placeholder.

    Returns:
        Dict mapping the module-level placeholders to numpy values.  Fresh
        uniform noise in ``[-1, 1)`` is drawn for ``z`` on every call.
    """
    # Default each bound independently so that passing only `start` works.
    if start is None:
        start = 0
    if stop is None:
        stop = len(X)

    rows = [X[idx] for idx in range(start, stop)]

    adj = [row[2][0].toarray().astype(np.float32) for row in rows]
    A_batch = [row[2][1].toarray() for row in rows]
    D_batch = [row[1][0] for row in rows]

    # Stack the per-molecule torsion columns into one flat label vector.
    y_batch = np.squeeze(np.concatenate([row[1][1].toarray() for row in rows]))

    # Slots whose angle is exactly zero are treated as padding.
    non_zero_batch = np.where(y_batch != 0.)[0]

    # One one-hot row per non-padding slot; built directly with numpy so the
    # result does not depend on the installed scikit-learn version.
    n_slots = S*X[0][1][1].shape[0]
    non_zero_onehot = np.zeros((len(non_zero_batch), n_slots))
    non_zero_onehot[np.arange(len(non_zero_batch)), non_zero_batch] = 1.0

    # The noise width follows the `z` placeholder, i.e. L_list[0].
    z_batch = np.random.uniform(-1., 1., size=(1, L_list[0]))

    feed_dict = {x: A_batch,
                 adj_matrix: adj,
                 phase: train,
                 label_placeholder: y_batch,
                 non_zero_inds: non_zero_onehot,
                 dihed_indices: D_batch,
                 z: z_batch
                }
    return feed_dict



In [None]:
# Wire up the GAN: two generator graphs (inference / training mode) and two
# discriminator graphs (real angles / generated angles), all built on the
# same module-level weight variables.
# NOTE(review): every generator()/discriminator() call also invokes
# tf.contrib.layers.batch_norm again; whether the batch-norm variables are
# shared between the two calls should be confirmed.
G_pred, G_pred_logit = generator(z, is_training=False)
G_sample, G_sample_logit = generator(z, is_training=True)

D_real, D_logit_real = discriminator(label_placeholder)
D_fake, D_logit_fake = discriminator(G_sample)

# Original minimax formulation, kept for reference:
# D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
# G_loss = -tf.reduce_mean(tf.log(D_fake))

# Alternative losses:
# -------------------
# Discriminator: real -> 1, fake -> 0.  Generator: make fakes score as 1.
D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)))
D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
D_loss = D_loss_real + D_loss_fake
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

# Each optimizer only updates its own network's variables.
D_solver = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(G_loss, var_list=theta_G)

mb_size = S

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Only the first featurized molecule is used for training here.
train_dataset = features[:1]
n_train = len(train_dataset)
preds = []

for it in range(1000):
    if it % 100 == 0:
        print("Training epoch %d" %it)
    # Batch boundaries in steps of S; consecutive pairs are (start, stop).
    batch_sched = list(range(0, n_train+1,S))
    for j in range(0, len(batch_sched)-1):
        start = batch_sched[j]
        stop = batch_sched[j+1]
        feed_dict = construct_feed_dict(train_dataset, start, stop)

        # One discriminator step followed by one generator step per batch.
        _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict=feed_dict)
        _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict=feed_dict)

    if it % 100 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'. format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()

    if it % 100 == 0:
        # Show the first ten generated angles for the last batch fed.
        samples = sess.run(G_sample, feed_dict=feed_dict)
        print(samples[:10])

In [None]:
batch_sched

In [None]:
# Continue training for 10001 more iterations, reporting every 1000th.
for it in range(10001):
    if it % 1000 == 0:
        print("Training epoch %d" % it)

    # Batch boundaries in steps of S; consecutive pairs are (start, stop).
    batch_sched = list(range(0, n_train+1, S))
    for j in range(len(batch_sched) - 1):
        start, stop = batch_sched[j], batch_sched[j+1]
        feed_dict = construct_feed_dict(train_dataset, start, stop)

        # One discriminator step followed by one generator step per batch.
        _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict=feed_dict)
        _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict=feed_dict)

    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'.format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()
        # Show the first ten generated angles for the last batch fed.
        samples = sess.run(G_sample, feed_dict=feed_dict)
        print(samples[:10])

In [None]:
# Draw 100 generator samples for the first training molecule.  Each call to
# construct_feed_dict draws fresh noise for z.  Only the entry at index 2 of
# each sample is kept.
preds = []
for k in range(0,100):
    feed_dict = construct_feed_dict(train_dataset, 0, 1)
    samples = sess.run(G_sample, feed_dict=feed_dict)[2]
    preds.append(samples)

In [None]:
plt.hist(np.concatenate(preds))

In [None]:
preds[4]

In [None]:
train_dataset[0][1][1].toarray()

In [None]:
# NOTE(review): `gan` is not defined anywhere in the visible notebook; this
# cell presumably relies on an object from an earlier session -- confirm or
# remove.
res = gan.predict()

In [None]:
res

In [None]:
plt.hist(res, bins=50)
plt.show()

In [None]:
features[0][1][1].toarray()

In [None]:
features[0][1][1].toarray()

In [None]:
# NOTE(review): `gan` is not defined anywhere in the visible notebook; this
# cell presumably relies on an object from an earlier session -- confirm or
# remove.
preds = []
for i in range(0,100):
    preds.append(gan.predict(features)[0][2])

In [None]:
np.mean(preds)

In [None]:
features[0][2].atom_features