In [None]:
from google.colab import output
# Turn on Colab's custom widget manager for this session.
# NOTE(review): presumably required by the nglview viewers used in later
# cells — confirm.
output.enable_custom_widget_manager()

Support for third-party widgets will remain active for the duration of the session. To disable support, call `output.disable_custom_widget_manager()`.

In [None]:
# Load the test protein from the "public-datasets-deepmind-alphafold-v4" bucket.
protein_id = 'AF-A0A5C2FU82-F1'
blob = client.bucket("public-datasets-deepmind-alphafold-v4").blob(
    protein_id + '-model_v4.cif')
parser = PDB.FastMMCIFParser()
# Parse inside a `with` block so the blob's file handle is closed as soon as
# the structure has been read instead of being left open.
with blob.open() as cif_handle:
    structure = parser.get_structure(protein_id, cif_handle)


In [None]:
def PreProcessPDBStructure(pdb_structure):
    """Flatten a structure into per-atom arrays with mean-centered coordinates.

    Args:
        pdb_structure: Object exposing `get_id()` and `get_residues()`. Each
            residue must provide `get_resname()` and `get_atoms()`; each atom
            must provide `get_name()` and `get_coord()`.

    Returns:
        A dict with four entries:
          'name': the value of `pdb_structure.get_id()`.
          'residue_names': array holding the owning residue's name, one
              entry per atom.
          'atom_names': array of atom names, one entry per atom.
          'normalized_coordinates': the stacked atom coordinates with the
              mean over all atoms subtracted.
    """
    # One (residue name, atom name, coordinate) record per atom, in the order
    # the structure yields its residues and atoms.
    per_atom = [
        (residue.get_resname(), atom.get_name(), atom.get_coord())
        for residue in pdb_structure.get_residues()
        for atom in residue.get_atoms()
    ]
    resnames = [record[0] for record in per_atom]
    names = [record[1] for record in per_atom]
    positions = [record[2] for record in per_atom]

    # Subtract in place so the result keeps the dtype of the stacked
    # coordinates.
    centered = np.array(positions)
    centered -= np.mean(positions, 0)

    return {
        'name': pdb_structure.get_id(),
        'residue_names': np.array(resnames),
        'atom_names': np.array(names),
        'normalized_coordinates': centered,
    }

In [None]:
def _FeaturesFromPreprocessedStructure(
    preprocessed_structure, residue_names_preprocessor,
    atom_names_preprocessor):
  """Builds a feature dict with a leading axis of size 1 from a preprocessed structure.

  Args:
    preprocessed_structure: Dict holding 'residue_names', 'atom_names' and
      'normalized_coordinates' entries.
    residue_names_preprocessor: Object whose `lookup` is applied to the
      residue-name tensor.
    atom_names_preprocessor: Object whose `lookup` is applied to the
      atom-name tensor.

  Returns:
    Dict with keys 'residue_names', 'atom_names' and
    'normalized_coordinates'; each value has a new axis inserted at
    position 0.
  """
  def _with_leading_axis(tensor):
    return tf.expand_dims(tensor, 0)

  residue_lookup = residue_names_preprocessor.lookup(
      tf.constant(preprocessed_structure['residue_names']))
  atom_lookup = atom_names_preprocessor.lookup(
      tf.constant(preprocessed_structure['atom_names']))
  coordinates = tf.constant(preprocessed_structure['normalized_coordinates'])

  features = {}
  features['residue_names'] = _with_leading_axis(residue_lookup)
  features['atom_names'] = _with_leading_axis(atom_lookup)
  features['normalized_coordinates'] = _with_leading_axis(coordinates)
  return features

In [None]:
def UpdateStructure(structure, new_coordinates):
  """Overwrites the coordinate of every atom in `structure`, in iteration order.

  Args:
    structure: Object exposing `get_atoms()` and
      `atom_to_internal_coordinates()`. It is modified in place.
    new_coordinates: Indexable collection; entry i is assigned to the i-th
      atom yielded by `structure.get_atoms()`.
  """
  for index, atom in enumerate(structure.get_atoms()):
    atom.set_coord(new_coordinates[index])
  # Have the structure rebuild its internal coordinates now that the atom
  # positions have changed.
  structure.atom_to_internal_coordinates(True)

## Original Structure

In [None]:
# Flatten the loaded structure into per-atom arrays, then turn those arrays
# into the feature dict (leading axis of size 1) passed to the model below.
# `residue_names_preprocessor` and `atom_names_preprocessor` are not defined
# in the cells shown here; they must already exist in the kernel.
preprocessed_structure = PreProcessPDBStructure(structure)
original_data = _FeaturesFromPreprocessedStructure(
    preprocessed_structure, residue_names_preprocessor, atom_names_preprocessor)

In [None]:
#conditioning = diffusion_model._conditioner.conditioning(
#    original_data['residue_names'], original_data['atom_names'])
#encoding = diffusion_model._encoder.encode(
#    original_data['normalized_coordinates'], conditioning)
#diffusion_model.set_scorer(
#    ScoreTrain(PerfectScoreModel(encoding)))

#gamma_module = tf.Module()
#gamma_module.gamma_min = -6.0
#gamma_module.gamma_max = 10.0
#diffusion_model.set_gamma_module(gamma_module)

In [None]:
# View the structure as loaded, before a later cell overwrites its coordinates.
nglview.show_biopython(structure)

In [None]:
# Overwrite the atoms of `structure` in place with the mean-centered
# coordinates, then view the result. From here on `structure` no longer holds
# the coordinates read from the file.
UpdateStructure(structure, preprocessed_structure['normalized_coordinates'])
nglview.show_biopython(structure)

In [None]:
# Dump the mean-centered coordinates (one row per atom).
print(preprocessed_structure['normalized_coordinates'])

In [None]:
# Run the model's reconstruction on the original features and unpack its five
# results.
# NOTE(review): the meaning of the first argument (1) and of each returned
# value is defined by diffusion_model.reconstruct, which is not visible
# here — confirm.
(error_dist, true_dist, z_0, z_t, new_z_0) = diffusion_model.reconstruct(1, original_data)

In [None]:
# Mean of `error_dist`, the first value returned by reconstruct.
print(error_dist.mean())

In [None]:
# Mean of `true_dist`, the second value returned by reconstruct.
print(true_dist.mean())

In [None]:
# Same dump as before the reconstruct call: the mean-centered coordinates
# that the distribution means are compared against in the cells below.
print(preprocessed_structure['normalized_coordinates'])

In [None]:
def NetImprovement(true_solution, error_solution, actual_solution):
  """Ratio of two mean absolute errors measured against `true_solution`.

  Computes mean(|true - actual|) / mean(|true - error|). A value below 1
  means `actual_solution` is, on average, closer to `true_solution` than
  `error_solution` is.

  Args:
    true_solution: Reference values.
    error_solution: Values whose error forms the denominator.
    actual_solution: Values whose error forms the numerator.

  Returns:
    The ratio of the two mean absolute errors.
  """
  actual_error = tf.reduce_mean(tf.math.abs(true_solution - actual_solution))
  baseline_error = tf.reduce_mean(tf.math.abs(true_solution - error_solution))
  return actual_error / baseline_error

In [None]:
# Mean absolute difference between the centered coordinates and the first
# entry of true_dist's mean.
print(tf.reduce_mean(tf.math.abs(preprocessed_structure['normalized_coordinates'] - true_dist.mean()[0])))

In [None]:
# Mean absolute difference between the centered coordinates and the first
# entry of error_dist's mean.
print(tf.reduce_mean(tf.math.abs(preprocessed_structure['normalized_coordinates'] - error_dist.mean()[0])))

In [None]:
# Ratio of the two mean absolute errors printed above:
# (true_dist mean vs. coordinates) / (error_dist mean vs. coordinates).
print(
  NetImprovement(preprocessed_structure['normalized_coordinates'],
                 error_dist.mean()[0], true_dist.mean()[0]))

In [None]:
# Same ratio on the z values returned by reconstruct:
# mean|z_0 - new_z_0| / mean|z_0 - z_t|.
print(NetImprovement(z_0, z_t, new_z_0))

In [None]:
# Dump new_z_0, the fifth value returned by reconstruct.
print(new_z_0)

In [None]:
import scipy
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Overwrite the atoms of `structure` in place with the first entry of
# true_dist's mean (presumably the single batch element — confirm) and view
# the result.
UpdateStructure(structure, true_dist.mean()[0])
nglview.show_biopython(structure)

In [None]:
def PlotToDist(timesteps, gamma_min, gamma_max, x_0):
  """Plots and prints a norm-based curve over a linear gamma schedule.

  Time t takes `timesteps` + 1 evenly spaced values in [0, 1]. Gamma moves
  linearly from `gamma_max` at t=0 to `gamma_min` at t=1, and
  sigma^2 = sigmoid(gamma). The curve is

      (1 - sqrt(1 - sigma^2)) * ||x_0|| + sqrt(sigma^2) * sqrt(N)

  where N is the number of elements of `x_0`. The curve is plotted against t
  and also printed; vertical lines are drawn at t=0.9 and t=1 and the y axis
  is limited to [0, 25].

  Args:
    timesteps: Number of intervals the unit time range is divided into.
    gamma_min: Gamma value reached at t=1.
    gamma_max: Gamma value used at t=0.
    x_0: Tensor whose norm and element count scale the two terms.
  """
  times = np.arange(timesteps + 1) / timesteps
  gamma_schedule = gamma_max + (gamma_min - gamma_max) * times
  variances = 1 / (1 + np.exp(-gamma_schedule))  # sigmoid(gamma)

  signal_norm = tf.norm(x_0).numpy()
  # Square root of the element count of x_0.
  noise_norm = tf.math.sqrt(tf.math.reduce_sum(tf.ones_like(x_0))).numpy()

  curve = (1 - np.sqrt(1 - variances)) * signal_norm + np.sqrt(variances) * noise_norm
  plt.plot(times, curve)
  print(curve)
  for marker in (0.9, 1):
    plt.axvline(x=marker)
  plt.ylim([0, 25])

In [None]:
# 10000 steps, gamma running from 6 (t=0) down to -6 (t=1), scaled by z_0.
PlotToDist(10000, -6, 6, z_0)

In [None]:
# Norm of the difference between z_0 and new_z_0, then between z_0 and z_t.
print(tf.norm(z_0 - new_z_0))
print(tf.norm(z_0 - z_t))

In [None]:
# Mean of true_dist, then the norm of its difference from the centered
# coordinates divided by 811.
# NOTE(review): 811 is a hard-coded divisor, presumably the atom count of
# this particular structure — confirm, and prefer deriving it from the
# coordinate array's shape.
print(true_dist.mean())
print(tf.norm(true_dist.mean()[0] - preprocessed_structure['normalized_coordinates'])/811)

In [None]:
# Element-wise difference between true_dist's mean and the centered
# coordinates, then its 1-norm divided by 811.
# NOTE(review): 811 is a hard-coded divisor, presumably the atom count of
# this particular structure — confirm, and prefer deriving it from the
# coordinate array's shape.
print(true_dist.mean()[0] - preprocessed_structure['normalized_coordinates'])
print(tf.norm(true_dist.mean()[0] - preprocessed_structure['normalized_coordinates'], ord=1)/811)

In [None]:
def DecoderPerformance(gammas_to_test):
  """Plots the decoder's reconstruction error as a function of gamma.

  The original coordinates are encoded once. For every gamma the encoding is
  passed through `variance_preserving_map` with one shared noise sample,
  divided by `alpha(gamma)`, decoded, and compared with the mean-centered
  reference coordinates. Reads the notebook globals `diffusion_model`,
  `original_data` and `preprocessed_structure`.

  Args:
    gammas_to_test: Sequence of gamma values; also used as the x axis of the
      plot.
  """
  # Condition on residue names AND atom names. This call previously passed
  # 'residue_names' for both arguments, so the atom names never reached the
  # conditioner.
  cond = diffusion_model._conditioner.conditioning(
      original_data['residue_names'], original_data['atom_names'], training=False)
  emb = diffusion_model._encoder.encode(
      original_data['normalized_coordinates'], cond, training=False)
  # Draw the noise once so every gamma is evaluated on the same sample.
  eps = tf.random.normal(tf.shape(emb))
  errors = []
  for g in gammas_to_test:
    emb_with_error = diffusion_model.variance_preserving_map(emb, g, eps) / diffusion_model.alpha(g)
    solution = diffusion_model._decoder.decode(emb_with_error, cond, training=False)
    # NOTE(review): 811 is a hard-coded divisor, presumably the atom count of
    # the test structure — confirm before using another protein.
    errors.append(tf.norm(solution.mean()[0] - preprocessed_structure['normalized_coordinates'], ord=1)/811)
  plt.plot(gammas_to_test, errors)

In [None]:
# NOTE(review): `perfect_solution` is not defined in the cells shown here; the
# cell that creates it must have been run first.
# 1-norm of the difference from the centered coordinates divided by 811, then
# the element-wise difference itself.
print(tf.norm(perfect_solution.mean()[0] - preprocessed_structure['normalized_coordinates'], ord=1)/811)
print(perfect_solution.mean()[0] - preprocessed_structure['normalized_coordinates'])
# `np.float` was removed in NumPy 1.24 and now raises AttributeError; the
# builtin `float` gives the same float64 dtype.
DecoderPerformance([float(x) for x in np.arange(10, 20, 0.5, dtype=float)])

# Debug Model Issues

In [None]:
# Evaluate the model's loss on the original features; as the last expression
# of the cell, the returned value is displayed.
diffusion_model.compute_model_loss(original_data)