<div style="font-size: 40pt; font-weight: bold; padding: 40px 0 20px 0">Energy Function Training</div>

**by Eric Klavins**

Copyright &copy; 2019, University of Washington

**Abstract**: The Hydrophobic-polar (HP) protein folding model is a simplified 2D model of protein folding that was used to show that tasks such as findinfg the minimum eneryg conformation of a protein are NP-Hard. Here, we learn the energy function of the HP model, training it on conformations found during MCMC optimiztion.

# Includes

In [1]:
from __future__ import print_function

import math
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from collections import namedtuple
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from hp import HP
from translate import Translate, Encoder, Decoder
from energy import Energy

%matplotlib inline

dev = "cuda:0"

# Training

In [None]:
energy = Energy(20)
energy.to(dev)

energy_optimizer = optim.Adam(energy.parameters(), lr=0.0001)
mse_loss = nn.MSELoss()

losses = []

# Make a list of proteins
proteins = [HP.random(20) for _ in range(1000)]
sequences = torch.stack([p.one_hot().to(dev) for p in proteins])

for i in range(10000):
    
    # Run optimize on them until a new conformation and energy is found
    energies = torch.tensor([p.minimize_step(t=2) for p in proteins],dtype=torch.float,device=dev)
    energies = energies.unsqueeze(1)

    # Predict the energies of the conformations
    conformations = torch.stack([p.conf_one_hot().to(dev) for p in proteins])
    inputs = energy.combine(sequences,conformations)
    estimates = energy(inputs)

    # Compute loss and take a gradient step
    loss = mse_loss(estimates, energies)
    energy_optimizer.zero_grad()
    loss.backward()
    energy_optimizer.step()    
    
    sys.stdout.write("\r%d: Loss: %f    " % (i+1,loss))

    losses.append(loss)


3040: Loss: 1.467416    

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



6186: Loss: 1.182645    

In [None]:
plt.plot(losses)
# plt.ylim(-10,10)

In [None]:
fig,ax = plt.subplots(5,5,figsize=(20,20))
for i in range(25):
    proteins[i].show(ax[int(i/5),i%5])
    ax[int(i/5),i%5].set_title("%f, %f" % (proteins[i].energy(), estimates[i]))

# Evaluation

In [None]:
# Test on a new set
p = [HP.random(20) for _ in range(25)]
s = torch.stack([p.one_hot().to(dev) for p in proteins])

e = torch.tensor([p.energy() for p in proteins],dtype=torch.float,device=dev)
e = energies.unsqueeze(1)

# Predict the energies of the conformations
c = torch.stack([p.conf_one_hot().to(dev) for p in proteins])
x = energy.combine(s,c)
est = energy(x)

fig,ax = plt.subplots(5,5,figsize=(20,20))
for i in range(25):
    p[i].show(ax[int(i/5),i%5])
    ax[int(i/5),i%5].set_title("%f, %f" % (p[i].energy(), est[i]))