In [1]:
import os
import torch
import numpy as np

# Make the pNeRF helpers bundled with openprotein importable.
# `sys` is imported explicitly instead of being reached through the undocumented `os.sys` attribute.
import sys

print(sys.path)
# Resolve the directory relative to the working directory (the notebook is run from `src`,
# like the relative data paths used further down) rather than a machine-specific absolute path.
pnerf_dir = os.path.abspath(os.path.join("openprotein", "pnerf"))
# Guard against piling up duplicate entries when this cell is re-run.
if pnerf_dir not in sys.path:
    sys.path.append(pnerf_dir)

# CODE FILES HERE
from model_params import get_model_data_dcgan
from models.dcgan import Dcgan, Generator, Discriminator
from solver import Solver
from directories import Directories
from dataloader import DataLoader
from plots import plot_losses, plot_z_samples, plot_z_grid
from sampling import dcgan_sampling
from openprotein.preprocessing import process_raw_data
import openprotein.preprocessing

# SETTINGS HERE
os.environ['CUDA_LAUNCH_BLOCKING'] = "1" # debugging aid: run CUDA kernels synchronously so errors point at the failing call
# render matplotlib figures inside the notebook
%matplotlib inline
# for auto-reloading external modules, so edits to the project's .py files are picked up without a kernel restart
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
# suppress cluttering warnings in the cell outputs
# NOTE(review): with no category or module filter this silences every warning, including deprecation notices
import warnings
warnings.filterwarnings('ignore')

['/home/matthias/DIKU Project Protein Folding/protein-folding-gans/src', '/home/matthias/.local/share/virtualenvs/openprotein-xj-YAKcS/lib/python37.zip', '/home/matthias/.local/share/virtualenvs/openprotein-xj-YAKcS/lib/python3.7', '/home/matthias/.local/share/virtualenvs/openprotein-xj-YAKcS/lib/python3.7/lib-dynload', '/home/matthias/anaconda3/envs/openprotein/lib/python3.7', '', '/home/matthias/.local/share/virtualenvs/openprotein-xj-YAKcS/lib/python3.7/site-packages', '/home/matthias/.local/share/virtualenvs/openprotein-xj-YAKcS/lib/python3.7/site-packages/IPython/extensions', '/home/matthias/.ipython']


In [2]:
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Additional info when using CUDA
if device.type == 'cuda':
    bytes_per_gb = 1024 ** 3
    # torch.cuda.memory_cached is deprecated in favour of memory_reserved;
    # fall back to the old name only on PyTorch builds that predate the rename.
    reserved_memory = getattr(torch.cuda, 'memory_reserved', None)
    if reserved_memory is None:
        reserved_memory = torch.cuda.memory_cached
    # Query the selected device itself rather than a hard-coded index.
    print(torch.cuda.get_device_name(device))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(device) / bytes_per_gb, 1), 'GB')
    print('Cached:   ', round(reserved_memory(device) / bytes_per_gb, 1), 'GB')

Using device: cpu



In [3]:
# --- Experiment configuration -------------------------------------------------
dataset_arg = "proteins"      # key used to look up the DCGAN hyper-parameters
prefix = "_max_length/"       # suffix of the preprocessed-data folder name
residue_fragments = 16        # fragment size handed to the data loader and the networks

# Hyper-parameter dictionary for the chosen dataset.
data = get_model_data_dcgan(dataset_arg)

# Override the module-level sequence length limit before preprocessing runs.
openprotein.preprocessing.MAX_SEQUENCE_LENGTH = 128
print(openprotein.preprocessing.MAX_SEQUENCE_LENGTH, residue_fragments)

# Preprocess the raw files without forcing an overwrite of existing results.
# NOTE(review): the meaning of the first positional argument (False) is not visible here — confirm.
process_raw_data(False, force_pre_processing_overwrite=False, prefix=prefix)

128 16
Starting pre-processing of raw data...
files ['openprotein/data/raw/training_50.txt', 'openprotein/data/raw/training_95.txt', 'openprotein/data/raw/training_100.txt', 'openprotein/data/raw/validation', 'openprotein/data/raw/testing.txt', 'openprotein/data/raw/sample.txt', 'openprotein/data/raw/training_70.txt', 'openprotein/data/raw/training_90.txt', 'openprotein/data/raw/training_30.txt']
Preprocessed file for training_50.txt already exists.
Skipping pre-processing for this file...
Preprocessed file for training_95.txt already exists.
Skipping pre-processing for this file...
Preprocessed file for training_100.txt already exists.
Skipping pre-processing for this file...
Preprocessed file for validation already exists.
Skipping pre-processing for this file...
Preprocessed file for testing.txt already exists.
Skipping pre-processing for this file...
Preprocessed file for sample.txt already exists.
Skipping pre-processing for this file...
Preprocessed file for training_70.txt alrea

In [4]:
# Names (without extension) of the preprocessed files to train and validate on.
# NOTE(review): validation currently reuses the training file — confirm this is intended.
train_file_name = "testing"
val_file_name = train_file_name

# Both files live in the same folder, e.g. "openprotein/data/preprocessed/128_max_length/".
preprocessed_dir = "openprotein/data/preprocessed/" + str(openprotein.preprocessing.MAX_SEQUENCE_LENGTH) + prefix
training_file, validation_file = [
    preprocessed_dir + file_name + ".txt.hdf5"
    for file_name in (train_file_name, val_file_name)
]
print("training file: {0}\nvalidation file: {1}".format(training_file, validation_file))

training file: openprotein/data/preprocessed/128_max_length/testing.txt.hdf5
validation file: openprotein/data/preprocessed/128_max_length/testing.txt.hdf5


In [5]:
# Result directories and data loader for the DCGAN run on C-alpha atoms.
dataset_name = dataset_arg.lower()
directories = Directories("dcgan", dataset_name, data["z_dim"], make_dirs=True)
data_loader = DataLoader(
    directories,
    data["batch_size"],
    dataset_name,
    training_file=training_file,
    validation_file=validation_file,
    residue_fragments=residue_fragments,
    atom="calpha",
)

Reading cache file in openprotein/data/preprocessed/128_max_length/testing_contact_maps.dat
Reading cache file in openprotein/data/preprocessed/128_max_length/testing_contact_maps.dat
(16, 16) 256 3 3 82 3 82


In [None]:
# Seed the random number generators so weight initialisation and noise sampling
# are repeatable between runs of this notebook.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Build the DCGAN loss wrapper, the generator and the discriminator, then train them.
# NOTE(review): `device` from the device-selection cell is not passed to anything here —
# confirm that Solver takes care of device placement.
dcgan = Dcgan(data_loader.input_dim, data["z_dim"])
generator = Generator(data["z_dim"], data_loader.input_dim, data_loader.img_dims, res=residue_fragments)
discriminator = Discriminator(1, 1, res=residue_fragments)
solver = Solver(dcgan, generator, discriminator, data["epochs"], data_loader, data["optimizer_G"],
                data["optimizer_D"], data["optim_config_G"], data["optim_config_D"], save_model_state=False)
solver.main()

+++++ START RUN | saved files in dcgan/proteins_z=100_0 +++++
params used:
 epochs: 50
dim(z): 100
batch_size: 32
optimizer_G: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.5, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0.0
)
optimizer_D: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.5, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0.0
)
dataset: proteins
img dims: (16, 16)
atom: calpha
Dcgan(
  (loss): BCELoss()
)Generator(
  (layers): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1))
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
    (3): ConvTranspose2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.2)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
   

In [None]:
# To load a previously saved model instead of training one, uncomment the lines below and point res_dir at that run's results directory
#res_dir = "../../results/proteins_z=100_0/"
#solver = torch.load(res_dir+"model_state.pt", map_location="cpu")
#solver.generator.eval()
#solver.discriminator.eval()
#solver.data_loader.directories.make_dirs = False

In [None]:
# Generator and discriminator loss curves over all training epochs
loss_history = solver.train_loss_history
plot_losses(solver, loss_history["g_loss"], loss_history["d_loss"])

In [None]:
# Draw 25 samples from the generator and convert them to a NumPy array.
# .cpu() is a no-op for CPU tensors and makes the conversion work when the samples live on a GPU.
samples = dcgan_sampling(generator, solver.model.z_dim, solver.data_loader.img_dims, 25).detach().cpu().numpy()
imgs, rows, cols = solver.get_sample_stats()
# `res_dir` is only assigned by the optional (commented-out) model-loading cell; without it this
# cell used to fail with a NameError on a fresh kernel. Fall back to the working directory.
grid_dir = globals().get("res_dir", "")
plot_z_grid(samples[:imgs], grid_dir + "grid.png", rows=rows, cols=cols, fill=True)

In [None]:
#import matplotlib.pyplot as plt
#import torchvision
#asd = torch.FloatTensor(np.load("openprotein/data/preprocessed/2000_max_length/testing_contact_maps.dat", allow_pickle=True))
#test = asd
#for i in range(10):
#    plt.axis("off")
#    plt.imshow(test[i])
#    fig1 = plt.gcf()
#    plt.draw()
#    fig1.savefig('tessstttyyy'+str(i)+'.png', dpi=100, bbox_inches="tight", pad_inches=0.0, transparent=True)