In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
# Expose only GPU index 2 to this process. This is assigned before the later
# cell that imports torch.
# NOTE(review): the index is machine-specific — adjust it for the host in use.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import numpy as np

In [None]:
import alpine

In [None]:
# Function credits to @jgbrasier from Medium: 
# https://medium.com/@jgbrasier/working-with-pdb-files-in-python-7b538ee1b5e4

import pandas as pd
from biopandas.pdb import PandasPdb
from prody import parsePDBHeader
from typing import Optional

def read_pdb_to_dataframe(
    pdb_path: Optional[str] = None,
    model_index: int = 1,
    parse_header: bool = True,
    ) -> "tuple[pd.DataFrame, Optional[dict]]":
    """
    Read a PDB file and return its atom table together with the parsed header.

    Args:
        pdb_path (str): Path to a local PDB file to read. The parameter keeps its
            ``None`` default for backward compatibility, but a path is required.
        model_index (int, optional): Index of the model to extract from the PDB file, in case
            it contains multiple models. Defaults to 1.
        parse_header (bool, optional): Whether to parse the PDB header and extract metadata.
            Defaults to True.

    Returns:
        tuple: ``(atoms, header)`` where ``atoms`` is a DataFrame with one row per
            atom (all ATOM rows followed by all HETATM rows of the selected model)
            and ``header`` is the value returned by ``parsePDBHeader``, or ``None``
            when ``parse_header`` is False.

    Raises:
        ValueError: If ``pdb_path`` is None, or if the selected model contains no
            ATOM records.
    """
    # Fail early with a clear message instead of an obscure error from the parser.
    if pdb_path is None:
        raise ValueError("pdb_path must be the path to a local PDB file, got None")

    atomic_df = PandasPdb().read_pdb(pdb_path)
    header = parsePDBHeader(pdb_path) if parse_header else None

    atomic_df = atomic_df.get_model(model_index)
    if len(atomic_df.df["ATOM"]) == 0:
        raise ValueError(f"No model found for index: {model_index}")

    # ATOM rows first, then HETATM rows; the original per-table indices are kept.
    return pd.concat([atomic_df.df["ATOM"], atomic_df.df["HETATM"]]), header

In [None]:
# Relative path of the PDB file that the rest of the notebook loads.
protein_filename = "./data/proteins/149l.pdb"

In [None]:
# Load the ATOM + HETATM rows into one DataFrame and keep the parsed header next to it.
protein_data , header = read_pdb_to_dataframe(protein_filename)

In [None]:
# Distinct values found in the occupancy column of the loaded structure.
print(protein_data['occupancy'].unique())

In [None]:
# Table dimensions and the metadata fields available in the parsed header.
print(protein_data.shape)
print(header.keys())
# Bare last expression: rendered as the cell's output.
protein_data

In [None]:
# Keep the distinct occupancy values in a variable.
# NOTE(review): `all_labels` is not read by any later cell visible in this notebook.
all_labels = protein_data['occupancy'].unique()
print(all_labels)

In [None]:
# Spot check: (x, y, z) coordinates of every row whose atom name is 'C5'.
protein_data.loc[protein_data['atom_name']=='C5', ['x_coord', 'y_coord', 'z_coord']].values


In [None]:
all_atoms = protein_data['atom_name'].unique()
print(all_atoms)


# One [x, y, z, occupancy] row per atom, in the SAME order as the rows of
# `protein_data`. Do not group by atom name here: grouping reorders the rows, and
# per-atom values derived from this array are later written back into a copy of
# `protein_data` by position, so a reordering would attach them to the wrong atoms.
pts_coords_occupancy = list(
    protein_data[['x_coord', 'y_coord', 'z_coord', 'occupancy']].values
)

    
    

In [None]:
# Stack the per-atom rows into one 2-D array; columns are x, y, z, occupancy.
pts_coords_occupancy_np = np.array(pts_coords_occupancy)
print(pts_coords_occupancy_np.shape)

In [None]:
# Report the (min, max) extent of the atoms along x, y and z — one line per axis.
for axis in range(3):
    axis_values = pts_coords_occupancy_np[:, axis]
    print(axis_values.min(), axis_values.max())

In [None]:

# hidx,widx,tidx = np.where(pts_coords_occupancy_np > 0.99)
# occupancy_values = pts_coords_occupancy_np[hidx.min():hidx.max(), widx.min():widx.max(), tidx.min():tidx.max()]


In [None]:
# Per-axis bounding box of the atom coordinates (columns 0..2 are x, y, z).
xyz_min = pts_coords_occupancy_np[:, :3].min(axis=0)
xyz_max = pts_coords_occupancy_np[:, :3].max(axis=0)
H_max, W_max, D_max = xyz_max
H_min, W_min, D_min = xyz_min

# Grid size along each axis: the coordinate extent rounded to the nearest integer
# (still a float here; it is cast with int() where a shape is needed).
H = np.round(H_max - H_min)
W = np.round(W_max - W_min)
D = np.round(D_max - D_min)

print(H, W, D)

In [None]:
# Coordinate grid for an (H, W, D) volume with bounds (0, 1), as float32 on the GPU.
# NOTE(review): the exact output shape/layout is defined by alpine's
# get_coords_spatial — confirm against the library.
coords = alpine.utils.coords.get_coords_spatial(int(H), int(W), int(D), bounds=(0, 1)).float().cuda()



In [None]:
# Rasterise the atoms into a dense (H, W, D, 1) occupancy grid. Each atom writes its
# occupancy into the voxel indexed by its rounded, min-shifted coordinate (clipped
# to the grid); voxels that receive no atom stay 0. When several atoms map to the
# same voxel, the one processed last wins.
grid_shape = (int(H), int(W), int(D))
if min(grid_shape) < 1:
    # A zero-sized axis would otherwise surface as a division by zero below.
    raise ValueError(f"Occupancy grid has an empty axis: {grid_shape}")

occupancy_values = np.zeros(grid_shape + (1,))
for i in range(pts_coords_occupancy_np.shape[0]):
    x, y, z, occ = pts_coords_occupancy_np[i]
    x, y, z = np.round(x), np.round(y), np.round(z)
    # convert to 0-1 coordinate scale
    xnew = (x - H_min) / H
    ynew = (y - W_min) / W
    znew = (z - D_min) / D

    # back to integer voxel indices, clipped so the far boundary stays in range
    xnew = int(np.clip(int(xnew * H),0,H-1))
    ynew = int(np.clip(int(ynew * W),0,W-1))
    znew = int(np.clip(int(znew * D),0,D-1))
    occupancy_values[xnew, ynew, znew] = occ

occupancy_tensor = torch.from_numpy(occupancy_values).float().cuda()

In [None]:
# Alternative Siren model, left disabled:
# siren_sdf = alpine.models.Siren(in_features=3, out_features=1, hidden_features=512, hidden_layers=3, outermost_linear=True).float().cuda()
# siren_sdf.compile()

# Model actually used: alpine's FFN taking 3 input features (a coordinate) and
# producing 1 output value, in float32 on the GPU.
sdf_inr = alpine.models.FFN(in_features=3, out_features=1, hidden_features=512, hidden_layers=5, outermost_linear=True).float().cuda()
sdf_inr.compile()

print(sdf_inr)

In [None]:
# Fit the INR to the voxel grid for 5000 iterations. `[None, ...]` prepends a batch
# axis to both the coordinate grid and the target occupancy volume.
sdf_fitting = sdf_inr.fit_signal(
    input = coords[None,...], 
    signal = occupancy_tensor[None,...],
    n_iters=5000,
    enable_tqdm=True,
    
)

In [None]:
# Take the first batch element of the fitted output, bring it to host memory as a
# NumPy array and drop all singleton axes.
output_occupancy_vals = (
    sdf_fitting['output'][0]
    .detach()
    .cpu()
    .numpy()
    .squeeze()
)
print(output_occupancy_vals.shape)



In [None]:
import mcubes, skimage.measure
def march_and_save(occupancy, mcubes_thres, savename, smoothen=False):
    '''
        Convert a volumetric occupancy cube to a 3D mesh and write it to disk.

        Inputs:
            occupancy: (H, W, T) occupancy volume with values going from 0 to 1
            mcubes_thres: Threshold for marching cubes algorithm
            savename: DAE file name to save. Missing parent directories are
                created before writing.
            smoothen: If True, a copy of the volume is binarized at
                ``mcubes_thres`` and marching cubes is then run at level 0.
                The ``mcubes.smooth`` step is currently disabled, so no
                smoothing is applied despite the parameter name.
        Outputs:
            None
    '''
    import os  # local import keeps this definition self-contained

    if smoothen:
        # Work on a copy so the caller's array is not modified.
        occupancy = occupancy.copy()
        occupancy[occupancy < mcubes_thres] = 0.0
        occupancy[occupancy >= mcubes_thres] = 1.0

        # occupancy = mcubes.smooth(occupancy, method='gaussian', sigma=1)
        # NOTE(review): level 0 is the iso-level used with the output of
        # mcubes.smooth; with smoothing disabled the volume is still 0/1 here —
        # confirm that 0 is still the intended level.
        mcubes_thres = 0
    vertices, faces = mcubes.marching_cubes(occupancy, mcubes_thres)

    #vertices /= occupancy.shape[0]

    # Make sure the destination directory exists so the export cannot fail on a
    # fresh checkout.
    out_dir = os.path.dirname(savename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    mcubes.export_mesh(vertices, faces, savename)

In [None]:
# Mesh of the INR reconstruction: binarized at 0.5, then exported as a DAE file.
march_and_save(output_occupancy_vals, 0.5, './data/proteins/output/output_mesh_smooth_with_FFN.dae', smoothen=True)

In [None]:
# Reference mesh built from the rasterised input grid (not the model output);
# `[..., 0]` drops the trailing channel axis before meshing.
march_and_save(occupancy_tensor.clone().detach().cpu().numpy()[...,0], 0.5, './data/proteins/output/original_mesh_smooth_with_FFN.dae', smoothen=True)

In [None]:
# Sanity check: the grid was allocated as (int(H), int(W), int(D), 1).
occupancy_tensor.shape

In [None]:
# NOTE(review): despite its name, `predicted_occupancy_numpy` is a host copy of the
# rasterised INPUT grid (`occupancy_tensor`), not of the model output.
# `[..., 0]` drops the trailing channel axis.
predicted_occupancy_numpy = occupancy_tensor.clone().detach().cpu().numpy()[...,0]
# Host copy of the coordinate grid that was passed to the fit.
coords_np = coords.clone().detach().cpu().numpy()

In [None]:
# Shapes of the host-side occupancy grid and coordinate grid.
print(predicted_occupancy_numpy.shape, coords_np.shape)

In [None]:
# For every atom, record (a) its rounded raw coordinate and (b) the voxel index it
# maps to in the (H, W, D) grid, using the same mapping as the rasterisation step.
# This cell deliberately does not touch `occupancy_values`, so the rasterised
# training grid stays intact when cells are re-run.
coordinates_for_query = []
coords_as_input_to_inr = []
for i in range(pts_coords_occupancy_np.shape[0]):
    x, y, z = pts_coords_occupancy_np[i, :3]
    x, y, z = np.round(x), np.round(y), np.round(z)
    coordinates_for_query.append([x,y,z])
    # convert to 0-1 coordinate scale
    xnew = (x - H_min) / H
    ynew = (y - W_min) / W
    znew = (z - D_min) / D

    # back to integer voxel indices, clipped so the far boundary stays in range
    xnew = int(np.clip(int(xnew * H),0,H-1))
    ynew = int(np.clip(int(ynew * W),0,W-1))
    znew = int(np.clip(int(znew * D),0,D-1))
    coords_as_input_to_inr.append([xnew,ynew,znew])

# NOTE(review): the names below are easy to misread —
#   coords_query_wh_tensor holds the integer voxel indices,
#   coords_input_wh_tensor holds the rounded raw coordinates.
coords_query_wh_tensor = torch.from_numpy(np.array(coords_as_input_to_inr)).float().cuda()
print(coords_query_wh_tensor.shape)
coords_input_wh_tensor = torch.from_numpy(np.array(coordinates_for_query)).float().cuda()
print(coords_input_wh_tensor.shape)

In [None]:
# Evaluate the fitted INR once per atom; `[None, ...]` adds the leading batch axis.
# NOTE(review): `coords_input_wh_tensor` holds rounded raw PDB coordinates, whereas
# the model was fitted on a grid created with bounds=(0, 1) — confirm that these
# inputs are in the coordinate range the model expects.
occupancy_values_test = sdf_inr.render(coords_input_wh_tensor[None,...])['output']

In [None]:
def export_protein_file(predicted_occupancy_numpy, coords, df, header):
    """
    Return a copy of ``df`` whose ``occupancy`` column holds the predicted values.

    Args:
        predicted_occupancy_numpy: Array-like with one predicted occupancy per row
            of ``df``; any shape is accepted and flattened in C order.
        coords: Unused; kept so existing callers keep working.
        df (pd.DataFrame): Atom table to copy. It is not modified.
        header: Unused; kept so existing callers keep working. May be ``None``.

    Returns:
        pd.DataFrame: Copy of ``df`` with ``occupancy`` replaced positionally
            (row ``i`` receives flattened value ``i``).

    Raises:
        ValueError: If the number of predicted values differs from ``len(df)``.
    """
    occupancy_flat = np.asarray(predicted_occupancy_numpy).flatten()
    if occupancy_flat.shape[0] != len(df):
        raise ValueError(
            f"Expected {len(df)} occupancy values (one per row), "
            f"got {occupancy_flat.shape[0]}"
        )

    updated = df.copy()
    updated['occupancy'] = occupancy_flat
    return updated
    

In [None]:
# Write the per-atom INR outputs into the occupancy column of a copy of `protein_data`.
# NOTE(review): the assignment is positional — prediction i must belong to row i of
# `protein_data`; verify that the query coordinates were built in that row order.
df2_copy = export_protein_file(occupancy_values_test.detach().cpu().numpy().reshape(-1,1), coords_np, protein_data, header)

In [None]:
# Display the atom table with the replaced occupancy column.
df2_copy

In [None]:
#  atomic_df = PandasPdb().read_pdb(pdb_path)
#     if parse_header:
#         header = parsePDBHeader(pdb_path)
#     else:
#         header = None
#     atomic_df = atomic_df.get_model(model_index)
#     if len(atomic_df.df["ATOM"]) == 0:
#         raise ValueError(f"No model found for index: {model_index}")

#     return pd.concat([atomic_df.df["ATOM"], atomic_df.df["HETATM"], ]), header

In [None]:
from biopandas.pdb import PandasPdb

# Rebuild a PandasPdb container from the updated table: each row is routed to the
# 'ATOM' or 'HETATM' record table according to its `record_name` column.
ppdb = PandasPdb()
for record_type in ('ATOM', 'HETATM'):
    is_record = df2_copy['record_name'] == record_type
    ppdb.df[record_type] = df2_copy[is_record]

# Save to a PDB file
ppdb.to_pdb(
    path='./data/proteins/output/149l_output.pdb',
    records=['ATOM', 'HETATM'],
    gz=False,
    append_newline=True,
)
