<a href="https://colab.research.google.com/github/ranjitranbhor/EnzDes/blob/main/EnzDes1f.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import gzip
import shutil
import torch
import numpy as np
from Bio import PDB
import requests
from pathlib import Path

def setup_environment():
    """Clone a fresh copy of SeqPredNN under /content and create its working directories.

    Side effects: the process working directory is left at
    /content/SeqPredNN; the relative paths used elsewhere in this file
    (example_pdb_directory, example_features, pretrained_model) resolve
    against it. The ``!`` lines are IPython shell escapes, so this function
    only runs inside an IPython/Colab kernel.
    """
    print("Setting up environment...")
    os.chdir('/content')
    # Remove any previous checkout so the clone below starts from a clean tree.
    !rm -rf SeqPredNN
    !git clone https://github.com/falategan/SeqPredNN.git
    os.chdir('/content/SeqPredNN')
    # Directories for the downloaded PDB files, the generated features and the model weights.
    !mkdir -p example_pdb_directory example_features pretrained_model

def download_pdb(pdb_id):
    """Download a PDB entry from RCSB into ``example_pdb_directory``.

    The ID is upper-cased for the request URL and lower-cased for the local
    file name (``example_pdb_directory/<id>.pdb``).

    Args:
        pdb_id: PDB identifier in any letter case (e.g. "3WWJ").

    Returns:
        True if the file was downloaded and written; False on a non-200
        response or on a network failure (connection error, timeout, ...).
    """
    pdb_id = pdb_id.upper()
    print(f"\nDownloading PDB file for {pdb_id}...")
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Report network failures through the same False return the caller
        # already handles, instead of a traceback.
        print(f"Error downloading PDB file: {exc}")
        return False

    if response.status_code != 200:
        print(f"Error downloading PDB file. Status code: {response.status_code}")
        return False

    with open(f'example_pdb_directory/{pdb_id.lower()}.pdb', 'wb') as f:
        f.write(response.content)
    print(f"Successfully downloaded {pdb_id}.pdb")
    return True

def get_available_chains(pdb_id):
    """Return the chain IDs of model 0 in the downloaded PDB file.

    Args:
        pdb_id: PDB identifier; the file is read from
            ``example_pdb_directory/<lower-cased id>.pdb``.

    Returns:
        A list with the ``id`` of every chain in ``structure[0]``, in
        iteration order.
    """
    pdb_path = f'example_pdb_directory/{pdb_id.lower()}.pdb'
    # QUIET=True is passed through to the Biopython parser unchanged.
    structure = PDB.PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)
    first_model = structure[0]
    return [chain.id for chain in first_model]

def prepare_files(pdb_id, chain_id):
    """Prepare files for SeqPredNN"""
    print("\nPreparing files...")

    # Gzip PDB file
    with open(f'example_pdb_directory/{pdb_id.lower()}.pdb', 'rb') as f_in:
        with gzip.open(f'example_pdb_directory/{pdb_id.lower()}.pdb.gz', 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    # Download pretrained model
    !wget -O pretrained_model/pretrained_parameters.pth https://raw.githubusercontent.com/falategan/SeqPredNN/main/pretrained_model/pretrained_parameters.pth

    # Create chain list file
    with open('chain_list.csv', 'w') as f:
        f.write('Protein,Filename,Chain\n')
        f.write(f'{pdb_id},{pdb_id.lower()}.pdb.gz,{chain_id}')

def run_prediction(pdb_id, chain_id):
    """Run SeqPredNN prediction"""
    print("\nGenerating features...")
    !python3 SeqPredNN/featurise.py -gm -o example_features chain_list.csv example_pdb_directory

    # Prepare for prediction
    with open('example_features/chain_list.txt', 'w') as f:
        f.write(f'{pdb_id}{chain_id}')

    print("\nRunning prediction...")
    parameters = torch.load('pretrained_model/pretrained_parameters.pth', weights_only=True)
    torch.save(parameters, 'pretrained_model/parameters_weights_only.pth')

    # Run prediction and capture output
    !PYTHONPATH=/content/SeqPredNN python3 SeqPredNN/predict.py -p example_features example_features/chain_list.txt pretrained_model/parameters_weights_only.pth | tee prediction_output.txt

def get_predicted_sequence():
    """Extract the predicted sequence from ``prediction_output.txt``.

    Scans the file in the current working directory for the first line
    containing "Predicted sequence:" and returns the text after the marker
    with surrounding whitespace removed.

    Returns:
        The sequence string, or None when the output file does not exist or
        contains no such line.
    """
    marker = "Predicted sequence:"
    try:
        with open('prediction_output.txt', 'r') as f:
            # Stream the file and stop at the first match instead of
            # loading and scanning the whole output twice.
            for line in f:
                if marker in line:
                    return line.split(marker)[-1].strip()
    except FileNotFoundError:
        # No output file at all: report "not found" exactly like a file
        # without the marker, which the caller already handles.
        return None
    return None

def main():
    """Run the interactive prediction pipeline end to end.

    Steps: set up the environment, ask for a PDB ID, download the file, ask
    for one of its chains, prepare the input files, run the prediction, then
    print the predicted sequence and save it as
    ``<pdb_id>_<chain_id>_predicted.fasta``.

    Returns early (after printing a message) when the download fails or the
    entered chain ID is not present in the structure.
    """
    # Setup
    setup_environment()

    # Get PDB ID from user
    pdb_id = input("\nEnter PDB ID (e.g., 3WWJ): ").strip()

    # Download PDB file
    if not download_pdb(pdb_id):
        print("Failed to download PDB file. Exiting...")
        return

    # Get available chains
    available_chains = get_available_chains(pdb_id)
    print(f"\nAvailable chains: {', '.join(available_chains)}")

    # Get chain from user. Chain IDs are case-sensitive, so an exact match
    # wins; upper-casing is only a convenience fallback for input such as
    # "a" when the structure has chain "A".
    entered_chain = input(f"Enter chain ID ({', '.join(available_chains)}): ").strip()
    if entered_chain in available_chains:
        chain_id = entered_chain
    else:
        chain_id = entered_chain.upper()
    if chain_id not in available_chains:
        print("Invalid chain ID. Exiting...")
        return

    # Run prediction pipeline
    prepare_files(pdb_id, chain_id)
    run_prediction(pdb_id, chain_id)

    # Get and display prediction
    predicted_seq = get_predicted_sequence()

    print("\nPrediction Results:")
    if predicted_seq:
        print(f"\nPredicted sequence for {pdb_id} chain {chain_id}:")
        print(predicted_seq)
        print(f"\nSequence length: {len(predicted_seq)} residues")

        # Save sequence to file
        output_file = f"{pdb_id}_{chain_id}_predicted.fasta"
        with open(output_file, 'w') as f:
            f.write(f">{pdb_id}_{chain_id}_predicted\n")
            f.write(predicted_seq)
        print(f"\nPredicted sequence saved to {output_file}")
    else:
        print("Error: Could not extract predicted sequence from output.")
        print("Please check prediction_output.txt for details.")

# Run the interactive prediction pipeline when this code is executed as the
# main module (not when it is imported).
if __name__ == "__main__":
    main()

Setting up environment...
Cloning into 'SeqPredNN'...
remote: Enumerating objects: 482, done.[K
remote: Counting objects: 100% (148/148), done.[K
remote: Compressing objects: 100% (70/70), done.[K
remote: Total 482 (delta 130), reused 78 (delta 78), pack-reused 334 (from 1)[K
Receiving objects: 100% (482/482), 21.54 MiB | 15.76 MiB/s, done.
Resolving deltas: 100% (227/227), done.

Enter PDB ID (e.g., 3WWJ): 3WWJ

Downloading PDB file for 3WWJ...
Successfully downloaded 3WWJ.pdb

Available chains: A, B, C, D, E, F, G, H, I, J, K, L
Enter chain ID (A, B, C, D, E, F, G, H, I, J, K, L): A

Preparing files...
--2024-11-17 17:07:52--  https://raw.githubusercontent.com/falategan/SeqPredNN/main/pretrained_model/pretrained_parameters.pth
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting resp