<a href="https://colab.research.google.com/github/samveddubey/ELM-CNN-/blob/main/Seq2Num.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np

# RNA Sequence to Numerical Form Using OPT Algorithm
def convert_rna_to_sparse_matrix(sequence):
    """One-hot encode every overlapping 3-letter window of an RNA sequence.

    Each window ``sequence[j:j+3]`` made only of the letters A, C, G, U is
    mapped to one of 64 rows. The row is the window read as a base-4 number
    with A=0, C=1, G=2, U=3, so 'AAA' -> row 0, 'AAC' -> row 1, ...,
    'UUU' -> row 63.

    Parameters
    ----------
    sequence : str
        RNA sequence. Matching is case-sensitive; any window containing a
        character other than upper-case A/C/G/U leaves its column all zero.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(64, max(len(sequence) - 2, 0))``. Column ``j``
        holds a single 1 in the row of the window starting at position ``j``,
        or is all zeros if that window is not a recognised triplet.
    """
    bases = 'ACGU'

    # Rows are 0-based so that all 64 triplets fit inside the 64-row matrix
    # ('UUU' must land on row 63, the last valid row).
    triplet_to_row = {
        first + second + third: 16 * i + 4 * k + m
        for i, first in enumerate(bases)
        for k, second in enumerate(bases)
        for m, third in enumerate(bases)
    }

    # Sequences shorter than 3 characters have no windows; clamp at 0 so that
    # np.zeros is never asked for a negative dimension.
    n_windows = max(len(sequence) - 2, 0)
    sparse_matrix = np.zeros((len(triplet_to_row), n_windows))

    for j in range(n_windows):
        row = triplet_to_row.get(sequence[j:j + 3])
        if row is not None:
            sparse_matrix[row, j] = 1

    return sparse_matrix



In [6]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: biopython
Successfully installed biopython-1.83


In [8]:
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
from Bio import SeqIO

def get_pssm_matrix(protein_sequence):
    """Run a remote PSI-BLAST search and binarise the HSP match lines.

    The query is submitted to NCBI through ``NCBIWWW.qblast`` against the
    ``swissprot`` database, so this call needs network access.

    Despite the name, no position-specific scores are computed here: the
    result only encodes, per HSP, which positions of the alignment match
    line carry the ``'|'`` marker.

    Parameters
    ----------
    protein_sequence
        Query passed straight through to ``NCBIWWW.qblast``.

    Returns
    -------
    list[list[int]]
        One row per HSP across all alignments, in the order they appear in
        the BLAST record. Each row has one entry per character of that HSP's
        match line (1 for ``'|'``, 0 otherwise), so rows are not guaranteed
        to share a length.
    """
    # qblast only accepts the basic program names; PSI-BLAST is requested as
    # a blastp search with service="psi".
    result_handle = NCBIWWW.qblast(
        "blastp",
        "swissprot",
        protein_sequence,
        entrez_query='"all"[Filter]',
        service="psi",
    )
    try:
        blast_record = NCBIXML.read(result_handle)
    finally:
        # Release the handle even when the XML cannot be parsed.
        result_handle.close()

    match_lines = [
        hsp.match
        for alignment in blast_record.alignments
        for hsp in alignment.hsps
    ]

    # NOTE(review): protein BLAST match lines appear to mark identities with
    # the residue letter (and '+' for positives) rather than '|' - confirm
    # that '|' is the intended marker, otherwise every row will be all zeros.
    return [
        [1 if char == '|' else 0 for char in line]
        for line in match_lines
    ]
