<a href="https://colab.research.google.com/github/samveddubey/ELM-CNN-/blob/main/Prot2Num.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: biopython
Successfully installed biopython-1.83


In [7]:

from Bio.SeqUtils import ProtParam
import numpy as np

def convert_to_numerical_matrix(protein_sequence, scale, window_size, edge):
    """Encode a protein sequence as a matrix of per-residue scale values.

    One row is produced for every position in ``protein_sequence``. The row
    for position ``i`` holds the scale values of the ``window_size`` residues
    ending at ``i``. While fewer than ``window_size`` residues are available,
    the row is built from the sequence prefix up to ``i`` followed by ``'X'``
    placeholders so that it still has ``window_size`` entries.

    Args:
        protein_sequence: String of one-letter residue codes.
        scale: Mapping from residue letter to numeric value. Letters that are
            not present in the mapping (including the ``'X'`` placeholder,
            unless the mapping defines it) contribute ``0``.
        window_size: Number of residues per row.
        edge: Not used when building the matrix; kept in the signature so
            existing callers that pass it keep working.

    Returns:
        numpy.ndarray with one row per residue and ``window_size`` values per
        row. An empty sequence yields an empty array.
    """
    rows = []
    for end in range(1, len(protein_sequence) + 1):
        start = end - window_size
        if start < 0:
            # Not enough residues yet: take the prefix and pad to full width.
            # NOTE(review): the padding is appended on the right, so in these
            # rows the current residue is not in the last column as it is in
            # later rows -- confirm this alignment is intended.
            window = protein_sequence[:end] + 'X' * (-start)
        else:
            window = protein_sequence[start:end]

        # Unknown letters map to 0 instead of raising KeyError.
        rows.append([scale.get(residue, 0) for residue in window])

    return np.array(rows)

# Example scale: one numeric value per one-letter residue code.
scale = dict(
    A=0.5, R=-1.2, N=-0.5, D=-0.5, C=0.4,
    Q=-0.5, E=-0.5, G=0, H=-0.5, I=1,
    L=1, K=-1.2, M=0.8, F=1.8, P=0,
    S=-0.8, T=-0.7, W=1.6, Y=1.6, V=0.6,
)

# Windowing parameters passed to the conversion.
window_size = 5
edge = 0.4

# Sequence used for the demonstration.
protein_sequence = "MTEITAAMVKELRESTGAGMMDCKNALSETNGDFDKAVQLLREKGLGKAAKKADRLAAEG"

# Build the matrix (one row per residue).
numerical_matrix = convert_to_numerical_matrix(protein_sequence, scale, window_size, edge)

# Show the heading, the full matrix, and its row count, each on its own line.
print("Numerical matrix:", numerical_matrix, len(numerical_matrix), sep="\n")




Numerical matrix:
[[ 0.8  0.   0.   0.   0. ]
 [ 0.8 -0.7  0.   0.   0. ]
 [ 0.8 -0.7 -0.5  0.   0. ]
 [ 0.8 -0.7 -0.5  1.   0. ]
 [ 0.8 -0.7 -0.5  1.  -0.7]
 [-0.7 -0.5  1.  -0.7  0.5]
 [-0.5  1.  -0.7  0.5  0.5]
 [ 1.  -0.7  0.5  0.5  0.8]
 [-0.7  0.5  0.5  0.8  0.6]
 [ 0.5  0.5  0.8  0.6 -1.2]
 [ 0.5  0.8  0.6 -1.2 -0.5]
 [ 0.8  0.6 -1.2 -0.5  1. ]
 [ 0.6 -1.2 -0.5  1.  -1.2]
 [-1.2 -0.5  1.  -1.2 -0.5]
 [-0.5  1.  -1.2 -0.5 -0.8]
 [ 1.  -1.2 -0.5 -0.8 -0.7]
 [-1.2 -0.5 -0.8 -0.7  0. ]
 [-0.5 -0.8 -0.7  0.   0.5]
 [-0.8 -0.7  0.   0.5  0. ]
 [-0.7  0.   0.5  0.   0.8]
 [ 0.   0.5  0.   0.8  0.8]
 [ 0.5  0.   0.8  0.8 -0.5]
 [ 0.   0.8  0.8 -0.5  0.4]
 [ 0.8  0.8 -0.5  0.4 -1.2]
 [ 0.8 -0.5  0.4 -1.2 -0.5]
 [-0.5  0.4 -1.2 -0.5  0.5]
 [ 0.4 -1.2 -0.5  0.5  1. ]
 [-1.2 -0.5  0.5  1.  -0.8]
 [-0.5  0.5  1.  -0.8 -0.5]
 [ 0.5  1.  -0.8 -0.5 -0.7]
 [ 1.  -0.8 -0.5 -0.7 -0.5]
 [-0.8 -0.5 -0.7 -0.5  0. ]
 [-0.5 -0.7 -0.5  0.  -0.5]
 [-0.7 -0.5  0.  -0.5  1.8]
 [-0.5  0.  -0.5  1.8 -0.5]
 [