# A working baseline model, with potential room for improvement

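1. Encoding the Sequences: The amino acid sequences are converted to integer sequences for model compatibility. The 20 standard amino acids map to 1–20; unknown amino acids are encoded as 0, which is also the value used to pad shorter sequences, so the model cannot tell the two apart.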

2. Model Architecture:
For Q3 Prediction (`3-state`), we use a basic CNN with an embedding layer, Conv1D, and a global pooling layer to reduce dimensions.
For Q8 Prediction (`8-state`), we introduce a bidirectional LSTM layer alongside the CNN. This hybrid structure aims to capture both local and long-range dependencies.

3. Training: We train the two models separately on an 80/20 train/test split. The held-out 20% is used both as the per-epoch validation data and for the final evaluation.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, LSTM, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load data
# Reads the dataset CSV from a hard-coded Google Drive path; later cells use its
# 'seq', 'sst3' and 'sst8' columns.
# NOTE(review): presumably requires Drive to be mounted at /content/drive first — confirm.
data = pd.read_csv('/content/drive/MyDrive/SC4001 Files/2018-06-06-pdb-intersect-pisces.csv')

# Encode amino acid sequences as integers
# Lookup table built once instead of on every call: the 20 standard residues map to 1..20.
_AMINO_ACID_TO_INDEX = {aa: i for i, aa in enumerate('ACDEFGHIKLMNPQRSTVWY', start=1)}


def encode_sequence(sequence):
    """Convert an amino-acid string into a list of integer codes.

    Args:
        sequence: Iterable of one-letter amino-acid symbols (typically a str).

    Returns:
        A list with one int per symbol: 1..20 for the standard residues in
        'ACDEFGHIKLMNPQRSTVWY' order, and 0 for any other symbol (including
        lowercase letters, which are not normalised).
    """
    return [_AMINO_ACID_TO_INDEX.get(aa, 0) for aa in sequence]

# Integer-encode every sequence (one list of codes per row).
data['seq_encoded'] = data['seq'].apply(encode_sequence)

# Encode sst3 and sst8 labels
# NOTE(review): each row's value is passed through ''.join(...) and handed to
# LabelEncoder as ONE item, so every distinct full-length structure string
# becomes its own class (assuming the columns hold strings — confirm). The
# models below therefore get one output unit per distinct string rather than
# 3 / 8 states, which matches the ~0 accuracy and loss of about 9 in the
# training logs. A per-residue or other low-cardinality target is needed for
# this baseline to learn anything.
sst3_encoder = LabelEncoder()
sst8_encoder = LabelEncoder()
sst3_labels = sst3_encoder.fit_transform([''.join(label) for label in data['sst3']])
sst8_labels = sst8_encoder.fit_transform([''.join(label) for label in data['sst8']])

# Pad sequences for consistent input length
# Every sequence is right-padded ('post') up to the longest sequence in the dataset.
# pad_sequences pads with its default value 0, the same code encode_sequence
# uses for unknown residues.
max_length = max(data['seq_encoded'].apply(len))
X = pad_sequences(data['seq_encoded'], maxlen=max_length, padding='post')
y_sst3 = sst3_labels
y_sst8 = sst8_labels

# Split into training and testing sets
# One call splits X and both label arrays together so rows stay aligned;
# 20% is held out and random_state=42 fixes the shuffle.
X_train, X_test, y_sst3_train, y_sst3_test, y_sst8_train, y_sst8_test = train_test_split(
    X, y_sst3, y_sst8, test_size=0.2, random_state=42)


In [None]:
# Model for Q3 Prediction (3-State)
# Layer stack assembled incrementally: embedding -> 1D convolution ->
# global max pooling -> dense head producing one softmax vector per sequence.
model_q3 = Sequential()
model_q3.add(Embedding(input_dim=21, output_dim=128, input_length=max_length))  # index 0 + 20 amino-acid codes
model_q3.add(Conv1D(64, 3, activation='relu'))
model_q3.add(GlobalMaxPooling1D())
model_q3.add(Dense(64, activation='relu'))
# One output unit per class known to sst3_encoder.
model_q3.add(Dense(len(sst3_encoder.classes_), activation='softmax'))

# Compile and train the model
# Labels are integer class ids, hence the sparse cross-entropy loss.
model_q3.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# The held-out split doubles as the per-epoch validation set.
model_q3.fit(
    X_train,
    y_sst3_train,
    validation_data=(X_test, y_sst3_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10




[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 24ms/step - accuracy: 0.0000e+00 - loss: 9.1241 - val_accuracy: 0.0000e+00 - val_loss: 9.2067
Epoch 2/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.0000e+00 - loss: 9.0857 - val_accuracy: 0.0000e+00 - val_loss: 9.3758
Epoch 3/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.0000e+00 - loss: 9.0533 - val_accuracy: 0.0000e+00 - val_loss: 9.5367
Epoch 4/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.0000e+00 - loss: 9.0278 - val_accuracy: 0.0000e+00 - val_loss: 9.7032
Epoch 5/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 2.1530e-04 - loss: 8.9685 - val_accuracy: 0.0000e+00 - val_loss: 9.8704
Epoch 6/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 4.7964e-04 - loss: 8.8709 - val_accuracy: 0.0000e+00 - va

<keras.src.callbacks.history.History at 0x7901c010bdc0>

In [None]:
# Model for Q8 Prediction (8-State)
# Same pipeline shape as the CNN baseline, with a bidirectional LSTM inserted
# between the embedding and the convolution.
model_q8 = Sequential()
model_q8.add(Embedding(input_dim=21, output_dim=128, input_length=max_length))
# return_sequences=True keeps one LSTM output per position so Conv1D can slide over them.
model_q8.add(Bidirectional(LSTM(64, return_sequences=True)))
model_q8.add(Conv1D(64, 3, activation='relu'))
model_q8.add(GlobalMaxPooling1D())
model_q8.add(Dense(64, activation='relu'))
# One output unit per class known to sst8_encoder.
model_q8.add(Dense(len(sst8_encoder.classes_), activation='softmax'))

# Compile and train the model
# Labels are integer class ids, hence the sparse cross-entropy loss.
model_q8.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# The held-out split doubles as the per-epoch validation set.
model_q8.fit(
    X_train,
    y_sst8_train,
    validation_data=(X_test, y_sst8_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 110ms/step - accuracy: 0.0000e+00 - loss: 9.1245 - val_accuracy: 0.0000e+00 - val_loss: 9.2077
Epoch 2/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 108ms/step - accuracy: 0.0000e+00 - loss: 9.0847 - val_accuracy: 0.0000e+00 - val_loss: 9.3784
Epoch 3/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 108ms/step - accuracy: 0.0000e+00 - loss: 9.0545 - val_accuracy: 0.0000e+00 - val_loss: 9.5390
Epoch 4/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 108ms/step - accuracy: 2.9319e-05 - loss: 9.0291 - val_accuracy: 0.0000e+00 - val_loss: 9.6920
Epoch 5/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 108ms/step - accuracy: 7.6560e-04 - loss: 9.0075 - val_accuracy: 0.0000e+00 - val_loss: 9.8391
Epoch 6/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 108ms/step - accuracy: 1.2449e-04 - loss: 8.9909 - val_ac

<keras.src.callbacks.history.History at 0x7901e18523e0>

In [None]:
# Evaluation
# Scores both baseline models on the held-out split (X_test and the matching
# label arrays) — the same arrays that were passed as validation_data during
# fit, so these numbers are not from data unseen during model selection.
print("Q3 Model Performance:")
model_q3.evaluate(X_test, y_sst3_test)
print("\nQ8 Model Performance:")
# Last expression of the cell: its return value is what the notebook echoes
# below the progress bars (presumably [loss, accuracy] — confirm against compile()).
model_q8.evaluate(X_test, y_sst8_test)

Q3 Model Performance:
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 11.6807

Q8 Model Performance:
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 47ms/step - accuracy: 0.0000e+00 - loss: 10.4948


[10.493728637695312, 0.0]

# Architecture Refinement and Improvement
## Transformer Block Architecture

- Transformer model: Handles sequential data, crucial for tasks like protein structure prediction.
- Uses self-attention mechanism instead of RNNs/CNNs to manage short- and long-range dependencies.
- Key components:
  - **Multi-Head Self-Attention**: Computes attention scores for each position, focusing on amino acid dependencies with each head learning unique patterns.
  - **Feed-Forward Network (FFN)**: Applied after self-attention for complex transformations of attention outputs.
  - **Residual Connections and Layer Normalization**: Enhances gradient flow, stabilizes training, and prevents vanishing/exploding gradients.

## Novel Transformer Architecture for Protein Prediction

- Customizations for protein secondary structure prediction.
  - **Self-Attention Mechanism**:
    - Assesses interactions between distant amino acids.
    - Each head focuses on different sequence regions to capture multi-level relationships.
    - Unlike RNNs, processes all positions simultaneously for faster computation.
  - **Feed-Forward Network (FFN)**:
    - Adds non-linear transformations, aiding accurate structure predictions.
  - **Residual Connections and Layer Normalization**:
    - Improves training stability and preserves critical features across layers.
  - **Output Layer**: Predicts each amino acid’s secondary structure (Q3 or Q8) independently.

## Feature Representation

- Transforms amino acid sequences into dense vectors capturing biochemical properties.

  - **Embedding Layer**:
    - Maps each amino acid to a dense vector (e.g., dimension 64).
    - Embeddings enable representation in continuous space, grouping similar amino acids.
    - Model learns relationships and context based on amino acid positions within sequences.
  - **Self-Attention as Contextual Feature Extractor**:
    - Represents influence of each amino acid across the sequence.
    - Assigns weights to capture importance of each amino acid relative to others.
    - Helps identify structural dependencies, especially with long-range interactions.

- **Combined Approach**:
  - Embeddings + Self-Attention: Enables model to learn complex sequence relationships.
  - Enhances secondary structure prediction by capturing both local and long-range amino acid interactions.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, Dropout, Input, MultiHeadAttention, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import plotly.graph_objects as go
import plotly.express as px
from sklearn.metrics import confusion_matrix

In [None]:
# Reload data
# The integer encoding is applied further down in this cell, after
# encode_sequence has been defined. Calling it here would raise NameError in a
# fresh kernel and would only duplicate that later assignment otherwise.
data = pd.read_csv('/content/drive/MyDrive/SC4001 Files/2018-06-06-pdb-intersect-pisces.csv')

# Helper function to encode each amino acid in sequence
# Lookup table built once instead of on every call: the 20 standard residues map to 1..20.
_AMINO_ACID_TO_INDEX = {aa: i for i, aa in enumerate('ACDEFGHIKLMNPQRSTVWY', start=1)}


def encode_sequence(sequence):
    """Convert an amino-acid string into a list of integer codes.

    Args:
        sequence: Iterable of one-letter amino-acid symbols (typically a str).

    Returns:
        A list with one int per symbol: 1..20 for the standard residues in
        'ACDEFGHIKLMNPQRSTVWY' order, and 0 for any other symbol.
    """
    return [_AMINO_ACID_TO_INDEX.get(aa, 0) for aa in sequence]

# Encode each amino acid sequence as a sequence of integers
data['seq_encoded'] = data['seq'].apply(encode_sequence)

# Preprocess sst3 and sst8 labels to be per amino acid
sst3_labels = [list(sst3) for sst3 in data['sst3']]
sst8_labels = [list(sst8) for sst8 in data['sst8']]

# Encode sst3 and sst8 labels with LabelEncoder, one label per amino acid
sst3_encoder = LabelEncoder()
sst8_encoder = LabelEncoder()

# Fit encoders on the entire dataset of labels for each amino acid in sst3 and sst8
sst3_encoder.fit(np.concatenate(sst3_labels))
sst8_encoder.fit(np.concatenate(sst8_labels))

# Transform each amino acid in sst3 and sst8 sequences
sst3_encoded = [sst3_encoder.transform(labels) for labels in sst3_labels]
sst8_encoded = [sst8_encoder.transform(labels) for labels in sst8_labels]

# Pad sequences for consistent length
max_length = max(data['seq_encoded'].apply(len))
X = pad_sequences(data['seq_encoded'], maxlen=max_length, padding='post')
y_sst3 = pad_sequences(sst3_encoded, maxlen=max_length, padding='post', value=-1)  # -1 for padding in labels
y_sst8 = pad_sequences(sst8_encoded, maxlen=max_length, padding='post', value=-1)


def one_hot_ignore_padding(label_ids, num_classes, pad_value=-1):
    """One-hot encode integer labels, leaving padded positions as all-zero rows.

    Feeding the -1 pad value to to_categorical indexes column -1, i.e. it marks
    padding as the LAST class. Here padded positions get an all-zero target
    instead, so they add nothing to a categorical cross-entropy loss.

    Args:
        label_ids: Integer array of class ids, with pad_value at padded positions.
        num_classes: Width of the one-hot axis.
        pad_value: Marker used for padded positions.

    Returns:
        float32 array of shape label_ids.shape + (num_classes,).
    """
    label_ids = np.asarray(label_ids)
    one_hot = np.zeros(label_ids.shape + (num_classes,), dtype='float32')
    is_real = label_ids != pad_value
    one_hot[is_real, label_ids[is_real]] = 1.0
    return one_hot


# Convert y_sst3 and y_sst8 to categorical, handling the -1 padding for ignored labels
# NOTE: an unweighted 'accuracy' metric still scores the padded positions;
# mask or weight them out when reporting per-residue accuracy.
y_sst3 = one_hot_ignore_padding(y_sst3, len(sst3_encoder.classes_))
y_sst8 = one_hot_ignore_padding(y_sst8, len(sst8_encoder.classes_))

In [None]:
# Define Transformer Block for per-position prediction
class TransformerBlock(tf.keras.layers.Layer):
    """Single post-norm Transformer encoder block.

    Self-attention followed by a two-layer feed-forward network, each wrapped
    in dropout, a residual connection and layer normalisation. The output has
    the same shape as the input, so one vector per sequence position is kept.

    Args:
        embed_dim: Size of the last axis of the input and output.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after attention and after the feed-forward net.
        **kwargs: Forwarded to the base Layer constructor (e.g. name).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Forward an incoming padding mask to downstream layers instead of
        # dropping it with a warning.
        self.supports_masking = True
        # key_dim is the per-head projection size; each head gets embed_dim here.
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None, mask=None):
        """Apply the block.

        Args:
            inputs: Float tensor; assumed (batch, seq_len, embed_dim), as
                produced by the Embedding in build_transformer_model.
            training: Training-mode flag; None lets Keras decide from context.
            mask: Optional boolean padding mask, assumed (batch, seq_len) with
                True at real positions; Keras supplies it from the input.

        Returns:
            Tensor with the same shape as inputs.
        """
        attention_mask = None
        if mask is not None:
            # (batch, 1, seq_len): broadcast over queries so no position
            # attends to padded keys.
            attention_mask = mask[:, tf.newaxis, :]
        attn_output = self.att(inputs, inputs, attention_mask=attention_mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


# Build Transformer-based model for per-amino-acid prediction
def build_transformer_model(num_classes, max_len, embed_dim=64, num_heads=2, ff_dim=128):
    """Build an Embedding -> TransformerBlock -> softmax model.

    Args:
        num_classes: Number of output classes per position.
        max_len: Fixed (padded) input sequence length.
        embed_dim: Embedding size, also the Transformer block width.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the block's feed-forward network.

    Returns:
        An uncompiled Keras Model mapping (batch, max_len) integer codes to
        (batch, max_len, num_classes) class probabilities.
    """
    inputs = Input(shape=(max_len,))
    # Code 0 is treated as padding (mask_zero=True). The sequence length comes
    # from the Input layer, so the deprecated input_length argument is omitted.
    x = Embedding(input_dim=21, output_dim=embed_dim, mask_zero=True)(inputs)
    # No explicit training flag: Keras sets it per mode (fit vs. predict/evaluate)
    # rather than requesting dropout unconditionally.
    x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
    outputs = Dense(num_classes, activation="softmax")(x)
    return Model(inputs=inputs, outputs=outputs)

# Plot training and validation accuracy and loss for Q3 and Q8 models
def plot_training_history(history, title):
    """Show the four training curves of one Keras History in a single figure.

    Args:
        history: Object whose .history dict has 'accuracy', 'val_accuracy',
            'loss' and 'val_loss' lists (one value per epoch).
        title: Figure title.
    """
    # (key in history.history, legend entry), in drawing order.
    curves = (
        ('accuracy', 'Train Accuracy'),
        ('val_accuracy', 'Validation Accuracy'),
        ('loss', 'Train Loss'),
        ('val_loss', 'Validation Loss'),
    )
    fig = go.Figure()
    for metric_key, trace_name in curves:
        fig.add_trace(go.Scatter(y=history.history[metric_key], mode='lines', name=trace_name))
    fig.update_layout(
        title=title,
        xaxis_title='Epoch',
        yaxis_title='Value',
        legend_title='Metrics',
    )
    fig.show()

In [None]:
# Compile and train the model for Q3 prediction
# One output class per label known to sst3_encoder.
num_q3_classes = len(sst3_encoder.classes_)
model_q3 = build_transformer_model(num_q3_classes, max_length)
# Targets are one-hot per position, hence the non-sparse cross-entropy.
model_q3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# validation_split holds out a fraction (0.2) of the supplied arrays for validation.
history_q3 = model_q3.fit(x=X, y=y_sst3, batch_size=32, epochs=10, validation_split=0.2)


Layer 'transformer_block_6' (of type TransformerBlock) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.



Epoch 1/10



Layer 'query' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.


Layer 'key' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.


Layer 'value' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 125ms/step - accuracy: 0.9414 - loss: 0.1250 - val_accuracy: 0.8534 - val_loss: 0.2946
Epoch 2/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 80ms/step - accuracy: 0.9463 - loss: 0.1091 - val_accuracy: 0.8542 - val_loss: 0.2938
Epoch 3/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 80ms/step - accuracy: 0.9461 - loss: 0.1096 - val_accuracy: 0.8509 - val_loss: 0.2962
Epoch 4/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 84ms/step - accuracy: 0.9460 - loss: 0.1098 - val_accuracy: 0.8541 - val_loss: 0.2937
Epoch 5/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 85ms/step - accuracy: 0.9461 - loss: 0.1097 - val_accuracy: 0.8547 - val_loss: 0.2934
Epoch 6/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 81ms/step - accuracy: 0.9468 - loss: 0.1083 - val_accuracy: 0.8551 - val_loss: 0.2929
Epoch 7/10
[1m227/227[0m

In [None]:
# Compile and train the model for Q8 prediction
# One output class per label known to sst8_encoder.
num_q8_classes = len(sst8_encoder.classes_)
model_q8 = build_transformer_model(num_q8_classes, max_length)
# Targets are one-hot per position, hence the non-sparse cross-entropy.
model_q8.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# validation_split holds out a fraction (0.2) of the supplied arrays for validation.
history_q8 = model_q8.fit(x=X, y=y_sst8, batch_size=32, epochs=10, validation_split=0.2)


Layer 'transformer_block_7' (of type TransformerBlock) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.



Epoch 1/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 114ms/step - accuracy: 0.9045 - loss: 0.2920 - val_accuracy: 0.8110 - val_loss: 0.4771
Epoch 2/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 86ms/step - accuracy: 0.9336 - loss: 0.1698 - val_accuracy: 0.8143 - val_loss: 0.4731
Epoch 3/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 82ms/step - accuracy: 0.9338 - loss: 0.1698 - val_accuracy: 0.8140 - val_loss: 0.4728
Epoch 4/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 82ms/step - accuracy: 0.9335 - loss: 0.1708 - val_accuracy: 0.8127 - val_loss: 0.4742
Epoch 5/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 83ms/step - accuracy: 0.9335 - loss: 0.1702 - val_accuracy: 0.8135 - val_loss: 0.4733
Epoch 6/10
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 87ms/step - accuracy: 0.9331 - loss: 0.1713 - val_accuracy: 0.8148 - val_loss: 0.4721
Epoch 7/10
[1m

In [None]:
# Plot training history for Q3 and Q8 models
# One figure per model, drawn in Q3-then-Q8 order.
for run_history, figure_title in (
    (history_q3, "Q3 Prediction - Training History"),
    (history_q8, "Q8 Prediction - Training History"),
):
    plot_training_history(run_history, figure_title)

# Confusion Matrix for Q3 and Q8 Predictions (sample evaluation for demonstration)
def plot_confusion_matrix(y_true, y_pred, labels, title):
    """Plot a row-normalised confusion matrix as a heatmap.

    Args:
        y_true: 1-D sequence of true class ids.
        y_pred: 1-D sequence of predicted class ids, aligned with y_true.
        labels: Class names for the axes; class id k is assumed to be the index
            of its name in this sequence (as with LabelEncoder.classes_).
        title: Figure title.
    """
    # Pin the class set so the matrix is always len(labels) x len(labels).
    # Without this, a class absent from the sample shrinks the matrix and the
    # axis labels no longer line up.
    class_ids = np.arange(len(labels))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids, normalize='true')
    fig = px.imshow(cm, x=labels, y=labels, color_continuous_scale='Blues', labels={'x': 'Predicted Label', 'y': 'True Label', 'color': 'Frequency'})
    fig.update_layout(title=title)
    fig.show()

# Sample test predictions for confusion matrices
# Random sample for quick matrix visualization. It is drawn from all of X, so
# it can include sequences the models were trained on.
sample_size = min(100, len(X))
test_indices = np.random.choice(len(X), size=sample_size, replace=False)

# Keep only real residues: position j of a row is valid when j < that sequence's
# length. Padded positions would otherwise be counted as ordinary
# predictions/labels and distort the matrix.
sample_lengths = data['seq'].str.len().to_numpy()[test_indices]
residue_mask = np.arange(max_length)[np.newaxis, :] < sample_lengths[:, np.newaxis]

# Boolean indexing both drops the padding and flattens to 1-D.
y_sst3_test_pred = np.argmax(model_q3.predict(X[test_indices]), axis=-1)[residue_mask]
y_sst8_test_pred = np.argmax(model_q8.predict(X[test_indices]), axis=-1)[residue_mask]

# Flatten true labels for comparison
y_sst3_test_true = np.argmax(y_sst3[test_indices], axis=-1)[residue_mask]
y_sst8_test_true = np.argmax(y_sst8[test_indices], axis=-1)[residue_mask]

# Plot confusion matrices for Q3 and Q8
plot_confusion_matrix(y_sst3_test_true, y_sst3_test_pred, sst3_encoder.classes_, "Q3 Confusion Matrix")
plot_confusion_matrix(y_sst8_test_true, y_sst8_test_pred, sst8_encoder.classes_, "Q8 Confusion Matrix")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 435ms/step
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 35ms/step



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 506ms/step


In [None]:
# Similarity Distribution Histogram
# NOTE: this cell uses calculate_similarity, true_sst3 and sst3_pred_decoded,
# which are defined in the results cell further down — run that cell first.
# The scores are per-sequence Q3 match fractions (the plot titles call them
# "average similarity"). Pairs are taken positionally, and
# calculate_similarity only compares up to the length of the true string, so
# any padded tail of a prediction is ignored.
similarities = [
    calculate_similarity(true_seq, pred_seq)
    for true_seq, pred_seq in zip(true_sst3, sst3_pred_decoded)
]
fig = px.histogram(similarities, nbins=20, labels={'value': "Average Similarity Score"}, title="Distribution of Average Similarity Scores")
fig.update_xaxes(range=[0, 1])
fig.show()

# Sequence Length vs. Prediction Accuracy
sequence_lengths = [len(seq) for seq in data['seq'].iloc[:100]]
# Same per-sequence scores as the histogram; reused rather than recomputed.
accuracies = list(similarities)
fig = px.scatter(x=sequence_lengths, y=accuracies, labels={'x': "Sequence Length", 'y': "Average Similarity"}, title="Sequence Length vs. Prediction Accuracy")
fig.show()

# Results of Q3 and Q8 Structure Prediction

In [None]:
# Predict Q3 and Q8 structures using the trained models for the first 100 sequences
test_sequences = data['seq_encoded'].iloc[:100]
test_sequences_padded = pad_sequences(test_sequences, maxlen=max_length, padding='post')

# Model predictions for Q3 and Q8
# argmax over the class axis gives one predicted class id per (padded) position.
sst3_pred = np.argmax(model_q3.predict(test_sequences_padded), axis=-1)
sst8_pred = np.argmax(model_q8.predict(test_sequences_padded), axis=-1)

# Decode predictions to readable structure classes
# One vectorised lookup per sequence: classes_[ids] is the mapping
# inverse_transform applies, without a separate call for every residue.
# argmax never yields -1, so no padding filter is needed here; each decoded
# string spans the full padded length and callers trim it to the real length.
sst3_pred_decoded = [''.join(sst3_encoder.classes_[row]) for row in sst3_pred]
sst8_pred_decoded = [''.join(sst8_encoder.classes_[row]) for row in sst8_pred]

# Decode the true labels for sst3 and sst8
true_sst3 = data['sst3'].iloc[:100]
true_sst8 = data['sst8'].iloc[:100]

# Calculate similarity and store results
similarity_scores = []


def calculate_similarity(true_seq, pred_seq):
    """Return the fraction of positions in true_seq that pred_seq matches.

    Positions are compared pairwise up to the shorter sequence. The result is
    divided by len(true_seq), so missing predictions count as mismatches and
    extra predicted positions are ignored.

    Args:
        true_seq: Reference sequence (e.g. a structure string).
        pred_seq: Predicted sequence.

    Returns:
        A float in [0, 1]; 0.0 when true_seq is empty (instead of raising
        ZeroDivisionError).
    """
    if len(true_seq) == 0:
        return 0.0
    matches = sum(t == p for t, p in zip(true_seq, pred_seq))
    return matches / len(true_seq)

# Score each of the first 100 sequences. The five inputs are walked in
# lockstep by position, so the pairing does not rely on the sliced Series
# keeping a 0..99 index.
per_sequence = zip(
    data['seq'].iloc[:100],
    true_sst3,
    true_sst8,
    sst3_pred_decoded,
    sst8_pred_decoded,
)
for seq, q3_true, q8_true, q3_pred_padded, q8_pred_padded in per_sequence:
    # Predictions cover the padded length; keep only the real residues.
    q3_pred = q3_pred_padded[:len(seq)]
    q8_pred = q8_pred_padded[:len(seq)]

    q3_similarity = calculate_similarity(q3_true, q3_pred)
    q8_similarity = calculate_similarity(q8_true, q8_pred)
    avg_similarity = (q3_similarity + q8_similarity) / 2  # Average similarity for ranking

    similarity_scores.append({
        'Sequence': seq,
        'True Q3': q3_true,
        'Predicted Q3': q3_pred,
        'True Q8': q8_true,
        'Predicted Q8': q8_pred,
        'Q3 Similarity': q3_similarity,
        'Q8 Similarity': q8_similarity,
        'Avg Similarity': avg_similarity
    })


# Five best sequences by mean of Q3 and Q8 similarity, best first.
top_5_results = sorted(similarity_scores, key=lambda x: x['Avg Similarity'], reverse=True)[:5]


for result in top_5_results:
    print(f"Sequence: {result['Sequence']}")
    print(f"True Q3 Structure: {result['True Q3']}")
    print(f"Predicted Q3 Structure: {result['Predicted Q3']}")
    print(f"Q3 Similarity: {result['Q3 Similarity'] * 100:.2f}%")
    print(f"True Q8 Structure: {result['True Q8']}")
    print(f"Predicted Q8 Structure: {result['Predicted Q8']}")
    print(f"Q8 Similarity: {result['Q8 Similarity'] * 100:.2f}%")
    print(f"Average Similarity: {result['Avg Similarity'] * 100:.2f}%")
    print("-" * 30)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Sequence: KPSSPPEELKFQCGQKTLRPRFK
True Q3 Structure: CCCCCCCCCCCCCCCCCCCCCCC
Predicted Q3 Structure: CCCCCCCCCCCCCCCCCCCCCCC
Q3 Similarity: 100.00%
True Q8 Structure: CCCCCCCCCCCCTTCCCCCCCCC
Predicted Q8 Structure: CCCCCCCCCCCCCCCCCCCCCCC
Q8 Similarity: 91.30%
Average Similarity: 95.65%
------------------------------
Sequence: GPPPPPGPPPPPGPPPPPGL
True Q3 Structure: CCCCCCCCCCCCCCCCCCCC
Predicted Q3 Structure: CCCCCCCCCCCCCCCCCCCC
Q3 Similarity: 100.00%
True Q8 Structure: CCCCCSSCCCCCCCCCCCCC
Predicted Q8 Structure: CCCCCCCCCCCCCCCCCCCC
Q8 Similarity: 90.00%
Average Similarity: 95.00%
------------------------------
Sequence: MMAPANNPFGAPPAQVNNPF
True Q3 Structure: CCCCCCCCCCCCCCCCECCC
Predicted Q3 Structure: CCCCCCCCCCCCCCCCCCCC
Q3 Similarity: 95.00%
True Q8 Structure: CCCCCCCCCCCCCCCCBCCC
Predicted Q8 Structure: CCCCCCCCCCCCCCCCCCCC
Q8 Similar