In [None]:
# Mount Google Drive at /content/drive; the data paths used by later cells
# live under /content/drive/MyDrive. This import only exists on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np

def parse_hmm(fname):
    """Parse a profile-HMM text file into a sequence and probability matrices.

    Expected layout (presumably an HH-suite ``.hhm`` file -- confirm against
    the tool that produced the inputs):

    * everything up to and including the first line starting with ``#`` is
      skipped, followed by four more header lines;
    * each position then occupies three lines: a line whose first token is the
      residue and whose tokens 2..21 are 20 encoded values, a line starting
      with 10 encoded values, and a blank line;
    * a line starting with ``//`` ends the profile.

    Encoded values ``v`` are converted with ``2 ** (-v / 1000)``; ``*`` is
    mapped to ``0.0``.

    Parameters
    ----------
    fname : str or path-like
        Path of the file to read.

    Returns
    -------
    tuple
        ``(sequence, prob, extras)`` where ``sequence`` is the concatenated
        residue tokens, ``prob`` has shape ``(L, 20)`` and ``extras`` has shape
        ``(L, 10)``. For ``L > 0`` both are ``np.matrix`` objects (kept for
        backward compatibility; wrap in ``np.array`` for plain arrays); for
        ``L == 0`` they are empty ``ndarray`` objects.

    Raises
    ------
    ValueError
        If the file ends before the ``#`` header line or the ``//``
        terminator, or a data line has too few columns.
    AssertionError
        If the third line of a position record is not blank.
    """

    def _decode(tokens):
        # Stored value is -1000 * log2(p); '*' marks probability zero.
        return [2 ** (-float(tok) / 1000) if tok != '*' else 0. for tok in tokens]

    seq = []
    prob_rows = []
    extra_rows = []

    # The context manager guarantees the handle is closed even on errors.
    with open(fname) as f:
        line = f.readline()
        while not line.startswith('#'):
            if not line:  # readline() returns '' only at end of file
                raise ValueError(f"{fname}: no line starting with '#' found")
            line = f.readline()

        # Four header lines follow the '#' line and carry no per-position data.
        for _ in range(4):
            f.readline()

        line = f.readline()
        while line[0:2] != '//':
            if not line:
                raise ValueError(f"{fname}: file ended before the '//' terminator")

            lineinfo = line.split()
            if len(lineinfo) < 22:
                raise ValueError(f"{fname}: expected at least 22 columns, got {len(lineinfo)}")
            seq.append(lineinfo[0])
            prob_rows.append(_decode(lineinfo[2:22]))

            lineinfo = f.readline().split()
            if len(lineinfo) < 10:
                raise ValueError(f"{fname}: expected at least 10 columns, got {len(lineinfo)}")
            extra_rows.append(_decode(lineinfo[0:10]))

            line = f.readline()
            assert len(line.strip()) == 0, f"{fname}: expected a blank separator line"

            line = f.readline()

    # Build each matrix once instead of concatenating row by row (quadratic).
    prob = np.matrix(prob_rows) if prob_rows else np.zeros([0, 20])
    extras = np.matrix(extra_rows) if extra_rows else np.zeros([0, 10])
    return (''.join(seq), prob, extras)

## HMM Profile Extraction

In [None]:
import pandas as pd
# Benchmark table; later cells read its 'PDBid' column and the five label columns.
benchmark_csv_path = '/content/drive/MyDrive/HDA/Benchmark_BinaryML.csv'
df = pd.read_csv(benchmark_csv_path)

In [None]:
import numpy as np

# Parse the HMM file of every PDB id and keep only the second element of the
# parse_hmm result (the L x 20 probability matrix), converted to a plain ndarray.
hmm_matrices = [
    np.array(parse_hmm(f'/content/drive/MyDrive/HDA/Benchmark_HMM/{pdb_id}.txt')[1])
    for pdb_id in df['PDBid']
]

# Store the matrices alongside the benchmark rows as column 'hmm_matrix'.
df['hmm_matrix'] = hmm_matrices

## Skip-X-gram Extraction

In [None]:
import numpy as np

def calculate_sxgbg_features(evolutionary_profile, X):
    """Compute the 20x20 skip-X bigram matrix of a profile.

    Entry ``[i, j]`` is the sum over all row pairs ``(p, p + X + 1)`` of
    ``profile[p, i] * profile[p + X + 1, j]``; ``X`` is the number of rows
    skipped between the two members of a pair (``X = 0`` pairs adjacent rows).

    Parameters
    ----------
    evolutionary_profile : array-like, shape (L, C) with C >= 20
        One row per sequence position. Only the first 20 columns are used.
    X : int
        Non-negative number of skipped rows.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(20, 20)``. All zeros when the profile has
        fewer than ``X + 2`` rows, because no pair exists.

    Raises
    ------
    ValueError
        If ``X`` is negative or the profile has fewer than 20 columns.
    """
    n_states = 20

    profile = np.asarray(evolutionary_profile, dtype=float)
    L, n_cols = profile.shape

    if X < 0:
        # A negative skip would make the row indices wrap around silently.
        raise ValueError(f"X must be non-negative, got {X}")
    if n_cols < n_states:
        raise ValueError(f"profile needs at least {n_states} columns, got {n_cols}")

    profile = profile[:, :n_states]

    # Rows 0 .. L-X-2 are paired with rows X+1 .. L-1.
    n_pairs = max(L - X - 1, 0)
    leading = profile[:n_pairs]
    trailing = profile[X + 1:X + 1 + n_pairs]

    # One matrix product replaces the 20 * 20 * L scalar multiply-adds.
    return leading.T @ trailing

### S0G Matrix

In [None]:
import numpy as np

# Skip-0 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s0g_matrix = [
    np.array(calculate_sxgbg_features(profile, 0))
    for profile in df['hmm_matrix']
]

df['s0g_matrix'] = s0g_matrix

### S1G Matrix

In [None]:
import numpy as np

# Skip-1 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s1g_matrix = [
    np.array(calculate_sxgbg_features(profile, 1))
    for profile in df['hmm_matrix']
]

df['s1g_matrix'] = s1g_matrix

### S2G Matrix

In [None]:
import numpy as np

# Skip-2 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s2g_matrix = [
    np.array(calculate_sxgbg_features(profile, 2))
    for profile in df['hmm_matrix']
]

df['s2g_matrix'] = s2g_matrix

### S3G Matrix

In [None]:
import numpy as np

# Skip-3 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s3g_matrix = [
    np.array(calculate_sxgbg_features(profile, 3))
    for profile in df['hmm_matrix']
]

df['s3g_matrix'] = s3g_matrix

### S4G Matrix

In [None]:
import numpy as np

# Skip-4 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s4g_matrix = [
    np.array(calculate_sxgbg_features(profile, 4))
    for profile in df['hmm_matrix']
]

df['s4g_matrix'] = s4g_matrix

### S5G Matrix

In [None]:
import numpy as np

# Skip-5 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s5g_matrix = [
    np.array(calculate_sxgbg_features(profile, 5))
    for profile in df['hmm_matrix']
]

df['s5g_matrix'] = s5g_matrix

### S6G Matrix

In [None]:
import numpy as np

# Skip-6 bigram features: one 20x20 matrix for every entry of df['hmm_matrix'].
s6g_matrix = [
    np.array(calculate_sxgbg_features(profile, 6))
    for profile in df['hmm_matrix']
]

df['s6g_matrix'] = s6g_matrix

## Evaluation Metrics

In [None]:
def calculate_accuracy(y_true, y_pred):
    """Mean per-instance intersection-over-union of true and predicted labels.

    Predictions are binarised with ``y_pred > 0.5``. For each instance the
    score is ``|true AND pred| / |true OR pred|``; the function returns the
    mean of these scores.

    Parameters
    ----------
    y_true : array-like, shape (n_samples, n_labels)
        Ground-truth indicators; non-zero entries count as set.
    y_pred : array-like, shape (n_samples, n_labels)
        Predicted scores; entries strictly greater than 0.5 count as set.

    Returns
    -------
    numpy.float64
        Average score over all instances.

    Raises
    ------
    ValueError
        If ``y_true`` contains no instances.

    Notes
    -----
    An instance with no true and no predicted label (empty union) is scored
    1.0, since the prediction matches the truth exactly; a plain division
    would give NaN there and turn the whole average into NaN.
    """
    # Convert first, then threshold, so plain Python lists are accepted too.
    truth = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred) > 0.5

    if truth.size == 0:
        raise ValueError("y_true must contain at least one instance")

    # Treat 1-D input as one label per instance.
    truth = truth.reshape(len(truth), -1)
    predicted = predicted.reshape(len(predicted), -1)

    intersection = np.logical_and(truth, predicted).sum(axis=1)
    union = np.logical_or(truth, predicted).sum(axis=1)

    scores = np.ones(len(truth), dtype=float)
    has_labels = union > 0
    scores[has_labels] = intersection[has_labels] / union[has_labels]

    return scores.mean()

In [None]:
import numpy as np

def norm_accuracy(y_true, y_pred, verbose=False):
    """Mean fraction of labels predicted correctly per instance.

    Predictions are binarised with ``y_pred > 0.5``. For each instance the
    score is the number of positions where truth and prediction agree,
    divided by the number of labels; the function returns the mean score.

    Parameters
    ----------
    y_true : array-like, shape (n_samples, n_labels)
        Ground-truth label indicators.
    y_pred : array-like, shape (n_samples, n_labels)
        Predicted scores; entries strictly greater than 0.5 become 1.
    verbose : bool, default False
        When True, print each truth/prediction pair followed by its score.

    Returns
    -------
    numpy.float64
        Average per-instance score.

    Raises
    ------
    ValueError
        If ``y_true`` contains no instances.
    """
    # Convert first, then threshold, so plain Python lists are accepted too.
    truth = np.asarray(y_true)
    predicted = (np.asarray(y_pred) > 0.5).astype(int)

    if truth.size == 0:
        raise ValueError("y_true must contain at least one instance")

    # Treat 1-D input as one label per instance.
    truth = truth.reshape(len(truth), -1)
    predicted = predicted.reshape(len(predicted), -1)

    # Normalise by the actual label count rather than a fixed 5.
    n_labels = truth.shape[1]
    per_instance = (truth == predicted).sum(axis=1) / n_labels

    if verbose:
        for true_row, pred_row, score in zip(truth, predicted, per_instance):
            print(true_row, pred_row)
            print(score)

    return per_instance.mean()

## S0G Matrix Model

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau

# Load the S0G features and the five binary localisation labels.
train_features = np.array(df['s0g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every 20x20 matrix into a 400-dimensional feature vector.
train_features = train_features.reshape(-1, 400)

# 5-fold cross-validation (KFold default: contiguous, unshuffled folds).
kf = KFold(n_splits=5)

# Per-fold metric values.
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

batch_size = 4

for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build a new, untrained network for every fold. compile() does not reset
    # weights, so a single model reused across folds would start each fold
    # already trained on earlier folds' training data, which contains the
    # current test fold, and the scores would be inflated.
    # Input shape is the per-sample shape (400,), without a batch axis.
    model_s0g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])
    model_s0g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the test fold doubles as validation data, so early stopping
    # and restore_best_weights pick the epoch using the data scored below.
    # Consider holding out part of X_train for validation instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    model_s0g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict the held-out fold and binarise once at the 0.5 threshold.
    y_pred = model_s0g_1.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # zero_division=0 keeps the default value for undefined metrics but
    # suppresses the per-fold UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds.
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the averaged precision and recall; 0.0 when both are zero.
if avg_precision + avg_recall > 0:
    f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
else:
    f1 = 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6

print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [1 0 0 0 1]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 1]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0]

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 1 0] [0 0 0 1 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1

## S1G Matrix Model

In [None]:
# Load the S1G features and the five binary localisation labels.
train_features = np.array(df['s1g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every 20x20 matrix into a 400-dimensional feature vector.
train_features = train_features.reshape(-1, 400)

# 5-fold cross-validation (KFold default: contiguous, unshuffled folds).
kf = KFold(n_splits=5)

# Per-fold metric values.
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

batch_size = 4

for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build a new, untrained network for every fold. compile() does not reset
    # weights, so a single model reused across folds would start each fold
    # already trained on earlier folds' training data, which contains the
    # current test fold, and the scores would be inflated.
    # Input shape is the per-sample shape (400,), without a batch axis.
    model_s1g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])
    model_s1g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the test fold doubles as validation data, so early stopping
    # and restore_best_weights pick the epoch using the data scored below.
    # Consider holding out part of X_train for validation instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    model_s1g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict the held-out fold and binarise once at the 0.5 threshold.
    y_pred = model_s1g_1.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # zero_division=0 keeps the default value for undefined metrics but
    # suppresses the per-fold UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds.
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the averaged precision and recall; 0.0 when both are zero.
if avg_precision + avg_recall > 0:
    f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
else:
    f1 = 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6

print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 1]
0.6
[0 0 0 0 1] [0 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 28: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 29/30
Epoch 30/30
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 22: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 25: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 0 0 0 1]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 1 0] [0 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 

## S2G Matrix Model

In [None]:
# Load the S2G features and the five binary localisation labels.
train_features = np.array(df['s2g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every 20x20 matrix into a 400-dimensional feature vector.
train_features = train_features.reshape(-1, 400)

# 5-fold cross-validation (KFold default: contiguous, unshuffled folds).
kf = KFold(n_splits=5)

# Per-fold metric values.
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

batch_size = 4

for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build a new, untrained network for every fold. compile() does not reset
    # weights, so a single model reused across folds would start each fold
    # already trained on earlier folds' training data, which contains the
    # current test fold, and the scores would be inflated.
    # Input shape is the per-sample shape (400,), without a batch axis.
    model_s2g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])
    model_s2g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the test fold doubles as validation data, so early stopping
    # and restore_best_weights pick the epoch using the data scored below.
    # Consider holding out part of X_train for validation instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    model_s2g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict the held-out fold and binarise once at the 0.5 threshold.
    y_pred = model_s2g_1.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # zero_division=0 keeps the default value for undefined metrics but
    # suppresses the per-fold UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds.
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the averaged precision and recall; 0.0 when both are zero.
if avg_precision + avg_recall > 0:
    f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
else:
    f1 = 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6

print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 30: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1

## S3G Matrix Model

In [None]:
# Load the S3G features and the five binary localisation labels.
train_features = np.array(df['s3g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every 20x20 matrix into a 400-dimensional feature vector.
train_features = train_features.reshape(-1, 400)

# 5-fold cross-validation (KFold default: contiguous, unshuffled folds).
kf = KFold(n_splits=5)

# Per-fold metric values.
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

batch_size = 4

for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build a new, untrained network for every fold. compile() does not reset
    # weights, so a single model reused across folds would start each fold
    # already trained on earlier folds' training data, which contains the
    # current test fold, and the scores would be inflated.
    # Input shape is the per-sample shape (400,), without a batch axis.
    model_s3g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])
    model_s3g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the test fold doubles as validation data, so early stopping
    # and restore_best_weights pick the epoch using the data scored below.
    # Consider holding out part of X_train for validation instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    model_s3g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict the held-out fold and binarise once at the 0.5 threshold.
    y_pred = model_s3g_1.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # zero_division=0 keeps the default value for undefined metrics but
    # suppresses the per-fold UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds.
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the averaged precision and recall; 0.0 when both are zero.
if avg_precision + avg_recall > 0:
    f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
else:
    f1 = 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6

print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 1 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 1 0]
0.4
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 1 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 1 0] [0 0 0 1 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 1]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [0 0 0 1 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0]

  _warn_prf(average, modifier, msg_start, len(result))


## S4G Matrix Model

In [None]:
# Train and evaluate a dense multi-label classifier on the S4G features
# with 5-fold cross-validation.

# Load the features and the five binary localisation labels
train_features = np.array(df['s4g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten each sample's matrix into a 400-element feature vector
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# 5-fold cross-validation: one iteration per fold (this is not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build the network inside the loop so that every fold starts from freshly
    # initialised weights. Re-compiling a single shared model does not reset its
    # weights, so they would already have been fitted on this fold's test samples.
    # The input shape excludes the batch axis: each sample is a flat 400-vector.
    model_s4g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])

    # Compile the model and set up callbacks
    model_s4g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    # NOTE(review): the held-out fold is also passed as validation_data, so early
    # stopping and best-weight restoration are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider carving
    # a separate validation split out of X_train.
    batch_size = 4
    model_s4g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict on the held-out fold and threshold the sigmoid outputs once
    y_pred = model_s4g_1.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int)

    # Calculate and store the metrics for this fold. zero_division=0 keeps the
    # value sklearn already uses for labels with no predicted/true samples but
    # silences the UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # norm_accuracy / calculate_accuracy are defined elsewhere in the notebook and
    # receive the raw probabilities.
    # NOTE(review): the list names look swapped relative to the helper names
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) - confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the fold-averaged precision and recall; defined as 0 when both are 0
f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the averaged metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 22: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 25: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 1 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 10: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 0 0 0 1] [1 0 0 1 0]
0.4
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 17: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [0 0 0 0 1]
0.6
[0 1 0 0 0] [0 0 0 0 1]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 1 0] [0 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 1 0]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [1 0 0 1 0]
0.8
[0 0 0 1 0] [0 1 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 10: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 23: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 

  _warn_prf(average, modifier, msg_start, len(result))


## S5G Matrix Model

In [None]:
# Train and evaluate a dense multi-label classifier on the S5G features
# with 5-fold cross-validation.

# Load the features and the five binary localisation labels
train_features = np.array(df['s5g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten each sample's matrix into a 400-element feature vector
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# 5-fold cross-validation: one iteration per fold (this is not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build the network inside the loop so that every fold starts from freshly
    # initialised weights. Re-compiling a single shared model does not reset its
    # weights, so they would already have been fitted on this fold's test samples.
    # The input shape excludes the batch axis: each sample is a flat 400-vector.
    model_s5g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])

    # Compile the model and set up callbacks
    model_s5g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    # NOTE(review): the held-out fold is also passed as validation_data, so early
    # stopping and best-weight restoration are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider carving
    # a separate validation split out of X_train.
    batch_size = 4
    model_s5g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict on the held-out fold and threshold the sigmoid outputs once
    y_pred = model_s5g_1.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int)

    # Calculate and store the metrics for this fold. zero_division=0 keeps the
    # value sklearn already uses for labels with no predicted/true samples but
    # silences the UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # norm_accuracy / calculate_accuracy are defined elsewhere in the notebook and
    # receive the raw probabilities.
    # NOTE(review): the list names look swapped relative to the helper names
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) - confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the fold-averaged precision and recall; defined as 0 when both are 0
f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the averaged metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 21: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0]

## S6G Matrix Model

In [None]:
# Train and evaluate a dense multi-label classifier on the S6G features
# with 5-fold cross-validation.

# Load the features and the five binary localisation labels
train_features = np.array(df['s6g_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten each sample's matrix into a 400-element feature vector
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# 5-fold cross-validation: one iteration per fold (this is not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build the network inside the loop so that every fold starts from freshly
    # initialised weights. Re-compiling a single shared model does not reset its
    # weights, so they would already have been fitted on this fold's test samples.
    # The input shape excludes the batch axis: each sample is a flat 400-vector.
    model_s6g_1 = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])

    # Compile the model and set up callbacks
    model_s6g_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    # NOTE(review): the held-out fold is also passed as validation_data, so early
    # stopping and best-weight restoration are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider carving
    # a separate validation split out of X_train.
    batch_size = 4
    model_s6g_1.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict on the held-out fold and threshold the sigmoid outputs once
    y_pred = model_s6g_1.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int)

    # Calculate and store the metrics for this fold. zero_division=0 keeps the
    # value sklearn already uses for labels with no predicted/true samples but
    # silences the UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # norm_accuracy / calculate_accuracy are defined elsewhere in the notebook and
    # receive the raw probabilities.
    # NOTE(review): the list names look swapped relative to the helper names
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) - confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the fold-averaged precision and recall; defined as 0 when both are 0
f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the averaged metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 22: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 25: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 12: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 1 0 0 0] [0 0 0 0 1]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 1 0] [0 0 0 1 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [0 0 0 0 1]
0.6
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 1 0 0 0]
0.6
[0 0 0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0]

# Standardizing SkipGram

In [None]:
# standardize the sxgbg_matrix
import numpy as np

def calculate_sxgbg_stan_features(evolutionary_profile, X):
    """Compute the z-score-standardised skip-X-gram bigram matrix of a profile.

    Entry ``(i, j)`` of the raw matrix is the sum, over every pair of rows that
    lie ``X + 1`` positions apart, of ``profile[r, i] * profile[r + X + 1, j]``,
    using the first 20 columns of the profile. The raw matrix is then
    standardised with its own global mean and standard deviation.

    Parameters
    ----------
    evolutionary_profile : array-like, shape (L, >= 20)
        Per-position profile; only columns 0..19 are used.
    X : int
        Non-negative number of rows skipped between the two members of each
        pair (``X = 0`` pairs adjacent rows).

    Returns
    -------
    numpy.ndarray, shape (20, 20)
        The standardised matrix. All zeros when the raw matrix has zero
        standard deviation, which includes profiles too short to form any pair.
    """
    profile = np.asarray(evolutionary_profile, dtype=float)[:, :20]
    L = profile.shape[0]

    # Number of (row, row + X + 1) pairs that fit in the profile; clamp at zero
    # so short profiles yield empty slices instead of negative-index wraparound.
    n_pairs = max(L - X - 1, 0)
    head = profile[:n_pairs]
    tail = profile[X + 1:X + 1 + n_pairs]

    # A single matrix product replaces the 20 x 20 x L Python loops:
    # (head.T @ tail)[i, j] == sum_r head[r, i] * tail[r, j].
    sxgbg_matrix = head.T @ tail

    # Standardize the sxgbg_matrix (z-score normalization)
    mean = np.mean(sxgbg_matrix)
    std = np.std(sxgbg_matrix)

    if std != 0:
        sxgbg_matrix = (sxgbg_matrix - mean) / std
    else:
        sxgbg_matrix = np.zeros_like(sxgbg_matrix)  # Handle the case of zero standard deviation

    return sxgbg_matrix

## S0G Stan

In [None]:
import numpy as np

# Standardised skip-0-gram matrix for every HMM profile, in DataFrame row order
s0g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 0))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s0g_stan_matrix'] = s0g_stan_matrix

## S1G Stan

In [None]:
import numpy as np

# Standardised skip-1-gram matrix for every HMM profile, in DataFrame row order
s1g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 1))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s1g_stan_matrix'] = s1g_stan_matrix

## S2G Stan

In [None]:
import numpy as np

# Standardised skip-2-gram matrix for every HMM profile, in DataFrame row order
s2g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 2))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s2g_stan_matrix'] = s2g_stan_matrix

## S3G Stan

In [None]:
import numpy as np

# Standardised skip-3-gram matrix for every HMM profile, in DataFrame row order
s3g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 3))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s3g_stan_matrix'] = s3g_stan_matrix

## S4G Stan

In [None]:
import numpy as np

# Standardised skip-4-gram matrix for every HMM profile, in DataFrame row order
s4g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 4))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s4g_stan_matrix'] = s4g_stan_matrix

## S5G Stan

In [None]:
import numpy as np

# Standardised skip-5-gram matrix for every HMM profile, in DataFrame row order
s5g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 5))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s5g_stan_matrix'] = s5g_stan_matrix

## S6G Stan

In [None]:
import numpy as np

# Standardised skip-6-gram matrix for every HMM profile, in DataFrame row order
s6g_stan_matrix = [
    np.array(calculate_sxgbg_stan_features(hmm_profile, 6))
    for hmm_profile in df['hmm_matrix']
]

# Store the matrices as a new DataFrame column
df['s6g_stan_matrix'] = s6g_stan_matrix

## S0G Stan Matrix Model

In [None]:
# Train and evaluate a dense multi-label classifier on the standardised S0G
# features with 5-fold cross-validation.

# Load the features and the five binary localisation labels
train_features = np.array(df['s0g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten each sample's matrix into a 400-element feature vector
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# 5-fold cross-validation: one iteration per fold (this is not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build the network inside the loop so that every fold starts from freshly
    # initialised weights. Re-compiling a single shared model does not reset its
    # weights, so they would already have been fitted on this fold's test samples.
    # The input shape excludes the batch axis: each sample is a flat 400-vector.
    model_s0g_stan = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])

    # Compile the model and set up callbacks
    model_s0g_stan.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    # NOTE(review): the held-out fold is also passed as validation_data, so early
    # stopping and best-weight restoration are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider carving
    # a separate validation split out of X_train.
    batch_size = 4
    model_s0g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict on the held-out fold and threshold the sigmoid outputs once
    y_pred = model_s0g_stan.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int)

    # Calculate and store the metrics for this fold. zero_division=0 keeps the
    # value sklearn already uses for labels with no predicted/true samples but
    # silences the UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # norm_accuracy / calculate_accuracy are defined elsewhere in the notebook and
    # receive the raw probabilities.
    # NOTE(review): the list names look swapped relative to the helper names
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) - confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the fold-averaged precision and recall; defined as 0 when both are 0
f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the averaged metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 23: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 1 0 0 1]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 12: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[1 0 0 0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 1]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0]

## S1G Stan Matrix Model

In [None]:
# Train and evaluate a dense multi-label classifier on the standardised S1G
# features with 5-fold cross-validation.

# Load the features and the five binary localisation labels
train_features = np.array(df['s1g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten each sample's matrix into a 400-element feature vector
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# 5-fold cross-validation: one iteration per fold (this is not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Build the network inside the loop so that every fold starts from freshly
    # initialised weights. Re-compiling a single shared model does not reset its
    # weights, so they would already have been fitted on this fold's test samples.
    # The input shape excludes the batch axis: each sample is a flat 400-vector.
    model_s1g_stan = keras.Sequential([
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(5, activation='sigmoid')
    ])

    # Compile the model and set up callbacks
    model_s1g_stan.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    # NOTE(review): the held-out fold is also passed as validation_data, so early
    # stopping and best-weight restoration are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider carving
    # a separate validation split out of X_train.
    batch_size = 4
    model_s1g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict on the held-out fold and threshold the sigmoid outputs once
    y_pred = model_s1g_stan.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int)

    # Calculate and store the metrics for this fold. zero_division=0 keeps the
    # value sklearn already uses for labels with no predicted/true samples but
    # silences the UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_labels, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # norm_accuracy / calculate_accuracy are defined elsewhere in the notebook and
    # receive the raw probabilities.
    # NOTE(review): the list names look swapped relative to the helper names
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) - confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 from the fold-averaged precision and recall; defined as 0 when both are 0
f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the averaged metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 12: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: early stopping
[0 0 0 0 1] [1 0 0 0 1]
0.8
[0 0 0 1 0] [1 0 0 1 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 1 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 0 0 0 1]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 0 1]
0.6
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 22: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 

## S2G Stan Matrix Model

In [None]:
# Dense classifier on the 's2g_stan_matrix' features, evaluated with 5-fold
# cross-validation.
#
# A brand-new network is built for every fold. Reusing one model instance
# across folds keeps the weights fitted in earlier folds, so later folds would
# be scored on samples the network had already been trained on.
def build_model_s2g_stan():
    """Return a new, uncompiled 400-100-25-5 dense network with sigmoid outputs."""
    return keras.Sequential([
        # Every sample is a flat 400-element vector (see the reshape below).
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        # One independent sigmoid per label column (multi-label output).
        keras.layers.Dense(5, activation='sigmoid')
    ])

# Load the features (flattened to 400 values per sample) and the 5 label columns
train_features = np.array(df['s2g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fresh weights and optimizer state for this fold. After the loop,
    # `model_s2g_stan` refers to the model trained on the last fold's split.
    model_s2g_stan = build_model_s2g_stan()
    model_s2g_stan.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the held-out fold is also passed as validation data, so
    # early stopping / LR scheduling are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider
    # carving a validation split out of the training portion instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    batch_size = 4
    model_s2g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict probabilities for the held-out fold and threshold them once
    y_pred = model_s2g_stan.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Score this fold. zero_division=0 keeps the value sklearn already used
    # for labels with no predicted/true samples, without the warning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): `norm_accuracy` feeds the value printed as "Accuracy" and
    # `calculate_accuracy` the one printed as "Average Normalized Accuracy";
    # both helpers are defined elsewhere -- confirm the labels are not swapped.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 is derived from the fold-averaged precision/recall; guard the 0/0 case.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the summary metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 21: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0]

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 10: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 12: early stopping
[0 0 0 1 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 1]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 1 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1

## S3G Stan Matrix Model

In [None]:
# Dense classifier on the 's3g_stan_matrix' features, evaluated with 5-fold
# cross-validation.
#
# A brand-new network is built for every fold. Reusing one model instance
# across folds keeps the weights fitted in earlier folds, so later folds would
# be scored on samples the network had already been trained on.
def build_model_s3g_stan():
    """Return a new, uncompiled 400-100-25-5 dense network with sigmoid outputs."""
    return keras.Sequential([
        # Every sample is a flat 400-element vector (see the reshape below).
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        # One independent sigmoid per label column (multi-label output).
        keras.layers.Dense(5, activation='sigmoid')
    ])

# Load the features (flattened to 400 values per sample) and the 5 label columns
train_features = np.array(df['s3g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fresh weights and optimizer state for this fold. After the loop,
    # `model_s3g_stan` refers to the model trained on the last fold's split.
    model_s3g_stan = build_model_s3g_stan()
    model_s3g_stan.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the held-out fold is also passed as validation data, so
    # early stopping / LR scheduling are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider
    # carving a validation split out of the training portion instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    batch_size = 4
    model_s3g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict probabilities for the held-out fold and threshold them once
    y_pred = model_s3g_stan.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Score this fold. zero_division=0 keeps the value sklearn already used
    # for labels with no predicted/true samples, without the warning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): `norm_accuracy` feeds the value printed as "Accuracy" and
    # `calculate_accuracy` the one printed as "Average Normalized Accuracy";
    # both helpers are defined elsewhere -- confirm the labels are not swapped.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 is derived from the fold-averaged precision/recall; guard the 0/0 case.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the summary metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 1 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 1 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0]

## S4G Stan Matrix Model

In [None]:
# Dense classifier on the 's4g_stan_matrix' features, evaluated with 5-fold
# cross-validation.
#
# A brand-new network is built for every fold. Reusing one model instance
# across folds keeps the weights fitted in earlier folds, so later folds would
# be scored on samples the network had already been trained on.
def build_model_s4g_stan():
    """Return a new, uncompiled 400-100-25-5 dense network with sigmoid outputs."""
    return keras.Sequential([
        # Every sample is a flat 400-element vector (see the reshape below).
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        # One independent sigmoid per label column (multi-label output).
        keras.layers.Dense(5, activation='sigmoid')
    ])

# Load the features (flattened to 400 values per sample) and the 5 label columns
train_features = np.array(df['s4g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fresh weights and optimizer state for this fold. After the loop,
    # `model_s4g_stan` refers to the model trained on the last fold's split.
    model_s4g_stan = build_model_s4g_stan()
    model_s4g_stan.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the held-out fold is also passed as validation data, so
    # early stopping / LR scheduling are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider
    # carving a validation split out of the training portion instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    batch_size = 4
    model_s4g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict probabilities for the held-out fold and threshold them once
    y_pred = model_s4g_stan.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Score this fold. zero_division=0 keeps the value sklearn already used
    # for labels with no predicted/true samples, without the warning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): `norm_accuracy` feeds the value printed as "Accuracy" and
    # `calculate_accuracy` the one printed as "Average Normalized Accuracy";
    # both helpers are defined elsewhere -- confirm the labels are not swapped.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 is derived from the fold-averaged precision/recall; guard the 0/0 case.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the summary metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 23: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [0 0 0 1 0]
0.6
[0 0 0 1 0] [0 0 0 1 0]

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [1 0 0 1

## S5G Stan Matrix Model

In [None]:
# Dense classifier on the 's5g_stan_matrix' features, evaluated with 5-fold
# cross-validation.
#
# A brand-new network is built for every fold. Reusing one model instance
# across folds keeps the weights fitted in earlier folds, so later folds would
# be scored on samples the network had already been trained on.
def build_model_s5g_stan():
    """Return a new, uncompiled 400-100-25-5 dense network with sigmoid outputs."""
    return keras.Sequential([
        # Every sample is a flat 400-element vector (see the reshape below).
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        # One independent sigmoid per label column (multi-label output).
        keras.layers.Dense(5, activation='sigmoid')
    ])

# Load the features (flattened to 400 values per sample) and the 5 label columns
train_features = np.array(df['s5g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fresh weights and optimizer state for this fold. After the loop,
    # `model_s5g_stan` refers to the model trained on the last fold's split.
    model_s5g_stan = build_model_s5g_stan()
    model_s5g_stan.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the held-out fold is also passed as validation data, so
    # early stopping / LR scheduling are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider
    # carving a validation split out of the training portion instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    batch_size = 4
    model_s5g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict probabilities for the held-out fold and threshold them once
    y_pred = model_s5g_stan.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Score this fold. zero_division=0 keeps the value sklearn already used
    # for labels with no predicted/true samples, without the warning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): `norm_accuracy` feeds the value printed as "Accuracy" and
    # `calculate_accuracy` the one printed as "Average Normalized Accuracy";
    # both helpers are defined elsewhere -- confirm the labels are not swapped.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 is derived from the fold-averaged precision/recall; guard the 0/0 case.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the summary metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 29: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 30/30
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 1]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 21: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 

## S5G Stan Matrix LSTM Model

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau

# Stacked-LSTM classifier on the 's5g_stan_matrix' features, evaluated with
# 5-fold cross-validation.
#
# A brand-new network is built for every fold. Reusing one model instance
# across folds keeps the weights fitted in earlier folds, so later folds would
# be scored on samples the network had already been trained on.
def build_model_lstmsn():
    """Return a new, uncompiled two-layer LSTM network with 5 sigmoid outputs."""
    return keras.Sequential([
        # Each sample is fed as a length-400 sequence with one value per step.
        keras.layers.Input(shape=(400,1)),
        keras.layers.LSTM(256, return_sequences=True, kernel_initializer = 'he_normal'),
        keras.layers.LSTM(128, return_sequences=True, kernel_initializer = 'he_normal'),  # Second LSTM layer with 128 units
        keras.layers.Flatten(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(64, activation='relu', kernel_initializer = 'he_normal'),     # Fully connected layer
        keras.layers.Dense(5, activation='sigmoid')    # Output layer with sigmoid activation for multi-label classification
    ])

# Load the features as (samples, 400, 1) sequences and the 5 label columns
train_features = np.array(df['s5g_stan_matrix'].tolist())
train_features = train_features.reshape(-1, 400,1)

train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Initialize K-Fold cross-validator
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fresh weights and optimizer state for this fold. After the loop,
    # `model_lstmsn` refers to the model trained on the last fold's split.
    model_lstmsn = build_model_lstmsn()
    model_lstmsn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the held-out fold is also passed as validation data, so
    # early stopping / LR scheduling are tuned on the same samples that are
    # scored below; the reported metrics are therefore optimistic. Consider
    # carving a validation split out of the training portion instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=7, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=4, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    batch_size = 4
    model_lstmsn.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict probabilities for the held-out fold and threshold them once
    y_pred = model_lstmsn.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Score this fold. zero_division=0 keeps the value sklearn already used
    # for labels with no predicted/true samples, without the warning.
    accuracy = accuracy_score(y_test, y_pred_binary)
    precision = precision_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_binary, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): `norm_accuracy` feeds the value printed as "Accuracy" and
    # `calculate_accuracy` the one printed as "Average Normalized Accuracy";
    # both helpers are defined elsewhere -- confirm the labels are not swapped.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm)/len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores)/len(acc_scores)
# F1 is derived from the fold-averaged precision/recall; guard the 0/0 case.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum else 0.0
grand_mean=(average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc)/6
# Print the summary metrics
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 14: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 21: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 24: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: early stopping
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 1 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [1 1 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0]

## S5G Stan XGBoost Matrix Model

In [None]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Define the XGBoost classifier
model_xgb = xgb.XGBClassifier()

# Load your data and preprocess it
train_features = np.array(df['s5g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every sample into a single row of 400 features
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator (5 folds, shuffle left at its default)
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds (this is K-Fold, not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fit the XGBoost model to the training data of this fold
    model_xgb.fit(X_train, y_train)

    # Make predictions on the held-out fold
    y_pred = model_xgb.predict(X_test)

    # zero_division=0 keeps the score at 0 for labels that have no predicted /
    # true samples in this fold, but without the UndefinedMetricWarning noise.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): norm_accuracy / calculate_accuracy are defined in another
    # cell; the accumulator names suggest the two may be swapped
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) -- confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm) / len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores) / len(acc_scores)

# F1 is derived from the fold-averaged precision and recall; guard the
# degenerate case where both are 0 so the cell does not raise ZeroDivisionError.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum > 0 else 0.0

# Unweighted mean of the six summary metrics
grand_mean = (average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc) / 6

# Print the summary
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 1 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [1 0 0 1 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 1 0] [1 0 0 0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [0 0 0 0 1]
0.6
[1 0 0 0 0] [0 0 0 0 1]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 1]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 1] [1 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [0 0 0 0 1]
0.6
[1 0 0 0 1] [0 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0

  _warn_prf(average, modifier, msg_start, len(result))


## S5G Stan KNN Matrix Model

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import KFold

# Define the k-NN classifier
model_knn = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (k) as needed

# Load your data and preprocess it
train_features = np.array(df['s5g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every sample into a single row of 400 features
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator (5 folds, shuffle left at its default)
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds (this is K-Fold, not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Fit the k-NN model to the training data of this fold
    model_knn.fit(X_train, y_train)

    # Make predictions on the held-out fold
    y_pred = model_knn.predict(X_test)

    # Calculate and store the metrics for this fold.
    # zero_division=0 keeps the score at 0 for labels that have no predicted /
    # true samples in this fold, but without the UndefinedMetricWarning noise.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): norm_accuracy / calculate_accuracy are defined in another
    # cell; the accumulator names suggest the two may be swapped
    # (acc_scores <- norm_accuracy, acc_norm <- calculate_accuracy) -- confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm) / len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores) / len(acc_scores)

# F1 is derived from the fold-averaged precision and recall; guard the
# degenerate case where both are 0 so the cell does not raise ZeroDivisionError.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum > 0 else 0.0

# Unweighted mean of the six summary metrics
grand_mean = (average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc) / 6

# Print the summary
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 0 0 0 1]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 1 0]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 0 1]
0.6
[0 0 0 1 0] [0 0 0 1

  _warn_prf(average, modifier, msg_start, len(result))


[0 0 0 1 0] [0 0 0 0 0]
0.8
[1 0 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 1 0 0 0] [0 0 0 0 0]
0.8
[1 0 0 1 0] [0 0 0 1 0]
0.8
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 1 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [1 0 0 0 0]
0.6
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 1 0]
0.6
[0 0 0 1 0] [0 0 0 0 1]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 1 0 0 0]
0.6
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 1 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 1 0]
0.6
[0 0 0 1 0] [1 0 0 0 0]
0.6
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0

## S6G Stan Matrix Model

In [None]:
def _build_s6g_stan_model():
    """Return a freshly initialised, compiled 400 -> 100 -> 25 -> 5 dense network.

    A new model is needed for every cross-validation fold: re-compiling an
    existing model does not reset its weights, so reusing one instance would
    let weights trained on earlier folds (which contained the current test
    samples) leak into later folds.
    """
    model = keras.Sequential([
        # Features are fed as flat rows of 400 values (see the reshape below),
        # so the per-sample input shape is (400,).
        keras.layers.Input(shape=(400,)),
        keras.layers.Dense(400, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
        keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
        # One independent sigmoid per label column (multi-label output)
        keras.layers.Dense(5, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Load your data and preprocess it
train_features = np.array(df['s6g_stan_matrix'].tolist())
train_labels = np.array(df[['envelope', 'lumen', 'plastoglobule', 'stroma', 'thylakoid_membrane']].values)

# Flatten every sample into a single row of 400 features
train_features = train_features.reshape(-1, 400)

# Initialize K-Fold cross-validator (5 folds, shuffle left at its default)
kf = KFold(n_splits=5)

# Per-fold metric accumulators
accuracy_scores = []
precision_scores = []
recall_scores = []
acc_scores = []
acc_norm = []

# Iterate over the 5 cross-validation folds (this is K-Fold, not leave-one-out)
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features[train_index], train_features[test_index]
    y_train, y_test = train_labels[train_index], train_labels[test_index]

    # Start every fold from freshly initialised weights so nothing learned
    # from a previous fold's training data carries over.
    model_s6g_stan = _build_s6g_stan_model()

    # NOTE(review): the test fold is also used as validation data, and early
    # stopping restores the weights with the best val_accuracy on it, so the
    # reported scores are optimistically biased. Consider carving a validation
    # split out of the training fold instead.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00005, patience=10, verbose=1, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=7, min_lr=1e-6, verbose=1)
    callbacks = [early_stopping, lr_scheduler]

    batch_size = 4
    model_s6g_stan.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), batch_size=batch_size, callbacks=callbacks)

    # Predict per-label probabilities for the held-out fold
    y_pred = model_s6g_stan.predict(X_test)

    # Binarise once at the 0.5 threshold for the sklearn metrics
    y_pred_bin = (y_pred > 0.5).astype(int)

    # Calculate and store the metrics for this fold.
    # zero_division=0 keeps the score at 0 for labels that have no predicted /
    # true samples in this fold, but without the UndefinedMetricWarning noise.
    accuracy = accuracy_score(y_test, y_pred_bin)
    precision = precision_score(y_test, y_pred_bin, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred_bin, average='weighted', zero_division=0)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    # NOTE(review): the helpers below receive the raw probabilities, exactly as
    # before; they are defined in another cell and presumably threshold
    # internally -- confirm.
    acc_scores.append(norm_accuracy(y_test, y_pred))
    acc_norm.append(calculate_accuracy(y_test, y_pred))

# Average every metric across the folds
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
avg_precision = sum(precision_scores) / len(precision_scores)
avg_acc_norm = sum(acc_norm) / len(acc_norm)
avg_recall = sum(recall_scores) / len(recall_scores)
avg_acc = sum(acc_scores) / len(acc_scores)

# F1 is derived from the fold-averaged precision and recall; guard the
# degenerate case where both are 0 so the cell does not raise ZeroDivisionError.
pr_sum = avg_precision + avg_recall
f1 = 2 * (avg_precision * avg_recall) / pr_sum if pr_sum > 0 else 0.0

# Unweighted mean of the six summary metrics
grand_mean = (average_accuracy + avg_precision + avg_recall + f1 + avg_acc_norm + avg_acc) / 6

# Print the summary
print("Average Accuracy: ", average_accuracy)
print("Accuracy: ", avg_acc)
print("Average Normalized Accuracy: ", avg_acc_norm)
print("Average Precision: ", avg_precision)
print("Average Recall: ", avg_recall)
print('F1 score:', f1)
print('Grand Mean:', grand_mean)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 23: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 26: early stopping
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [0 1 0 0 0]
0.6
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 1 0 0] [1

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: early stopping
[0 0 0 1 0] [0 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[1 0 0 1 0] [1 0 0 1 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 0 0 0]
0.8
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 1 0 0] [0 0 1 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 1 0 0 0] [0 1 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 1 0] [0 0 0 1 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[0 0 0 0 1] [1 0 0 0 1]
0.8
[1 0 0 0 0] [1 0 0 0 0]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 0 1] [0 0 0 0 1]
1.0
[1 0 0 0 0] [1 0 0 0 0]
1.0
[0 0 0 1 0] [0 0 0 1