<a href="https://colab.research.google.com/github/olfabre/LSTM_avec_Keras/blob/main/ProteinesTP3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import requests

from keras.models import Sequential, Model, load_model
from keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional, Input
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from sklearn.metrics import accuracy_score

# ===================== IMPROVEMENTS ======================
# 1) Increase the size / diversity of the dataset
#    -> Collect more proteins (PDB, CATH, SCOP, etc.)
#    -> Merge several structural databases
#    -> Load a larger file instead of "protein-secondary-structure.train"/.test

# 2) Incorporate evolutionary data (PSSM)
#    -> For each protein, generate a profile (via PSI-BLAST or HHblits)
#    -> Each residue is then represented by a probability vector (dimension 20)
#    -> Remove the Embedding and feed (window_size, 20) directly

# 3) Bidirectional architecture
#    -> Replace LSTM(...) with Bidirectional(LSTM(...))

# 4) Reduce the unpredicted extremities
#    -> Either shrink the window or fill the borders afterwards with fill_borders()

acides_amines = "ACDEFGHIKLMNPQRSTVWY"  # 20 types; a residue's position in this string is its integer code
# Adjust the window size (e.g. 13, 15, or 21) to better predict the borders
window_size = 13
struct_map = {0: 'h', 1: 'e', 2: '_'}  # Used to print the predicted structure

def _encode_protein(residues, structures, aa_to_index):
    """
    Convert the letters of one protein into integer indices.

    Args:
        residues: amino-acid tokens, one per residue.
        structures: secondary-structure tokens aligned with `residues`.
        aa_to_index: mapping from a one-letter amino-acid code to its index.

    Returns:
        (seq_indices, struct_indices): two lists of ints.

    Raises:
        ValueError: if a residue token is not a key of `aa_to_index`.
    """
    seq_indices = []
    for aa in residues:
        # Exact lookup: a str.index() search would also accept multi-letter
        # tokens such as "AC" by matching them as a substring.
        if aa not in aa_to_index:
            raise ValueError(f"Unknown amino-acid code {aa!r}")
        seq_indices.append(aa_to_index[aa])

    # 'h' -> 0, 'e' -> 1, anything else -> 2
    struct_codes = {'h': 0, 'e': 1}
    struct_indices = [struct_codes.get(s, 2) for s in structures]
    return seq_indices, struct_indices


def parse_file(data_lines):
    """
    Parse the lines of a secondary-structure file.

    - Sequences are separated by the '<>' marker.
    - Blank lines and lines starting with '#' are skipped.
    - Every other line is expected to hold "AminoAcid Structure"; lines that do
      not split into exactly two whitespace-separated tokens are ignored.

    Args:
        data_lines: iterable of text lines.

    Returns:
        input_sequences  (list of lists of amino-acid indices)
        output_sequences (list of lists of structure indices: h->0, e->1, other->2)

    Raises:
        ValueError: if a residue token is not one of the codes in `acides_amines`.
    """
    aa_to_index = {aa: idx for idx, aa in enumerate(acides_amines)}

    input_sequences = []
    output_sequences = []
    seq_temp = []
    struct_temp = []

    for line in data_lines:
        line = line.strip()
        # Skip blank lines and comments
        if not line or line.startswith("#"):
            continue

        if line == "<>":
            # Store the protein collected so far, if any, then start a new one
            if seq_temp:
                seq_indices, struct_indices = _encode_protein(seq_temp, struct_temp, aa_to_index)
                input_sequences.append(seq_indices)
                output_sequences.append(struct_indices)
            seq_temp = []
            struct_temp = []
            continue

        parts = line.split()
        if len(parts) == 2:
            aa, struct_sec = parts
            seq_temp.append(aa)
            struct_temp.append(struct_sec)

    # Store the last protein when the file does not end with '<>'
    if seq_temp:
        seq_indices, struct_indices = _encode_protein(seq_temp, struct_temp, aa_to_index)
        input_sequences.append(seq_indices)
        output_sequences.append(struct_indices)

    return input_sequences, output_sequences

def generate_windows(sequences, structures, window_size):
    """
    Build sliding windows of `window_size` residues over every sequence.

    The label attached to a window is the structure of its central residue,
    i.e. position start + window_size // 2.

    Returns:
        X: numpy array built from the list of windows.
        y: numpy array built from the list of central labels.
        seq_idx_for_window: list of (sequence index, window start) pairs, one
            per window, so predictions can be mapped back to their sequence.
    """
    half = window_size // 2
    windows = []
    centre_labels = []
    seq_idx_for_window = []

    for seq_no, (residues, labels) in enumerate(zip(sequences, structures)):
        # A sequence shorter than the window gives a non-positive count,
        # hence an empty range: it contributes no window.
        for start in range(len(residues) - window_size + 1):
            windows.append(residues[start:start + window_size])
            centre_labels.append(labels[start + half])
            seq_idx_for_window.append((seq_no, start))

    return np.array(windows), np.array(centre_labels), seq_idx_for_window

def reassemble_predictions(
    test_input_sequences,  # list of sequences (each one a list of amino-acid indices)
    predictions_test,      # one row of class probabilities per window
    seq_idx_for_window,    # list of (idx_seq, startPos), same length as predictions_test
    window_size=None       # window length; defaults to the module-level `window_size`
):
    """
    Rebuild a per-residue prediction for every sequence.

    - A list of len(seq) entries initialised to None is created per sequence.
    - For each window (identified by idx_seq, startPos) the predicted class is
      written at the window's central position, startPos + window_size // 2.
    - The confidence of a position is the largest probability of its row.

    Args:
        test_input_sequences: the sequences the windows were cut from.
        predictions_test: per-window class probabilities.
        seq_idx_for_window: (sequence index, window start) for each window.
        window_size: window length used to cut the windows. When omitted, the
            module-level `window_size` is used, which keeps three-argument
            calls working.

    Returns:
        predicted_structs: list of lists of class indices, None where no window
            is centred (the borders).
        avg_confidences: one mean confidence per sequence, 0.0 when the
            sequence has no predicted position.
    """
    if window_size is None:
        window_size = globals()["window_size"]
    half_window = window_size // 2

    predicted_structs = [[None] * len(seq) for seq in test_input_sequences]
    # Confidence of every predicted position, None elsewhere
    predicted_confidences = [[None] * len(seq) for seq in test_input_sequences]

    for win_idx, (idx_seq, startPos) in enumerate(seq_idx_for_window):
        center = startPos + half_window
        # Guard against a window whose centre would fall outside the sequence
        if center < len(test_input_sequences[idx_seq]):
            probas = predictions_test[win_idx]
            predicted_structs[idx_seq][center] = np.argmax(probas)
            predicted_confidences[idx_seq][center] = np.max(probas)

    # Mean confidence over the predicted positions of each sequence
    avg_confidences = []
    for confidences in predicted_confidences:
        valid_confs = [c for c in confidences if c is not None]
        avg_confidences.append(np.mean(valid_confs) if valid_confs else 0.0)

    return predicted_structs, avg_confidences

def fill_borders(predicted_structs):
    """
    Fill the leading and trailing None entries of every prediction list.

    Leading None values take the first non-None value of the list, trailing
    ones take the last non-None value. None values between two predictions
    and lists holding only None are left untouched. The lists are modified in
    place and the outer list is returned.
    """
    for row in predicted_structs:
        first = next((pos for pos, val in enumerate(row) if val is not None), None)
        if first is None:
            # Nothing was predicted for this sequence (or it is empty)
            continue

        # Left border
        left_value = row[first]
        for pos in range(first):
            row[pos] = left_value

        # Right border
        length = len(row)
        last = next(pos for pos in range(length - 1, -1, -1) if row[pos] is not None)
        right_value = row[last]
        for pos in range(last + 1, length):
            row[pos] = right_value

    return predicted_structs

def indices_to_aa_str(seq_indices):
    """Return the amino-acid letters (looked up in `acides_amines`) for a list of indices."""
    letters = [acides_amines[position] for position in seq_indices]
    return "".join(letters)

def indices_to_struct_str(struct_indices):
    """Render structure indices through `struct_map`; a None entry is rendered as '_'."""
    return "".join(
        '_' if code is None else struct_map[code]
        for code in struct_indices
    )


# ===================== 1) DOWNLOAD THE TRAINING SET (possibly enlarged dataset) ======================
url_train = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.train"
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page as data.
response_train = requests.get(url_train, timeout=30)
response_train.raise_for_status()
data_lines_train = response_train.text.splitlines()

train_input_sequences, train_output_sequences = parse_file(data_lines_train)

# ===================== 2) SLIDING WINDOWS OVER THE TRAINING SET =================
X_train, y_train, _ = generate_windows(train_input_sequences, train_output_sequences, window_size)
print(f"Nombre de fenêtres générées (train) : {len(X_train)}")

if len(X_train) == 0:
    print("Aucune donnée n'a pu être extraite du fichier train. Vérifiez le format.")
    import sys
    sys.exit(0)

# Preparation: integer windows for the Embedding layer, one-hot labels for the softmax
X_train = pad_sequences(X_train, maxlen=window_size, padding='post')
y_train = to_categorical(y_train, num_classes=3)

print("X_train.shape :", X_train.shape)
print("y_train.shape :", y_train.shape)

# ===================== 3) MODEL CONSTRUCTION ======================
# To use a PSSM as input instead:
#  - remove the Embedding layer
#  - declare Input(shape=(window_size, 20)) and feed the BiLSTM directly
#  - see the example code from the earlier discussion

model = Sequential()
# The input shape is declared with an explicit Input layer: the Embedding
# `input_length` argument is deprecated in Keras 3.
model.add(Input(shape=(window_size,)))
model.add(Embedding(
    input_dim=len(acides_amines),  # 20 amino-acid types
    output_dim=128
))

# BiLSTM architecture, to capture context on both sides of the central residue
model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64, return_sequences=False, kernel_regularizer=l2(0.01))))
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(3, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Stop when val_loss has not improved for 8 epochs; keep the best weights on disk
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
checkpoint = ModelCheckpoint("monMeilleurModel.keras", save_best_only=True, monitor="val_loss")

# ===================== 4) TRAINING ======================
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, checkpoint],
    verbose=1
)

# Reload the best model saved by the checkpoint callback
best_model = load_model("monMeilleurModel.keras")

# ===================== 5) DOWNLOAD AND PARSE THE TEST SET ======================
url_test = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.test"
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page as data.
response_test = requests.get(url_test, timeout=30)
response_test.raise_for_status()
data_lines_test = response_test.text.splitlines()

test_input_sequences, test_output_sequences = parse_file(data_lines_test)

# Build the sliding windows (test); the (sequence, start) pairs are kept so the
# per-window predictions can be mapped back onto their sequence later
X_test, y_test, seq_idx_for_window_test = generate_windows(
    test_input_sequences, test_output_sequences, window_size
)

print(f"\nNombre de fenêtres générées (test) : {len(X_test)}")
if len(X_test) == 0:
    print("Aucune donnée n'a pu être extraite du fichier test. Vérifiez le format.")
    import sys
    sys.exit(0)

X_test = pad_sequences(X_test, maxlen=window_size, padding='post')
y_test_cat = to_categorical(y_test, num_classes=3)

print("X_test.shape :", X_test.shape)
print("y_test_cat.shape :", y_test_cat.shape)

# ===================== 6) PREDICTIONS ON THE TEST SET ======================
predictions_test = best_model.predict(X_test)
predicted_classes_test = np.argmax(predictions_test, axis=-1)
true_classes_test = np.argmax(y_test_cat, axis=-1)

# Window-level accuracy: one prediction per window centre
accuracy_test = accuracy_score(y_true=true_classes_test, y_pred=predicted_classes_test)
print(f"\nAccuracy calculée sur le test : {accuracy_test:.2f}")

# ===================== 7) REASSEMBLY ======================
predicted_structs, avg_confidences = reassemble_predictions(
    test_input_sequences, predictions_test, seq_idx_for_window_test
)

# Border positions (still None) take the nearest available prediction
predicted_structs = fill_borders(predicted_structs)

# ===================== 8) SHOW THE MOST CONFIDENT SEQUENCE ======================
best_seq_idx = int(np.argmax(avg_confidences))
best_conf = avg_confidences[best_seq_idx]

best_seq_input, best_seq_struct, best_seq_pred = (
    test_input_sequences[best_seq_idx],   # amino-acid indices
    test_output_sequences[best_seq_idx],  # indices 0, 1, 2
    predicted_structs[best_seq_idx],      # indices 0, 1, 2 or None
)

seq_str = indices_to_aa_str(best_seq_input)
true_struct_str = indices_to_struct_str(best_seq_struct)
pred_struct_str = indices_to_struct_str(best_seq_pred)

print(f"\n=== Meilleure prédiction (confiance moyenne: {best_conf:.2%}) ===\n")
print("Séquence:   ", seq_str)
print("Structure:  ", true_struct_str)
print("Prédiction: ", pred_struct_str)
print(
    "\nLégende:",
    "h: hélice alpha",
    "e: feuillet bêta",
    "_: autre (coil)",
    sep="\n",
)

Nombre de fenêtres générées (train) : 16774
X_train.shape : (16774, 13)
y_train.shape : (16774, 3)




Epoch 1/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 13ms/step - accuracy: 0.5439 - loss: 3.4361 - val_accuracy: 0.5863 - val_loss: 0.9827
Epoch 2/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.5877 - loss: 0.9150 - val_accuracy: 0.5914 - val_loss: 0.9259
Epoch 3/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.6085 - loss: 0.8791 - val_accuracy: 0.6030 - val_loss: 0.9204
Epoch 4/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.6088 - loss: 0.8751 - val_accuracy: 0.6110 - val_loss: 0.9126
Epoch 5/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.6163 - loss: 0.8607 - val_accuracy: 0.5967 - val_loss: 0.9146
Epoch 6/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.6284 - loss: 0.8484 - val_accuracy: 0.6048 - val_loss: 0.9016
Epoch 7/50
[1m420/4

In [1]:
import numpy as np
import requests

from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Input, Dropout
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score

# =========================================================
# Configuration & parameters
# =========================================================

acides_amines = "ACDEFGHIKLMNPQRSTVWY"  # 20 types; a residue's position in this string is its integer code
window_size = 13  # number of residues in each sliding window
struct_map = {0: 'h', 1: 'e', 2: '_'}  # Used to print the predicted structure

# =========================================================
# Fonctions utilitaires pour parser et préparer les données
# =========================================================

def _encode_protein(residues, structures, aa_to_index):
    """
    Convert the letters of one protein into integer indices.

    Args:
        residues: amino-acid tokens, one per residue.
        structures: secondary-structure tokens aligned with `residues`.
        aa_to_index: mapping from a one-letter amino-acid code to its index.

    Returns:
        (seq_indices, struct_indices): two lists of ints.

    Raises:
        ValueError: if a residue token is not a key of `aa_to_index`.
    """
    seq_indices = []
    for aa in residues:
        # Exact lookup: a str.index() search would also accept multi-letter
        # tokens such as "AC" by matching them as a substring.
        if aa not in aa_to_index:
            raise ValueError(f"Unknown amino-acid code {aa!r}")
        seq_indices.append(aa_to_index[aa])

    # 'h' -> 0, 'e' -> 1, anything else -> 2
    struct_codes = {'h': 0, 'e': 1}
    struct_indices = [struct_codes.get(s, 2) for s in structures]
    return seq_indices, struct_indices


def parse_file(data_lines):
    """
    Parse the lines of a secondary-structure file.

    - Sequences are separated by the '<>' marker.
    - Blank lines and lines starting with '#' are skipped.
    - Every other line is expected to hold "AminoAcid Structure"; lines that do
      not split into exactly two whitespace-separated tokens are ignored.

    Args:
        data_lines: iterable of text lines.

    Returns:
        input_sequences  (list of lists of amino-acid indices)
        output_sequences (list of lists of structure indices: h->0, e->1, other->2)

    Raises:
        ValueError: if a residue token is not one of the codes in `acides_amines`.
    """
    aa_to_index = {aa: idx for idx, aa in enumerate(acides_amines)}

    input_sequences = []
    output_sequences = []
    seq_temp = []
    struct_temp = []

    for line in data_lines:
        line = line.strip()
        # Skip blank lines and comments
        if not line or line.startswith("#"):
            continue

        if line == "<>":
            # Store the protein collected so far, if any, then start a new one
            if seq_temp:
                seq_indices, struct_indices = _encode_protein(seq_temp, struct_temp, aa_to_index)
                input_sequences.append(seq_indices)
                output_sequences.append(struct_indices)
            seq_temp = []
            struct_temp = []
            continue

        parts = line.split()
        if len(parts) == 2:
            aa, struct_sec = parts
            seq_temp.append(aa)
            struct_temp.append(struct_sec)

    # Store the last protein when the file does not end with '<>'
    if seq_temp:
        seq_indices, struct_indices = _encode_protein(seq_temp, struct_temp, aa_to_index)
        input_sequences.append(seq_indices)
        output_sequences.append(struct_indices)

    return input_sequences, output_sequences

def generate_windows(sequences, structures, window_size):
    """
    Cut every sequence into sliding windows of `window_size` residues.

    Each window is labelled with the structure of its central residue
    (start + window_size // 2). Sequences shorter than the window are skipped.

    Returns:
        X, y as numpy arrays built from the windows and their labels, plus the
        list of (sequence index, window start) pairs, one per window.
    """
    centre_offset = window_size // 2
    all_windows, all_labels, origins = [], [], []

    for seq_no, (residues, labels) in enumerate(zip(sequences, structures)):
        last_start = len(residues) - window_size
        if last_start < 0:
            continue  # sequence too short for a single window
        for start in range(last_start + 1):
            stop = start + window_size
            all_windows.append(residues[start:stop])
            all_labels.append(labels[start + centre_offset])
            origins.append((seq_no, start))

    X = np.array(all_windows)
    y = np.array(all_labels)
    return X, y, origins

def reassemble_predictions(
    test_input_sequences,
    predictions_test,
    seq_idx_for_window,
    window_size
):
    """
    Map the per-window predictions back onto their sequences.

    For every window, the index of the largest probability is written at the
    window's central position (start + window_size // 2) of its sequence, and
    that largest probability is recorded as the confidence of the position.

    Returns:
        predicted_structs: one list per sequence, None where no window is centred.
        avg_confidences: mean confidence per sequence, 0.0 when nothing was predicted.
    """
    centre_offset = window_size // 2
    predicted_structs = []
    per_position_conf = []
    for sequence in test_input_sequences:
        predicted_structs.append([None] * len(sequence))
        per_position_conf.append([None] * len(sequence))

    for win_no, (seq_no, start) in enumerate(seq_idx_for_window):
        centre = start + centre_offset
        if centre >= len(test_input_sequences[seq_no]):
            continue  # centre falls outside the sequence
        row = predictions_test[win_no]
        predicted_structs[seq_no][centre] = np.argmax(row)
        per_position_conf[seq_no][centre] = np.max(row)

    avg_confidences = []
    for confidences in per_position_conf:
        known = [value for value in confidences if value is not None]
        if known:
            avg_confidences.append(np.mean(known))
        else:
            avg_confidences.append(0.0)

    return predicted_structs, avg_confidences

def fill_borders(predicted_structs):
    """
    Replace the None entries at both ends of each list with the nearest prediction.

    Only the leading and trailing runs of None are filled; lists made only of
    None (or empty) are left as they are. Works in place and returns its argument.
    """
    for row in predicted_structs:
        predicted_positions = [pos for pos, val in enumerate(row) if val is not None]
        if not predicted_positions:
            continue

        first = predicted_positions[0]
        last = predicted_positions[-1]

        # Left border
        head_value = row[first]
        for pos in range(first):
            row[pos] = head_value

        # Right border
        tail_value = row[last]
        for pos in range(last + 1, len(row)):
            row[pos] = tail_value

    return predicted_structs

def indices_to_aa_str(seq_indices):
    """Build the amino-acid string matching a list of indices into `acides_amines`."""
    chars = []
    for position in seq_indices:
        chars.append(acides_amines[position])
    return "".join(chars)

def indices_to_struct_str(struct_indices):
    """Turn structure indices into their `struct_map` characters; None is shown as '_'."""
    symbols = ['_' if code is None else struct_map[code] for code in struct_indices]
    return "".join(symbols)

# =========================================================
# 1) DOWNLOAD THE TRAINING SET
# =========================================================
url_train = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.train"
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page as data.
response_train = requests.get(url_train, timeout=30)
response_train.raise_for_status()
data_lines_train = response_train.text.splitlines()

train_input_sequences, train_output_sequences = parse_file(data_lines_train)

# =========================================================
# 2) SLIDING WINDOWS OVER THE TRAINING SET
# =========================================================
X_train_idx, y_train_idx, _ = generate_windows(train_input_sequences, train_output_sequences, window_size)
print(f"Nombre de fenêtres générées (train) : {len(X_train_idx)}")
if len(X_train_idx) == 0:
    print("Aucune donnée n'a pu être extraite du fichier train. Vérifiez le format.")
    import sys
    sys.exit(0)

# Make sure X_train_idx has shape (N, window_size), then one-hot encode by hand:
#  X_train_onehot : shape (N, window_size, 20)
# and flatten to (N, 260) for a fully-connected network.
X_train_idx = pad_sequences(X_train_idx, maxlen=window_size, padding='post')  # as a safety net
y_train_cat = to_categorical(y_train_idx, num_classes=3)

# One-hot: row k of the identity matrix is the encoding of amino-acid index k
X_train_onehot = np.eye(len(acides_amines))[X_train_idx]  # shape (N, window_size, 20)
X_train_onehot = X_train_onehot.reshape((X_train_onehot.shape[0], -1))
# -> shape (N, window_size*20) = (N, 260)

print("X_train_onehot.shape :", X_train_onehot.shape)
print("y_train_cat.shape :", y_train_cat.shape)

# =========================================================
# 3) BUILD THE FEED-FORWARD MODEL
# =========================================================
from keras.optimizers import SGD
from keras import regularizers

model = Sequential()
# Input: 260 values (13 x 20). The shape is declared with an explicit Input
# layer; passing `input_shape` to the first Dense layer of a Sequential model
# makes Keras emit a warning.
model.add(Input(shape=(window_size*len(acides_amines),)))
model.add(Dense(40, activation='sigmoid'))
# Output layer: 3 units (h, e, coil)
model.add(Dense(3, activation='softmax'))

model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# Stop when val_loss has not improved for 10 epochs; keep the best weights on disk
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint("monMeilleurModel.keras", save_best_only=True, monitor="val_loss")

# =========================================================
# 4) TRAINING
# =========================================================
history = model.fit(
    X_train_onehot,
    y_train_cat,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, checkpoint],
    verbose=1
)

# Reload the best model saved by the checkpoint callback
best_model = load_model("monMeilleurModel.keras")

# =========================================================
# 5) DOWNLOAD THE TEST SET
# =========================================================
url_test = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.test"
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page as data.
response_test = requests.get(url_test, timeout=30)
response_test.raise_for_status()
data_lines_test = response_test.text.splitlines()

test_input_sequences, test_output_sequences = parse_file(data_lines_test)

# =========================================================
# 6) BUILD THE TEST WINDOWS
# =========================================================
X_test_idx, y_test_idx, seq_idx_for_window_test = generate_windows(
    test_input_sequences,
    test_output_sequences,
    window_size
)

print(f"\nNombre de fenêtres générées (test) : {len(X_test_idx)}")
if len(X_test_idx) == 0:
    print("Aucune donnée n'a pu être extraite du fichier test. Vérifiez le format.")
    import sys
    sys.exit(0)

X_test_idx = pad_sequences(X_test_idx, maxlen=window_size, padding='post')
y_test_cat = to_categorical(y_test_idx, num_classes=3)

# One-hot encoding of the test windows, flattened like the training input
X_test_onehot = np.eye(len(acides_amines))[X_test_idx]  # shape (N,13,20)
X_test_onehot = X_test_onehot.reshape((X_test_onehot.shape[0], -1))  # (N, 260)

# =========================================================
# 7) PREDICTIONS ON THE TEST SET
# =========================================================
predictions_test = best_model.predict(X_test_onehot)
predicted_classes_test = np.argmax(predictions_test, axis=-1)
true_classes_test = np.argmax(y_test_cat, axis=-1)

# Window-level accuracy: one prediction per window centre
accuracy_test = accuracy_score(y_true=true_classes_test, y_pred=predicted_classes_test)
print(f"\nAccuracy calculée sur le test : {accuracy_test:.2f}")

# =========================================================
# 8) REASSEMBLY AND DISPLAY
# =========================================================
predicted_structs, avg_confidences = reassemble_predictions(
    test_input_sequences, predictions_test, seq_idx_for_window_test, window_size
)

# Border positions (still None) take the nearest available prediction
predicted_structs = fill_borders(predicted_structs)

# The sequence with the highest mean confidence is the one displayed
best_seq_idx = int(np.argmax(avg_confidences))
best_conf = avg_confidences[best_seq_idx]

best_seq_input, best_seq_struct, best_seq_pred = (
    test_input_sequences[best_seq_idx],
    test_output_sequences[best_seq_idx],
    predicted_structs[best_seq_idx],
)

seq_str = indices_to_aa_str(best_seq_input)
true_struct_str = indices_to_struct_str(best_seq_struct)
pred_struct_str = indices_to_struct_str(best_seq_pred)

print(f"\n=== Meilleure prédiction (confiance moyenne: {best_conf:.2%}) ===\n")
print("Séquence:   ", seq_str)
print("Structure:  ", true_struct_str)
print("Prédiction: ", pred_struct_str)
print(
    "\nLégende:",
    "h: hélice alpha",
    "e: feuillet bêta",
    "_: coil/boucle",
    sep="\n",
)



Nombre de fenêtres générées (train) : 16774
X_train_onehot.shape : (16774, 260)
y_train_cat.shape : (16774, 3)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.5360 - loss: 1.0182 - val_accuracy: 0.5317 - val_loss: 1.0079
Epoch 2/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5408 - loss: 1.0058 - val_accuracy: 0.5317 - val_loss: 1.0066
Epoch 3/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5389 - loss: 1.0030 - val_accuracy: 0.5317 - val_loss: 1.0058
Epoch 4/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5351 - loss: 1.0027 - val_accuracy: 0.5317 - val_loss: 0.9978
Epoch 5/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5383 - loss: 0.9935 - val_accuracy: 0.5317 - val_loss: 0.9979
Epoch 6/50
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5379 - loss: 0.9884 - val_accuracy: 0.5317 - val_loss: 0.9905
Epoch 7/50
[1m420/420[0m 

In [2]:
import numpy as np
import requests

from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Input, Dropout
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score

# =========================================================
# Configuration & parameters
# =========================================================

acides_amines = "ACDEFGHIKLMNPQRSTVWY"  # 20 types; a residue's position in this string is its integer code
window_size = 13  # number of residues in each sliding window
struct_map = {0: 'h', 1: 'e', 2: '_'}  # Used to print the predicted structure

# =========================================================
# Fonctions utilitaires pour parser et préparer les données
# =========================================================

def _encode_protein(residues, structures, aa_to_index):
    """
    Convert the letters of one protein into integer indices.

    Args:
        residues: amino-acid tokens, one per residue.
        structures: secondary-structure tokens aligned with `residues`.
        aa_to_index: mapping from a one-letter amino-acid code to its index.

    Returns:
        (seq_indices, struct_indices): two lists of ints.

    Raises:
        ValueError: if a residue token is not a key of `aa_to_index`.
    """
    seq_indices = []
    for aa in residues:
        # Exact lookup: a str.index() search would also accept multi-letter
        # tokens such as "AC" by matching them as a substring.
        if aa not in aa_to_index:
            raise ValueError(f"Unknown amino-acid code {aa!r}")
        seq_indices.append(aa_to_index[aa])

    # 'h' -> 0, 'e' -> 1, anything else -> 2
    struct_codes = {'h': 0, 'e': 1}
    struct_indices = [struct_codes.get(s, 2) for s in structures]
    return seq_indices, struct_indices


def parse_file(data_lines):
    """
    Parse the lines of a secondary-structure file.

    - Sequences are separated by the '<>' marker.
    - Blank lines and lines starting with '#' are skipped.
    - Every other line is expected to hold "AminoAcid Structure"; lines that do
      not split into exactly two whitespace-separated tokens are ignored.

    Args:
        data_lines: iterable of text lines.

    Returns:
        input_sequences  (list of lists of amino-acid indices)
        output_sequences (list of lists of structure indices: h->0, e->1, other->2)

    Raises:
        ValueError: if a residue token is not one of the codes in `acides_amines`.
    """
    aa_to_index = {aa: idx for idx, aa in enumerate(acides_amines)}

    input_sequences = []
    output_sequences = []
    seq_temp = []
    struct_temp = []

    for line in data_lines:
        line = line.strip()
        # Skip blank lines and comments
        if not line or line.startswith("#"):
            continue

        if line == "<>":
            # Store the protein collected so far, if any, then start a new one
            if seq_temp:
                seq_indices, struct_indices = _encode_protein(seq_temp, struct_temp, aa_to_index)
                input_sequences.append(seq_indices)
                output_sequences.append(struct_indices)
            seq_temp = []
            struct_temp = []
            continue

        parts = line.split()
        if len(parts) == 2:
            aa, struct_sec = parts
            seq_temp.append(aa)
            struct_temp.append(struct_sec)

    # Store the last protein when the file does not end with '<>'
    if seq_temp:
        seq_indices, struct_indices = _encode_protein(seq_temp, struct_temp, aa_to_index)
        input_sequences.append(seq_indices)
        output_sequences.append(struct_indices)

    return input_sequences, output_sequences

def generate_windows(sequences, structures, window_size):
    """
    Produce the sliding windows of `window_size` residues of every sequence.

    The target of a window is the structure found at its centre
    (start + window_size // 2). A sequence shorter than the window yields
    no window at all.

    Returns:
        X, y: numpy arrays built from the windows and from their targets.
        seq_idx_for_window: list of (sequence index, window start) tuples.
    """
    middle = window_size // 2
    windows = []
    targets = []
    seq_idx_for_window = []

    for seq_no, (residues, labels) in enumerate(zip(sequences, structures)):
        n_windows = len(residues) - window_size + 1
        start = 0
        # n_windows <= 0 when the sequence is shorter than the window
        while start < n_windows:
            windows.append(residues[start:start + window_size])
            targets.append(labels[start + middle])
            seq_idx_for_window.append((seq_no, start))
            start += 1

    return np.array(windows), np.array(targets), seq_idx_for_window

def reassemble_predictions(
    test_input_sequences,
    predictions_test,
    seq_idx_for_window,
    window_size
):
    """
    Map per-window predictions back onto their source sequences.

    `seq_idx_for_window[k]` is the (sequence index, window start) pair of the
    window whose class probabilities are `predictions_test[k]`. The prediction
    is stored at the window's central position (start + window_size // 2).

    Returns:
      predicted_structs : one list per sequence holding the predicted class
                          index at each predicted position, None elsewhere
      avg_confidences   : one value per sequence, the mean of the highest
                          class probability over its predicted positions
                          (0.0 when the sequence has no prediction)
    """
    half = window_size // 2
    predicted_structs = [[None] * len(seq) for seq in test_input_sequences]
    predicted_confidences = [[None] * len(seq) for seq in test_input_sequences]

    for window_no, (seq_no, start) in enumerate(seq_idx_for_window):
        center = start + half
        if center >= len(test_input_sequences[seq_no]):
            continue
        probas = predictions_test[window_no]  # class probabilities of this window
        predicted_structs[seq_no][center] = np.argmax(probas)
        predicted_confidences[seq_no][center] = np.max(probas)

    avg_confidences = []
    for seq_no, _ in enumerate(test_input_sequences):
        known = [c for c in predicted_confidences[seq_no] if c is not None]
        avg_confidences.append(np.mean(known) if known else 0.0)

    return predicted_structs, avg_confidences

def fill_borders(predicted_structs):
    """
    Fill the leading and trailing None entries of every sequence.

    Leading Nones take the value of the first non-None entry, trailing Nones
    the value of the last non-None entry. Nones located between two
    predictions, and sequences without any prediction, are left untouched.
    The inner lists are modified in place and the outer list is returned.
    """
    for labels in predicted_structs:
        known_positions = [pos for pos, value in enumerate(labels) if value is not None]
        if not known_positions:
            continue

        first, last = known_positions[0], known_positions[-1]
        # Left border: repeat the first available prediction.
        labels[:first] = [labels[first]] * first
        # Right border: repeat the last available prediction.
        labels[last + 1:] = [labels[last]] * (len(labels) - last - 1)

    return predicted_structs

def indices_to_aa_str(seq_indices):
    """Turn a list of amino-acid indices into the matching string of letters."""
    letters = [acides_amines[position] for position in seq_indices]
    return "".join(letters)

def indices_to_struct_str(struct_indices):
    """Turn a list of class indices into a string via struct_map; None is rendered as '_'."""
    return "".join(
        '_' if label is None else struct_map[label]
        for label in struct_indices
    )

# =========================================================
# 1) DOWNLOAD THE TRAINING SET
# =========================================================
url_train = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.train"
# A timeout keeps the run from hanging on an unresponsive server, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response_train = requests.get(url_train, timeout=30)
response_train.raise_for_status()
data_lines_train = response_train.text.splitlines()

train_input_sequences, train_output_sequences = parse_file(data_lines_train)

# =========================================================
# 2) SLIDING WINDOWS OVER THE TRAINING SET
# =========================================================
X_train_idx, y_train_idx, _ = generate_windows(train_input_sequences, train_output_sequences, window_size)
print(f"Nombre de fenêtres générées (train) : {len(X_train_idx)}")
if len(X_train_idx) == 0:
    print("Aucune donnée n'a pu être extraite du fichier train. Vérifiez le format.")
    import sys
    # Non-zero status: having nothing to train on is a failure, not a success.
    sys.exit(1)

# X_train_idx is forced to shape (N, window_size), then one-hot encoded by hand:
#   X_train_onehot : shape (N, window_size, 20)
# and finally flattened to (N, 260) for a fully-connected network.
# pad_sequences is only a safety net: the windows already have length window_size.
X_train_idx = pad_sequences(X_train_idx, maxlen=window_size, padding='post')
y_train_cat = to_categorical(y_train_idx, num_classes=3)

# One-hot: row k of the identity matrix encodes amino-acid index k
X_train_onehot = np.eye(len(acides_amines))[X_train_idx]  # shape (N, window_size, 20)
X_train_onehot = X_train_onehot.reshape((X_train_onehot.shape[0], -1))
# -> shape (N, window_size*20) = (N, 260)

print("X_train_onehot.shape :", X_train_onehot.shape)
print("y_train_cat.shape :", y_train_cat.shape)

# =========================================================
# 3) BUILD THE FEED-FORWARD MODEL
# =========================================================
from keras.optimizers import SGD
from keras import regularizers
from keras.layers import Input

model = Sequential()
# Input: 260 values (13 x 20). The shape is declared with an explicit Input
# layer, which Keras recommends for Sequential models instead of passing
# `input_shape` to the first Dense layer.
model.add(Input(shape=(window_size*len(acides_amines),)))
# Hidden layer: 40 sigmoid units
model.add(Dense(40, activation='sigmoid'))
# Output layer: 3 units (h, e, coil)
model.add(Dense(3, activation='softmax'))

model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# Stop once val_loss has not improved for 10 epochs, and keep on disk the
# model with the lowest val_loss seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint("monMeilleurModel.keras", save_best_only=True, monitor="val_loss")

# =========================================================
# 4) TRAINING
# =========================================================
# validation_split=0.2 holds out 20% of the training windows for validation.
history = model.fit(
    X_train_onehot,
    y_train_cat,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, checkpoint],
    verbose=1
)

# Load the best model saved by the checkpoint callback
best_model = load_model("monMeilleurModel.keras")

# =========================================================
# 5) DOWNLOAD THE TEST SET
# =========================================================
url_test = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.test"
# A timeout keeps the run from hanging on an unresponsive server, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response_test = requests.get(url_test, timeout=30)
response_test.raise_for_status()
data_lines_test = response_test.text.splitlines()

test_input_sequences, test_output_sequences = parse_file(data_lines_test)

# =========================================================
# 6) BUILD THE TEST WINDOWS
# =========================================================
# seq_idx_for_window_test records where each window comes from so that the
# predictions can later be mapped back onto the sequences.
X_test_idx, y_test_idx, seq_idx_for_window_test = generate_windows(
    test_input_sequences,
    test_output_sequences,
    window_size
)

print(f"\nNombre de fenêtres générées (test) : {len(X_test_idx)}")
if len(X_test_idx) == 0:
    print("Aucune donnée n'a pu être extraite du fichier test. Vérifiez le format.")
    import sys
    # Non-zero status: having nothing to evaluate is a failure, not a success.
    sys.exit(1)

X_test_idx = pad_sequences(X_test_idx, maxlen=window_size, padding='post')
y_test_cat = to_categorical(y_test_idx, num_classes=3)

# One-hot encoding of the test windows
X_test_onehot = np.eye(len(acides_amines))[X_test_idx]  # shape (N,13,20)
X_test_onehot = X_test_onehot.reshape((X_test_onehot.shape[0], -1))  # (N, 260)

# =========================================================
# 7) PREDICTIONS ON THE TEST SET
# =========================================================
# Class probabilities for every test window; argmax over the last axis turns
# both the predictions and the one-hot labels back into class indices.
predictions_test = best_model.predict(X_test_onehot)
predicted_classes_test = np.argmax(predictions_test, axis=-1)
true_classes_test = np.argmax(y_test_cat, axis=-1)

# Accuracy is measured per window, i.e. on the residues that are the centre
# of a window.
accuracy_test = accuracy_score(true_classes_test, predicted_classes_test)
print(f"\nAccuracy calculée sur le test : {accuracy_test:.2f}")

# =========================================================
# 8) RECONSTRUCTION AND DISPLAY
# =========================================================
# Map the window predictions back onto their source sequences, together with
# one average confidence per sequence.
predicted_structs, avg_confidences = reassemble_predictions(
    test_input_sequences,
    predictions_test,
    seq_idx_for_window_test,
    window_size
)

# Positions near the sequence ends are never a window centre; copy the
# nearest prediction into them.
predicted_structs = fill_borders(predicted_structs)

# Select the sequence with the highest average confidence
best_seq_idx = int(np.argmax(avg_confidences))
best_conf = avg_confidences[best_seq_idx]

best_seq_input = test_input_sequences[best_seq_idx]
best_seq_struct = test_output_sequences[best_seq_idx]
best_seq_pred   = predicted_structs[best_seq_idx]

# Convert the index lists back to printable strings
seq_str = indices_to_aa_str(best_seq_input)
true_struct_str = indices_to_struct_str(best_seq_struct)
pred_struct_str = indices_to_struct_str(best_seq_pred)

print("\n=== Meilleure prédiction (confiance moyenne: {:.2%}) ===\n".format(best_conf))
print("Séquence:   ", seq_str)
print("Structure:  ", true_struct_str)
print("Prédiction: ", pred_struct_str)
print("\nLégende:")
print("h: hélice alpha")
print("e: feuillet bêta")
print("_: coil/boucle")



Nombre de fenêtres générées (train) : 16774
X_train_onehot.shape : (16774, 260)
y_train_cat.shape : (16774, 3)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.5040 - loss: 1.0353 - val_accuracy: 0.5317 - val_loss: 1.0108
Epoch 2/200
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5382 - loss: 1.0095 - val_accuracy: 0.5317 - val_loss: 1.0069
Epoch 3/200
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5376 - loss: 1.0058 - val_accuracy: 0.5317 - val_loss: 1.0061
Epoch 4/200
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5343 - loss: 1.0028 - val_accuracy: 0.5317 - val_loss: 0.9994
Epoch 5/200
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5299 - loss: 1.0026 - val_accuracy: 0.5317 - val_loss: 0.9983
Epoch 6/200
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5378 - loss: 0.9928 - val_accuracy: 0.5317 - val_loss: 0.9956
Epoch 7/200
[1m420/4

In [5]:
import numpy as np
import requests

from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.optimizers import SGD
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score

# =========================================================
# 1) Configuration & parameters
# =========================================================

# The '-' spacer character is appended to the alphabet, giving 21 symbols:
acides_amines = "ACDEFGHIKLMNPQRSTVWY-"
window_size = 13
struct_map = {0: 'h', 1: 'e', 2: '_'}  # Used to display a predicted structure if needed

# Index of the spacer symbol in the alphabet
spacer_index = acides_amines.index('-')  # Normally 20

# =========================================================
# 2) Lecture & concaténation du fichier
# =========================================================
def parse_and_concatenate(data_lines):
    """
    Read the file line by line and chain every protein into one ribbon.

    - Blank lines and lines starting with '#' are ignored.
    - A '<>' line closes the protein being read.
    - Any other line is used only when it splits into exactly two fields,
      "AminoAcid Structure".
    - One spacer symbol ('-') is appended after each protein; its structure
      is stored as 2 (coil).

    Returns: (seq_global, struct_global)
      seq_global   : list of indices (0..20) for amino acids or the spacer
      struct_global: list of indices (0..2): h->0, e->1, anything else->2
                     (2 as well for the spacer)
    """
    label_codes = {'h': 0, 'e': 1}
    seq_global = []
    struct_global = []
    pending = []  # (amino acid, structure) pairs of the protein being read

    def flush_pending():
        """Append the protein being read to the ribbon, followed by a spacer."""
        if not pending:
            return
        for residue, label in pending:
            seq_global.append(acides_amines.index(residue))  # index 0..20
            struct_global.append(label_codes.get(label, 2))
        # Spacer after the protein, treated as coil for simplicity
        seq_global.append(spacer_index)
        struct_global.append(2)
        pending.clear()

    for raw_line in data_lines:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line == "<>":
            flush_pending()
            continue
        fields = line.split()
        if len(fields) == 2:
            pending.append(tuple(fields))

    # Last protein, when the file does not end with '<>'
    flush_pending()

    return seq_global, struct_global

# =========================================================
# 3) Génération des fenêtres glissantes sur le ruban unique
# =========================================================
def generate_windows_single_ruban(seq_global, struct_global, window_size):
    """
    Slide a window of `window_size` symbols along the ribbon `seq_global`.

    A window is dropped when its CENTRE is a spacer, since a spacer is not a
    real residue. Every kept window is labelled with the structure found at
    its centre.

    Returns:
      X : np.array built from the kept windows, (N, window_size) when N > 0
      y : np.array of the N centre labels
    """
    offset = window_size // 2
    first_center = offset
    stop_center = len(seq_global) - window_size + 1 + offset

    # Centres of all windows that fit in the ribbon, spacer centres excluded
    kept_centers = [
        center
        for center in range(first_center, stop_center)
        if seq_global[center] != spacer_index
    ]

    X = np.array([
        seq_global[center - offset : center - offset + window_size]
        for center in kept_centers
    ])
    y = np.array([struct_global[center] for center in kept_centers])
    return X, y

# ===================== 3.1) Download & prepare TRAIN =====================
url_train = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.train"
# A timeout keeps the run from hanging on an unresponsive server, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response_train = requests.get(url_train, timeout=30)
response_train.raise_for_status()
data_lines_train = response_train.text.splitlines()

seq_global_train, struct_global_train = parse_and_concatenate(data_lines_train)
print("[TRAIN] Longueur du ruban :", len(seq_global_train))

X_train_idx, y_train_idx = generate_windows_single_ruban(seq_global_train, struct_global_train, window_size)
print("[TRAIN] Nombre de fenêtres générées :", len(X_train_idx))

# One-hot (21 symbols)
# initial shape: (N, window_size) => (N, window_size, 21) => (N, 273)
N_symbols = len(acides_amines)  # 21
X_train_onehot = np.eye(N_symbols)[X_train_idx]  # (N, window_size, 21)
X_train_onehot = X_train_onehot.reshape((X_train_onehot.shape[0], -1))  # (N, 273)

# Labels: 3 classes
y_train_cat = to_categorical(y_train_idx, num_classes=3)

# ===================== 3.2) Download & prepare TEST =====================
url_test = "https://olivier-fabre.com/Prediction_proteines/protein-secondary-structure.test"
# Same safeguards as for the training file
response_test = requests.get(url_test, timeout=30)
response_test.raise_for_status()
data_lines_test = response_test.text.splitlines()

seq_global_test, struct_global_test = parse_and_concatenate(data_lines_test)
print("[TEST] Longueur du ruban :", len(seq_global_test))

X_test_idx, y_test_idx = generate_windows_single_ruban(seq_global_test, struct_global_test, window_size)
print("[TEST] Nombre de fenêtres générées :", len(X_test_idx))

# One-hot encode and flatten the test windows the same way as the train ones
X_test_onehot = np.eye(N_symbols)[X_test_idx].reshape((len(X_test_idx), -1))
y_test_cat = to_categorical(y_test_idx, num_classes=3)

# =========================================================
# 4) Build & train the network
# =========================================================
from keras.layers import Input

model = Sequential()
# The input shape is declared with an explicit Input layer, which Keras
# recommends for Sequential models instead of passing `input_shape` to the
# first Dense layer.
model.add(Input(shape=(window_size * N_symbols,)))  # 13 * 21 = 273
model.add(Dense(40, activation='sigmoid'))
model.add(Dense(3, activation='softmax'))

model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# Stop once val_loss has not improved for 10 epochs, and keep on disk the
# model with the lowest val_loss seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint("monModelSpacers.keras", save_best_only=True, monitor="val_loss")

# validation_split=0.2 holds out 20% of the training windows for validation.
history = model.fit(
    X_train_onehot,
    y_train_cat,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, checkpoint],
    verbose=1
)

# Reload the best model saved by the checkpoint callback
best_model = load_model("monModelSpacers.keras")

# =========================================================
# 5) Final evaluation (Q3)
# =========================================================
predictions_test = best_model.predict(X_test_onehot)
predicted_classes_test = np.argmax(predictions_test, axis=-1)
true_classes_test = np.argmax(y_test_cat, axis=-1)

acc_test = accuracy_score(true_classes_test, predicted_classes_test)
print(f"\n[TEST] Q3 (accuracy globale) = {acc_test:.3f}")

# ---------------------------------------------------------
# Best-predicted protein of the test ribbon
# ---------------------------------------------------------
# The values printed below are computed here, from this cell's own model and
# test ribbon, so that the display does not rely on variables defined
# elsewhere in the notebook.

# Ribbon position of the centre of each test window, in the order in which
# the windows were generated (windows centred on a spacer were skipped).
half_window = window_size // 2
test_centers = [
    center
    for center in range(half_window, len(seq_global_test) - window_size + 1 + half_window)
    if seq_global_test[center] != spacer_index
]
class_at = dict(zip(test_centers, predicted_classes_test))
confidence_at = dict(zip(test_centers, np.max(predictions_test, axis=-1)))

# Split the ribbon back into proteins: half-open (first, stop) spans
# delimited by the spacers.
protein_spans = []
span_start = 0
for pos, symbol in enumerate(seq_global_test):
    if symbol == spacer_index:
        if pos > span_start:
            protein_spans.append((span_start, pos))
        span_start = pos + 1
if span_start < len(seq_global_test):
    protein_spans.append((span_start, len(seq_global_test)))

# Keep the protein whose predicted residues have the highest mean confidence.
best_conf = None
best_span = None
for first, stop in protein_spans:
    span_confs = [confidence_at[p] for p in range(first, stop) if p in confidence_at]
    if not span_confs:
        continue
    mean_conf = float(np.mean(span_confs))
    if best_conf is None or mean_conf > best_conf:
        best_conf = mean_conf
        best_span = (first, stop)

if best_span is not None:
    first, stop = best_span
    seq_str = "".join(acides_amines[idx] for idx in seq_global_test[first:stop])
    true_struct_str = "".join(struct_map[idx] for idx in struct_global_test[first:stop])
    # Residues that are not the centre of any window (the two ends of the
    # ribbon) have no prediction and are shown as '_'.
    pred_struct_str = "".join(
        struct_map[int(class_at[p])] if p in class_at else '_'
        for p in range(first, stop)
    )

    print("\n=== Meilleure prédiction (confiance moyenne: {:.2%}) ===\n".format(best_conf))
    print("Séquence:   ", seq_str)
    print("Structure:  ", true_struct_str)
    print("Prédiction: ", pred_struct_str)
    print("\nLégende:")
    print("h: hélice alpha")
    print("e: feuillet bêta")
    print("_: autre (coil)")


[TRAIN] Longueur du ruban : 18216
[TRAIN] Nombre de fenêtres générées : 18094
[TEST] Longueur du ruban : 3537
[TEST] Nombre de fenêtres générées : 3509


Epoch 1/200
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5494 - loss: 1.0109 - val_accuracy: 0.5369 - val_loss: 0.9991
Epoch 2/200
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5438 - loss: 0.9986 - val_accuracy: 0.5369 - val_loss: 0.9973
Epoch 3/200
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5427 - loss: 0.9935 - val_accuracy: 0.5369 - val_loss: 0.9975
Epoch 4/200
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5489 - loss: 0.9838 - val_accuracy: 0.5369 - val_loss: 0.9935
Epoch 5/200
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5437 - loss: 0.9836 - val_accuracy: 0.5369 - val_loss: 0.9847
Epoch 6/200
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5490 - loss: 0.9746 - val_accuracy: 0.5369 - val_loss: 0.9835
Epoch 7/200
[1m453/45