In [1]:
import gzip
import pickle

import mlflow
import mlflow.keras
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import (Conv1D, Embedding, GlobalMaxPooling1D,
                                    LSTM, Dense, Dropout, Bidirectional)
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer



In [2]:
mlflow.set_tracking_uri('https://mlflow-service-1073438601911.us-west2.run.app')
mlflow.set_experiment('intern-question/project/final/mlflow')

<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1727047711746, experiment_id='1', last_update_time=1727047711746, lifecycle_stage='active', name='intern-question/project/final/mlflow', tags={}>

In [3]:
with open('/Users/lancesanterre/pipeline_edu/data/processed/pipeline_and_data.pkl', 'rb') as f:
    df = pd.read_pickle(f)
print(df.head())
# Assuming the DataFrame `df` has at least one column and you want to access the first column
questions = df['question']  # or df[0] if it's a Series or DataFrame
questions 

                                            question           labels
0  Which NFL team represented the AFC at Super Bo...  [1.0, 0.0, 0.0]
1  Which NFL team represented the NFC at Super Bo...  [1.0, 0.0, 0.0]
2                Where did Super Bowl 50 take place?  [1.0, 0.0, 0.0]
3                  Which NFL team won Super Bowl 50?  [1.0, 0.0, 0.0]
4  What color was used to emphasize the 50th anni...  [1.0, 0.0, 0.0]


0         Which NFL team represented the AFC at Super Bo...
1         Which NFL team represented the NFC at Super Bo...
2                       Where did Super Bowl 50 take place?
3                         Which NFL team won Super Bowl 50?
4         What color was used to emphasize the 50th anni...
                                ...                        
414915    How many keywords are there in the Racket prog...
414916            Do you believe there is life after death?
414917                                    What is one coin?
414918    What is the approx annual cost of living while...
414919                What is like to have sex with cousin?
Name: question, Length: 414920, dtype: object

In [4]:
filtered_labels = df['labels']
filtered_questions = questions 

# Simple LSTM

In [21]:
best_accuracy = 0
best_model_info = None

for input_dim in [1000, 2000, 4000, 5000]:
    for output_dim in [16, 32, 64, 128]:
        input_length = 10
        with mlflow.start_run():
            # Tokenization and Padding
            tokenizer = Tokenizer(num_words=input_dim)  # Adjust vocabulary size
            tokenizer.fit_on_texts(filtered_questions)
            sequences = tokenizer.texts_to_sequences(filtered_questions)
            X = pad_sequences(sequences, maxlen=input_length)

            # Convert labels to numpy array
            y = np.array(filtered_labels.tolist())

            # Model: Simple LSTM
            model_name = f"Simple_LSTM_{input_dim}_{output_dim}"
            model = Sequential([
                Embedding(input_dim=input_dim, output_dim=output_dim, input_length=input_length),
                LSTM(output_dim),
                Dropout(0.5),
                Dense(32, activation='relu'),
                Dense(3, activation='softmax')
            ])

            # Compile and train
            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
            model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

            # Evaluate and log
            loss, accuracy = model.evaluate(X_test, y_test)
            mlflow.log_params({"input_dim": input_dim, "output_dim": output_dim, "input_length": input_length})
            mlflow.log_metrics({"loss": loss, "accuracy": accuracy})

            # Save the model with a unique name
            mlflow.keras.log_model(model, artifact_path=model_name)
            print(f"Model saved as {model_name} with accuracy: {accuracy:.2f}")

            # Check if this is the best model
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_info = {
                    "model_name": model_name,
                    "run_id": mlflow.active_run().info.run_id,
                }

            # End the current MLflow run to prepare for the next iteration
            mlflow.end_run()

# Register the best model
if best_model_info:
    print(f"Registering the best model: {best_model_info['model_name']} with accuracy: {best_accuracy:.2f}")
    mlflow.register_model(
        model_uri=f"runs:/{best_model_info['run_id']}/{best_model_info['model_name']}",
        name="Best_Simple_LSTM_Model"
    )


Epoch 1/10




[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.8708 - loss: 0.3292 - val_accuracy: 0.9281 - val_loss: 0.1808
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - accuracy: 0.9274 - loss: 0.1864 - val_accuracy: 0.9334 - val_loss: 0.1715
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - accuracy: 0.9315 - loss: 0.1721 - val_accuracy: 0.9362 - val_loss: 0.1629
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - accuracy: 0.9354 - loss: 0.1643 - val_accuracy: 0.9364 - val_loss: 0.1629
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9379 - loss: 0.1576 - val_accuracy: 0.9392 - val_loss: 0.1569
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - accuracy: 0.9399 - loss: 0.1527 - val_accuracy: 0.9393 - val_loss: 0.1609
Epoch 7/1

2024/09/22 16:31:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run glamorous-shad-157 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/6d6ae46cd3a449e7ab19ba568a009c12.
2024/09/22 16:31:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_1000_16 with accuracy: 0.94
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.8872 - loss: 0.2854 - val_accuracy: 0.9308 - val_loss: 0.1708
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9321 - loss: 0.1712 - val_accuracy: 0.9371 - val_loss: 0.1570
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9373 - loss: 0.1577 - val_accuracy: 0.9399 - val_loss: 0.1514
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9418 - loss: 0.1481 - val_accuracy: 0.9411 - val_loss: 0.1489
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9460 - loss: 0.1392 - val_accuracy: 0.9424 - val_loss: 0.1483
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9479 

2024/09/22 16:33:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run upbeat-ant-249 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/3450d0f70d4c42ffbf4b2d1578cc855c.
2024/09/22 16:33:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_1000_32 with accuracy: 0.95
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.8976 - loss: 0.2566 - val_accuracy: 0.9336 - val_loss: 0.1631
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.9363 - loss: 0.1608 - val_accuracy: 0.9401 - val_loss: 0.1505
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.9421 - loss: 0.1462 - val_accuracy: 0.9423 - val_loss: 0.1450
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.9470 - loss: 0.1356 - val_accuracy: 0.9437 - val_loss: 0.1433
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.9499 - loss: 0.1276 - val_accuracy: 0.9463 - val_loss: 0.1382
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.9536 

2024/09/22 16:38:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run rare-ram-375 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/1c79a2be3f3c4be091f5183afc57771a.
2024/09/22 16:38:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_1000_64 with accuracy: 0.95
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 6ms/step - accuracy: 0.9043 - loss: 0.2433 - val_accuracy: 0.9367 - val_loss: 0.1567
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 6ms/step - accuracy: 0.9382 - loss: 0.1558 - val_accuracy: 0.9413 - val_loss: 0.1481
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 6ms/step - accuracy: 0.9453 - loss: 0.1388 - val_accuracy: 0.9434 - val_loss: 0.1411
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 6ms/step - accuracy: 0.9502 - loss: 0.1267 - val_accuracy: 0.9454 - val_loss: 0.1388
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 6ms/step - accuracy: 0.9550 - loss: 0.1153 - val_accuracy: 0.9479 - val_loss: 0.1339
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 7ms/step - accuracy: 0.9593 

2024/09/22 16:51:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run peaceful-hawk-650 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/bfd69126a0ba4b8c8ba700d041e3bbfa.
2024/09/22 16:51:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_1000_128 with accuracy: 0.95
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.8666 - loss: 0.3389 - val_accuracy: 0.9139 - val_loss: 0.2104
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9137 - loss: 0.2157 - val_accuracy: 0.9203 - val_loss: 0.1972
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9234 - loss: 0.1924 - val_accuracy: 0.9246 - val_loss: 0.1854
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9269 - loss: 0.1845 - val_accuracy: 0.9279 - val_loss: 0.1829
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9313 - loss: 0.1741 - val_accuracy: 0.9275 - val_loss: 0.1837
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9334

2024/09/22 16:53:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run persistent-cow-319 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/8d7c495835944397a7bc109170e1dbf8.
2024/09/22 16:53:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_2000_16 with accuracy: 0.93
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.8685 - loss: 0.3276 - val_accuracy: 0.9194 - val_loss: 0.1992
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9194 - loss: 0.1984 - val_accuracy: 0.9265 - val_loss: 0.1807
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9297 - loss: 0.1760 - val_accuracy: 0.9296 - val_loss: 0.1770
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9348 - loss: 0.1646 - val_accuracy: 0.9299 - val_loss: 0.1734
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9386 - loss: 0.1549 - val_accuracy: 0.9315 - val_loss: 0.1718
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9422 

2024/09/22 16:56:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run marvelous-lamb-658 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/04a67b8855414be19f46fc34e44bcaf5.
2024/09/22 16:56:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_2000_32 with accuracy: 0.94
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3ms/step - accuracy: 0.8836 - loss: 0.2896 - val_accuracy: 0.9228 - val_loss: 0.1893
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3ms/step - accuracy: 0.9262 - loss: 0.1837 - val_accuracy: 0.9286 - val_loss: 0.1767
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3ms/step - accuracy: 0.9346 - loss: 0.1660 - val_accuracy: 0.9330 - val_loss: 0.1690
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 3ms/step - accuracy: 0.9404 - loss: 0.1520 - val_accuracy: 0.9344 - val_loss: 0.1706
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 3ms/step - accuracy: 0.9465 - loss: 0.1377 - val_accuracy: 0.9358 - val_loss: 0.1690
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3ms/step - accuracy: 0.9498 

2024/09/22 17:02:40 INFO mlflow.tracking._tracking_service.client: 🏃 View run treasured-fowl-849 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/21c3cb7244e24d9faa80a01ebe0eccc8.
2024/09/22 17:02:40 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_2000_64 with accuracy: 0.94
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 7ms/step - accuracy: 0.8893 - loss: 0.2729 - val_accuracy: 0.9248 - val_loss: 0.1865
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 7ms/step - accuracy: 0.9284 - loss: 0.1784 - val_accuracy: 0.9311 - val_loss: 0.1725
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 7ms/step - accuracy: 0.9374 - loss: 0.1566 - val_accuracy: 0.9326 - val_loss: 0.1685
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 7ms/step - accuracy: 0.9458 - loss: 0.1384 - val_accuracy: 0.9363 - val_loss: 0.1637
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 7ms/step - accuracy: 0.9520 - loss: 0.1244 - val_accuracy: 0.9380 - val_loss: 0.1598
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 7ms/step - accuracy: 0.9571 

2024/09/22 17:15:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run capricious-steed-702 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/7343341f8e14472b9460e5b7b75ef679.
2024/09/22 17:15:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_2000_128 with accuracy: 0.94
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step - accuracy: 0.8507 - loss: 0.3780 - val_accuracy: 0.9023 - val_loss: 0.2380
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9066 - loss: 0.2347 - val_accuracy: 0.9113 - val_loss: 0.2231
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9146 - loss: 0.2128 - val_accuracy: 0.9134 - val_loss: 0.2178
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9221 - loss: 0.1965 - val_accuracy: 0.9161 - val_loss: 0.2120
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9273 - loss: 0.1843 - val_accuracy: 0.9180 - val_loss: 0.2173
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9309

2024/09/22 17:18:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run traveling-ant-511 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/1753597d3f004c7db911c83507f1c085.
2024/09/22 17:18:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_4000_16 with accuracy: 0.92
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.8646 - loss: 0.3382 - val_accuracy: 0.9085 - val_loss: 0.2215
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9129 - loss: 0.2130 - val_accuracy: 0.9151 - val_loss: 0.2098
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 0.9232 - loss: 0.1911 - val_accuracy: 0.9181 - val_loss: 0.2056
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9308 - loss: 0.1739 - val_accuracy: 0.9208 - val_loss: 0.2039
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9372 - loss: 0.1604 - val_accuracy: 0.9199 - val_loss: 0.2045
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9411 

2024/09/22 17:26:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run capricious-fox-953 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/9782a46a8b4649f2904f2d13d86d0018.
2024/09/22 17:26:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_4000_32 with accuracy: 0.92
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.8715 - loss: 0.3190 - val_accuracy: 0.9114 - val_loss: 0.2134
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.9181 - loss: 0.2027 - val_accuracy: 0.9182 - val_loss: 0.2003
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 4ms/step - accuracy: 0.9301 - loss: 0.1761 - val_accuracy: 0.9226 - val_loss: 0.1941
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.9389 - loss: 0.1552 - val_accuracy: 0.9237 - val_loss: 0.1983
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9452 - loss: 0.1400 - val_accuracy: 0.9248 - val_loss: 0.2034
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9510 



Model saved as Simple_LSTM_4000_64 with accuracy: 0.93


2024/09/22 17:33:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run worried-snipe-661 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/6c59ee8c037b4048b8bde02f000b279f.
2024/09/22 17:33:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 9ms/step - accuracy: 0.8774 - loss: 0.3033 - val_accuracy: 0.9148 - val_loss: 0.2064
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 9ms/step - accuracy: 0.9217 - loss: 0.1937 - val_accuracy: 0.9222 - val_loss: 0.1944
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 10ms/step - accuracy: 0.9332 - loss: 0.1670 - val_accuracy: 0.9253 - val_loss: 0.1913
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 10ms/step - accuracy: 0.9433 - loss: 0.1446 - val_accuracy: 0.9265 - val_loss: 0.1913
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 9ms/step - accuracy: 0.9516 - loss: 0.1246 - val_accuracy: 0.9270 - val_loss: 0.1974
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 9ms/step - accuracy: 0.9585 - loss: 0.1081 - val_accuracy: 0.9277 - val_loss: 0

2024/09/22 17:51:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run sassy-cat-13 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/08e901c25faa46f7a4469d2f946b6827.
2024/09/22 17:51:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_4000_128 with accuracy: 0.93
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.8445 - loss: 0.3893 - val_accuracy: 0.9010 - val_loss: 0.2387
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9047 - loss: 0.2385 - val_accuracy: 0.9075 - val_loss: 0.2268
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9160 - loss: 0.2122 - val_accuracy: 0.9113 - val_loss: 0.2259
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9224 - loss: 0.1962 - val_accuracy: 0.9135 - val_loss: 0.2229
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9268 - loss: 0.1865 - val_accuracy: 0.9147 - val_loss: 0.2301
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1ms/step - accuracy: 0.9311

2024/09/22 17:53:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run indecisive-vole-99 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/3303725a6aef467c9893a35a786fa667.
2024/09/22 17:53:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_5000_16 with accuracy: 0.92
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.8608 - loss: 0.3466 - val_accuracy: 0.9062 - val_loss: 0.2270
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9118 - loss: 0.2191 - val_accuracy: 0.9120 - val_loss: 0.2131
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9233 - loss: 0.1930 - val_accuracy: 0.9161 - val_loss: 0.2123
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9314 - loss: 0.1751 - val_accuracy: 0.9185 - val_loss: 0.2083
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9377 - loss: 0.1605 - val_accuracy: 0.9175 - val_loss: 0.2097
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9431 

2024/09/22 17:57:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run crawling-goose-930 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/ece8d7d6cebc4a5a991d0b60d3161fa1.
2024/09/22 17:57:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_5000_32 with accuracy: 0.92
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.8685 - loss: 0.3239 - val_accuracy: 0.9089 - val_loss: 0.2210
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9159 - loss: 0.2081 - val_accuracy: 0.9154 - val_loss: 0.2064
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9288 - loss: 0.1788 - val_accuracy: 0.9184 - val_loss: 0.2008
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.9384 - loss: 0.1571 - val_accuracy: 0.9213 - val_loss: 0.2009
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.9455 - loss: 0.1398 - val_accuracy: 0.9225 - val_loss: 0.2025
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9527 

2024/09/22 18:04:05 INFO mlflow.tracking._tracking_service.client: 🏃 View run trusting-mule-681 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/8c7dbf0bdd464701b64e393be65473f2.
2024/09/22 18:04:05 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_5000_64 with accuracy: 0.92
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 7ms/step - accuracy: 0.8722 - loss: 0.3152 - val_accuracy: 0.9114 - val_loss: 0.2142
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 8ms/step - accuracy: 0.9205 - loss: 0.1975 - val_accuracy: 0.9192 - val_loss: 0.2010
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 8ms/step - accuracy: 0.9341 - loss: 0.1661 - val_accuracy: 0.9208 - val_loss: 0.1964
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 8ms/step - accuracy: 0.9440 - loss: 0.1438 - val_accuracy: 0.9243 - val_loss: 0.2055
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 8ms/step - accuracy: 0.9531 - loss: 0.1221 - val_accuracy: 0.9229 - val_loss: 0.2114
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 8ms/step - accuracy: 0.9604 

2024/09/22 18:18:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run colorful-shrew-603 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/c7140f0e6e17481395d86305c40559ce.
2024/09/22 18:18:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Simple_LSTM_5000_128 with accuracy: 0.93
Registering the best model: Simple_LSTM_1000_128 with accuracy: 0.95


Successfully registered model 'Best_Simple_LSTM_Model'.
2024/09/22 18:18:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Best_Simple_LSTM_Model, version 1
Created version '1' of model 'Best_Simple_LSTM_Model'.


#  Bi-directional LSTM

In [22]:
for input_dim in [1000, 2000, 4000, 5000]:
    for output_dim in [16, 32, 64, 128]:
        input_length = 10
        with mlflow.start_run():
            # Tokenization and Padding
            tokenizer = Tokenizer(num_words=input_dim)
            tokenizer.fit_on_texts(filtered_questions)
            sequences = tokenizer.texts_to_sequences(filtered_questions)
            X = pad_sequences(sequences, maxlen=input_length)
            y = np.array(filtered_labels.tolist())

            # Model 2: Bi-directional LSTM
            model_name = f"Bidirectional_LSTM_{input_dim}_{output_dim}"
            model = Sequential([
                Embedding(input_dim=input_dim, output_dim=output_dim, input_length=input_length),
                Bidirectional(LSTM(output_dim)),
                Dropout(0.5),
                Dense(32, activation='relu'),
                Dense(3, activation='softmax')
            ])

            # Compile and train
            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
            model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

            # Evaluate and log
            loss, accuracy = model.evaluate(X_test, y_test)
            mlflow.log_params({"input_dim": input_dim, "output_dim": output_dim, "input_length": input_length})
            mlflow.log_metric("loss", loss)
            mlflow.log_metric("accuracy", accuracy)

            # Save the model with a unique name
            mlflow.keras.log_model(model, artifact_path=model_name)
            print(f"Model saved as {model_name} with accuracy: {accuracy:.2f}")
        mlflow.end_run()


Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.8889 - loss: 0.2822 - val_accuracy: 0.9315 - val_loss: 0.1724
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9318 - loss: 0.1752 - val_accuracy: 0.9358 - val_loss: 0.1610
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9370 - loss: 0.1604 - val_accuracy: 0.9384 - val_loss: 0.1552
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9404 - loss: 0.1530 - val_accuracy: 0.9400 - val_loss: 0.1510
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9431 - loss: 0.1461 - val_accuracy: 0.9417 - val_loss: 0.1476
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9444 - loss: 0.1433 - val_accuracy: 0.9419 - val_loss: 0.147

2024/09/22 18:21:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run defiant-ape-959 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/25c22f52e35347ecb89e47e6ceee9b16.
2024/09/22 18:21:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Bidirectional_LSTM_1000_16 with accuracy: 0.94
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 3ms/step - accuracy: 0.8969 - loss: 0.2613 - val_accuracy: 0.9341 - val_loss: 0.1660
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.9350 - loss: 0.1647 - val_accuracy: 0.9386 - val_loss: 0.1524
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.9416 - loss: 0.1486 - val_accuracy: 0.9409 - val_loss: 0.1488
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.9453 - loss: 0.1390 - val_accuracy: 0.9418 - val_loss: 0.1465
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.9483 - loss: 0.1328 - val_accuracy: 0.9429 - val_loss: 0.1437
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 

2024/09/22 18:26:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run likeable-snake-443 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/42e5bb57663847d1984a7c5848ca5897.
2024/09/22 18:26:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Bidirectional_LSTM_1000_32 with accuracy: 0.95
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 5ms/step - accuracy: 0.9047 - loss: 0.2403 - val_accuracy: 0.9366 - val_loss: 0.1585
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 5ms/step - accuracy: 0.9392 - loss: 0.1533 - val_accuracy: 0.9426 - val_loss: 0.1458
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 5ms/step - accuracy: 0.9449 - loss: 0.1393 - val_accuracy: 0.9437 - val_loss: 0.1418
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 5ms/step - accuracy: 0.9495 - loss: 0.1301 - val_accuracy: 0.9448 - val_loss: 0.1405
Epoch 5/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 5ms/step - accuracy: 0.9532 - loss: 0.1209 - val_accuracy: 0.9467 - val_loss: 0.1366
Epoch 6/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 5ms/step - accuracy: 

2024/09/22 18:34:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run bouncy-cow-286 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/e4352c9a0f1f4b169800149bd3620954.
2024/09/22 18:34:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


Model saved as Bidirectional_LSTM_1000_64 with accuracy: 0.95
Epoch 1/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 12ms/step - accuracy: 0.9084 - loss: 0.2295 - val_accuracy: 0.9386 - val_loss: 0.1547
Epoch 2/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 13ms/step - accuracy: 0.9404 - loss: 0.1511 - val_accuracy: 0.9430 - val_loss: 0.1444
Epoch 3/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 12ms/step - accuracy: 0.9487 - loss: 0.1318 - val_accuracy: 0.9471 - val_loss: 0.1349
Epoch 4/10
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 13ms/step - accuracy: 0.9532 - loss: 0.1205 - val_accuracy: 0.9471 - val_loss: 0.1357
Epoch 5/10
[1m10116/10373[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m3s[0m 12ms/step - accuracy: 0.9583 - loss: 0.1080

2024/09/22 18:45:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run gentle-rook-187 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/4ae09ce00629496fb41f0fe627bce2e6.
2024/09/22 18:45:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


KeyboardInterrupt: 

# Chosen Model 
## Bi-directional LSTM
### Params
- input_dim : 1000
- output_dim : 128
- input_length : 10

In [9]:
import os
import mlflow
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Define directories and parameters
save_dir = "model_token"
os.makedirs(save_dir, exist_ok=True)
input_dim = 1000  # Adjust as needed
output_dim = 64  # Adjust as needed
input_length = 10  # Should match the training setup

# Sample data (assuming filtered_questions and filtered_labels are already defined)
# filtered_questions = [...]  # Define your questions list here
# filtered_labels = [...]     # Define your labels list here

# Tokenization and Padding
tokenizer = Tokenizer(num_words=input_dim)
tokenizer.fit_on_texts(filtered_questions)
sequences = tokenizer.texts_to_sequences(filtered_questions)
X = pad_sequences(sequences, maxlen=input_length)
y = np.array(filtered_labels.tolist())

# Save the tokenizer
tokenizer_path = os.path.join(save_dir, "tokenizer.pkl")
with open(tokenizer_path, 'wb') as f:
    pickle.dump(tokenizer, f)
print(f"Tokenizer saved at: {tokenizer_path}")

# Split data for training and evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Start MLflow run
with mlflow.start_run():
    # Model: Simple LSTM
    model_name = f"Simple_LSTM_{input_dim}_{output_dim}"
    model = Sequential([
        Embedding(input_dim=input_dim, output_dim=output_dim, input_length=input_length),
        LSTM(output_dim),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dense(3, activation='softmax')
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model with a validation split to monitor training
    model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=2)

    # Save the model in the correct format
    model_path = os.path.join(save_dir, model_name + ".keras")
    model.save(model_path)
    print(f"Model saved at: {model_path}")

    # Log the model to MLflow
    mlflow.log_params({"input_dim": input_dim, "output_dim": output_dim, "input_length": input_length})
    mlflow.keras.log_model(model, artifact_path=model_name)
    print(f"Model logged in MLflow as {model_name}")

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Model evaluation - Accuracy: {accuracy:.2f}, Loss: {loss:.2f}")

# End the MLflow run
mlflow.end_run()


Tokenizer saved at: model_token/tokenizer.pkl
Epoch 1/10
10373/10373 - 30s - 3ms/step - accuracy: 0.9187 - loss: 0.2055 - val_accuracy: 0.9347 - val_loss: 0.1668
Epoch 2/10
10373/10373 - 29s - 3ms/step - accuracy: 0.9362 - loss: 0.1604 - val_accuracy: 0.9392 - val_loss: 0.1516
Epoch 3/10
10373/10373 - 30s - 3ms/step - accuracy: 0.9422 - loss: 0.1467 - val_accuracy: 0.9427 - val_loss: 0.1448
Epoch 4/10
10373/10373 - 30s - 3ms/step - accuracy: 0.9462 - loss: 0.1367 - val_accuracy: 0.9449 - val_loss: 0.1405
Epoch 5/10
10373/10373 - 29s - 3ms/step - accuracy: 0.9497 - loss: 0.1293 - val_accuracy: 0.9461 - val_loss: 0.1379
Epoch 6/10
10373/10373 - 29s - 3ms/step - accuracy: 0.9522 - loss: 0.1220 - val_accuracy: 0.9449 - val_loss: 0.1377
Epoch 7/10
10373/10373 - 28s - 3ms/step - accuracy: 0.9554 - loss: 0.1163 - val_accuracy: 0.9466 - val_loss: 0.1403
Epoch 8/10
10373/10373 - 28s - 3ms/step - accuracy: 0.9572 - loss: 0.1111 - val_accuracy: 0.9474 - val_loss: 0.1412
Epoch 9/10
10373/10373 - 3



Model saved at: model_token/Simple_LSTM_1000_64.keras


2024/09/24 09:37:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run invincible-tern-528 at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1/runs/ae526ecd037b43e69936e94473d00f29.
2024/09/24 09:37:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-1073438601911.us-west2.run.app/#/experiments/1.


MlflowException: API request to https://mlflow-service-1073438601911.us-west2.run.app/api/2.0/mlflow-artifacts/artifacts/1/ae526ecd037b43e69936e94473d00f29/artifacts/Simple_LSTM_1000_64/python_env.yaml failed with exception HTTPSConnectionPool(host='mlflow-service-1073438601911.us-west2.run.app', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/1/ae526ecd037b43e69936e94473d00f29/artifacts/Simple_LSTM_1000_64/python_env.yaml (Caused by ResponseError('too many 500 error responses'))