In [7]:
import sys
# Show which Python interpreter backs this kernel, to confirm the intended
# environment is active before importing the heavier dependencies.
print(sys.executable)

C:\Users\salma\.conda\envs\cogsci-lab\python.exe


In [8]:
import torch

In [9]:
import mlflow

In [10]:
# Project-local modules used by the cells below.
from model_RNN import MemoryRNN  # model class instantiated in the experiment cells
from data_loader import generate_task_data  # called below as generate_task_data(n_samples, sequence_length) -> (inputs, labels)
from train import train_model  # training helper; its return value is used below as the trained model
# Reached only if every import above succeeded.
print("Successfully imported all modules.")

Successfully imported all modules.


In [11]:
# --- 2. CONFIGURATION: Defining the Experiment Parameters ---
# Every tunable value lives in this cell so the experiment cells below only
# read constants and never hard-code numbers.

# Model dimensions
INPUT_SIZE = 3         # Vocabulary size (Pad, A, B)
HIDDEN_SIZE = 16       # The model's memory capacity (hidden state width)
OUTPUT_SIZE = 3        # Number of possible answers the model can give
SEQUENCE_LENGTH = 10   # How long the model needs to remember the signal

# Training schedule
# NOTE(review): the original cell carried the bare note "250  600  900 1300";
# presumably epoch counts tried in earlier runs -- confirm before relying on it.
NUM_EPOCHS = 300       # How many training cycles to run
BATCH_SIZE = 300       # How many examples to show the model at once
LEARNING_RATE = 0.005  # How quickly the model learns

# Reproducibility
# Fix PyTorch's global RNG so weight initialisation (and any torch-based
# sampling) is identical after "Restart Kernel -> Run All". Without a seed,
# two runs with the same hyperparameters can land anywhere between
# chance-level and fully converged.
# NOTE(review): if data_loader draws from `random` or NumPy instead of torch,
# those generators need seeding as well -- confirm against data_loader.
SEED = 42
torch.manual_seed(SEED);  # trailing ';' keeps the Generator repr out of the cell output

In [13]:
# --- Branch 1: MLflow Setup _ Last cell---
# Trains a freshly built MemoryRNN inside a single MLflow run and records:
#   * the hyperparameters                    (params)
#   * the training loss at every epoch       (metric "training_loss")
#   * accuracy on a freshly generated batch  (metric "final_accuracy")
#   * the trained network itself             (artifact "trained_model")
mlflow.set_experiment("RNN_Memory_Task")

# This command starts a "new page" in our lab notebook for this specific run.
# Everything indented below will be recorded against that run.
with mlflow.start_run():

    # --- Log the parameters (the "Methods" section of our report) ---
    # Record every value that shapes the run, not just a subset, so the run
    # can be reproduced from the MLflow UI alone.
    print("Logging parameters to MLflow...")
    mlflow.log_params({
        "model_type": "RNN",
        "input_size": INPUT_SIZE,
        "hidden_size": HIDDEN_SIZE,
        "output_size": OUTPUT_SIZE,
        "sequence_length": SEQUENCE_LENGTH,
        "num_epochs": NUM_EPOCHS,
        "batch_size": BATCH_SIZE,
        "learning_rate": LEARNING_RATE,
    })

    # --- 3. INSTANTIATION: Building the Model ---
    # The status message previously said "(LSTM)", which contradicted the
    # logged model_type of "RNN".
    print("\nCreating the MemoryRNN model...")
    model = MemoryRNN(
        input_size=INPUT_SIZE,
        hidden_size=HIDDEN_SIZE,
        output_size=OUTPUT_SIZE
    )

    # --- 4. EXECUTION: The Training Process ---
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    print("--- Starting Training ---")
    model.train()  # explicit counterpart to the model.eval() call further down
    for epoch in range(NUM_EPOCHS):
        # A fresh synthetic batch is drawn every epoch.
        inputs, labels = generate_task_data(BATCH_SIZE, SEQUENCE_LENGTH)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the loss at every epoch so MLflow shows a full learning curve;
        # only the console output is throttled to every 25th epoch.
        loss_value = loss.item()
        mlflow.log_metric("training_loss", loss_value, step=epoch + 1)
        if (epoch + 1) % 25 == 0:
            print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss_value:.4f}')

    print("--- Training Complete ---")

    # --- 5. EXAMINATION & Final Logging ---
    print("\n--- Testing the Trained Model ---")
    model.eval()

    total_tests = 100  # number of newly generated examples used for the accuracy estimate

    with torch.no_grad():
        test_inputs, test_labels = generate_task_data(total_tests, SEQUENCE_LENGTH)
        test_outputs = model(test_inputs)
        # Predicted class = index of the largest logit along the class axis.
        predicted_indices = test_outputs.argmax(dim=1)
        correct_predictions = (predicted_indices == test_labels).sum().item()

    accuracy = correct_predictions / total_tests
    print(f"Final Accuracy on {total_tests} test samples: {accuracy:.2%}")

    # --- Log the final accuracy metric ---
    mlflow.log_metric("final_accuracy", accuracy)

    # --- Log the model itself (the "Artifact") ---
    # This saves the trained model as a file associated with this run.
    print("Logging model artifact to MLflow...")
    mlflow.pytorch.log_model(model, "trained_model")

print("\nExperiment run finished and logged to MLflow.")
# --- END------

Logging parameters to MLflow...

Creating the MemoryRNN (LSTM) model...
--- Starting Training ---
Epoch [25/300], Loss: 0.7220
Epoch [50/300], Loss: 0.6984
Epoch [75/300], Loss: 0.7060
Epoch [100/300], Loss: 0.6984
Epoch [125/300], Loss: 0.6949
Epoch [150/300], Loss: 0.6939
Epoch [175/300], Loss: 0.6959
Epoch [200/300], Loss: 0.6988
Epoch [225/300], Loss: 0.6952
Epoch [250/300], Loss: 0.7017
Epoch [275/300], Loss: 0.6939




Epoch [300/300], Loss: 0.6937
--- Training Complete ---

--- Testing the Trained Model ---
Final Accuracy on 100 test samples: 47.00%
Logging model artifact to MLflow...





Experiment run finished and logged to MLflow.


In [14]:
# --- Branch 1 :Normal Setup  ---
# --- 3. INSTANTIATION: Building the Model from Our Blueprint ---
# Turn the MemoryRNN class (imported from model_RNN) into a concrete network
# sized by the configuration constants, then echo its layer layout so the
# architecture can be checked at a glance.
print("\nCreating the MemoryRNN model...")
rnn_model = MemoryRNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, output_size=OUTPUT_SIZE)
print(rnn_model)


Creating the MemoryRNN model...
MemoryRNN(
  (rnn): RNN(3, 16, batch_first=True)
  (fc): Linear(in_features=16, out_features=3, bias=True)
)


In [6]:
# --- Normal Setup _ Last Cell ---
# --- 4. EXECUTION: Running the Training Process ---
# Hand the freshly built model and the training hyperparameters to
# train_model (imported from train); its return value is used below as the
# trained network.
trained_model = train_model(
    model=rnn_model,
    num_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    sequence_length=SEQUENCE_LENGTH,
    learning_rate=LEARNING_RATE
)

# --- 5. EXAMINATION: Testing the Trained Model ---
# Spot check on one newly generated example. A single sample is anecdotal:
# a chance-level model still passes it about half the time, so treat the
# verdict below as a smoke test rather than an accuracy measurement.

print("\n--- Testing the Trained Model on a New, Unseen Example ---")

# Evaluation mode turns off training-only behaviour such as Dropout.
trained_model.eval()

# No gradients are needed for inference; skipping them saves memory and compute.
with torch.no_grad():
    # Generate one single, new test sample.
    test_input, test_label = generate_task_data(1, SEQUENCE_LENGTH)

    # Raw output (logits) for this sample.
    test_output = trained_model(test_input)

    # The prediction is the index of the highest logit along the class axis.
    predicted_idx = test_output.argmax(dim=1)

    # The model has OUTPUT_SIZE = 3 classes, so it can also answer with class 0
    # (the padding token). The original two-entry map raised KeyError in that
    # case; every class now has a printable name, with a generic fallback in
    # case OUTPUT_SIZE is ever enlarged.
    signal_map = {0: 'Pad', 1: 'A', 2: 'B'}
    label_idx = test_label.item()
    guess_idx = predicted_idx.item()
    correct_signal = signal_map.get(label_idx, f"class {label_idx}")
    predicted_signal = signal_map.get(guess_idx, f"class {guess_idx}")

    print(f"The task was to remember the signal: '{correct_signal}'")
    print(f"The model's final prediction was:     '{predicted_signal}'")

    if correct_signal == predicted_signal:
        print("\n[SUCCESS]: The model has learned to integrate information over time.")
    else:
        print("\n[FAILURE]: The model did not learn the task. Consider increasing NUM_EPOCHS.")
# --- END------

--- Starting Training ---
Epoch [25/300], Loss: 0.7078
Epoch [50/300], Loss: 0.7106
Epoch [75/300], Loss: 0.6891
Epoch [100/300], Loss: 0.6935
Epoch [125/300], Loss: 0.4998
Epoch [150/300], Loss: 0.0733
Epoch [175/300], Loss: 0.0139
Epoch [200/300], Loss: 0.0068
Epoch [225/300], Loss: 0.0044
Epoch [250/300], Loss: 0.0033
Epoch [275/300], Loss: 0.0027
Epoch [300/300], Loss: 0.0023
--- Training Complete ---

--- Testing the Trained Model on a New, Unseen Example ---
The task was to remember the signal: 'B'
The model's final prediction was:     'B'

[SUCCESS]: The model has learned to integrate information over time.
