In [1]:
from experiment.api import mlflow as mlflow_api
from experiment.utils import transformation

import pandas as pd
# import pathlib
# import json
import collections

In [2]:
LOG_MODEL_RUN = True

In [3]:
mlflow = mlflow_api.MLFlow()

# clean the environment without garbage collection
mlflow.clean()

kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]


In [4]:
# run the tracking server in background
mlflow.run_server()

[2023-08-30 12:21:58 +0300] [92757] [INFO] Starting gunicorn 21.2.0
[2023-08-30 12:21:58 +0300] [92757] [INFO] Listening at: http://0.0.0.0:9999 (92757)
[2023-08-30 12:21:58 +0300] [92757] [INFO] Using worker: sync
[2023-08-30 12:21:58 +0300] [92758] [INFO] Booting worker with pid: 92758
[2023-08-30 12:21:58 +0300] [92759] [INFO] Booting worker with pid: 92759
[2023-08-30 12:21:58 +0300] [92760] [INFO] Booting worker with pid: 92760
[2023-08-30 12:21:58 +0300] [92761] [INFO] Booting worker with pid: 92761


In [28]:
# item_counts = collections.Counter(clean_annotations["classifications"].to_list())

# # 1. Emergency
# # 2. Normal
# # 3. Non Emergency [Doctor]
# # 4. Non Emergency [No Doctor]
# for item, count in item_counts.items():
#     print(f"Item {item} occurs {count} times in the list.")

# # manual test the word lemmatizer
# import simplemma
# word = "hemisferde"
# simplemma.lemmatize(word, lang="tr").lower()

Item 1 occurs 15 times in the list.
Item 3 occurs 24 times in the list.
Item 0 occurs 6 times in the list.
Item 2 occurs 8 times in the list.


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Hyperparameters
batch_size = 64
learning_rate = 0.001
num_epochs = 2

# Define a simple feedforward neural network using nn.Sequential
model = nn.Sequential(
    nn.Linear(28 * 28, 128),  # Input layer (28x28=784 input features, 128 hidden units)
    nn.ReLU(),                # ReLU activation function
    nn.Linear(128, 64),       # Hidden layer (128 input features, 64 hidden units)
    nn.ReLU(),                # ReLU activation function
    nn.Linear(64, 10)         # Output layer (64 input features, 10 output units for 10 classes)
)

# Load MNIST dataset and apply transformations
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images = images.view(-1, 28 * 28)  # Reshape images to flatten them
        optimizer.zero_grad()               # Zero the gradients
        outputs = model(images)            # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()                    # Backpropagation
        optimizer.step()                   # Update weights
        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print('Training finished.')

Epoch [1/10], Loss: 0.2629
Epoch [2/10], Loss: 0.1927
Epoch [3/10], Loss: 0.1876
Epoch [4/10], Loss: 0.0925
Epoch [5/10], Loss: 0.0792
Epoch [6/10], Loss: 0.4371
Epoch [7/10], Loss: 0.0200
Epoch [8/10], Loss: 0.1503
Epoch [9/10], Loss: 0.1140
Epoch [10/10], Loss: 0.0503
Training finished.


In [7]:
# Load MNIST test dataset
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Set model to evaluation mode
model.eval()

correct = 0
total = 0

# Calculate accuracy on the test dataset
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, 28 * 28)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # Get predicted labels
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on the test dataset: {accuracy:.2f}%')

# Set model back to training mode
model.train()

Accuracy on the test dataset: 97.18%


Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
)

In [8]:
model_config = {
    "dataset": "report_dataset",
    "timepoint": None,
    "n_epochs": 100,
    "n_folds": 5,
    "batch_size": 1,
    "learning_rate": 0.01,
    "weight_decay": 0.001,
    "patience": None,
    "validation_period": 5,
    "loss_weight": 1.0,
    "loss_name": "cross_entropy",
    "layer_sizes": [
        8,
        16
    ],
    "model_name": "default_model_name",
    "model_save_path": "/home/oytun/GitRepos/reports/models/models/default_model_name",
    "model_params_save_path": "/home/oytun/GitRepos/reports/models/models/default_model_name_params.json"
}

In [9]:
if LOG_MODEL_RUN:
    # log a model run
    log_dict = {
        "params": model_config,
        "metrics": {
            "accuracy": accuracy,
            "loss": loss.item()
        },
    }

    # extra_artifacts = {
    #     "tokenizer": {
    #         "local_path": tokenizer_path,
    #         "save_path": "data"
    #     }
    # }

    run_id = mlflow.log_experiment_run(
        model=model,
        experiment_name="NLP Experiments",
        run_name=f"RNN: first_run",
        log_dict=log_dict,
        registered_model_name="rnn_experiments",
        # extra_artifacts=extra_artifacts,
        tags={"model": "deep_learning"},
    )

[2023-08-30 12:24:29 +0300] [92759] [INFO] Worker exiting (pid: 92759)
[2023-08-30 12:24:29 +0300] [92760] [INFO] Worker exiting (pid: 92760)
[2023-08-30 12:24:29 +0300] [92757] [INFO] Handling signal: term
[2023-08-30 12:24:29 +0300] [92758] [INFO] Worker exiting (pid: 92758)
[2023-08-30 12:24:29 +0300] [92761] [INFO] Worker exiting (pid: 92761)
[2023-08-30 12:24:29 +0300] [92757] [INFO] Shutting down: Master
