In [None]:
import random

import numpy as np
import torch
from torch.optim import AdamW

from flashml import (
    BatchIterator,
    inspect_model,
    load_checkpoint,
    log_checkpoint,
    log_metrics,
)
from flashml.schedulers import LRConsineAnnealingWithLinearWarmup

# Hyperparameters and run settings for this notebook. The training loop also
# passes this dict to `log_metrics` as `hyperparams`.
CONFIG = {
    # Set to {"run_id": <id>, "version": <n>} to resume from a checkpoint.
    # None here (or a run_id of None) means: start a fresh run.
    "resume_training": None,
    # ---------------- model / data ----------------
    "hidden_size": 128,
    "num_epochs": 8,
    "batch_size": 64,
    # ------------ optimizer & scheduler -----------
    "gradient_accumulation_steps": 1,
    "lr": 3e-4,
    "betas": (0.9, 0.999),
    "eps": 1e-7,
    "weight_decay": 1e-2,
    "lr_warm": 0.03,  # handed to the scheduler as `warmup_steps_ratio`
    # Ideas not wired in yet:
    #   - SMOTE
    #   - label smoothing
    #   - CE weight / focal-loss alpha
    #   - hidden size / expansion factor
    # ----------------- frequencies ----------------
    "validation_freq": 10,  # compared against step[0] in the training loop
    "checkpoint_freq": 100,  # compared against step[0] in the training loop
    # ---------------- non-important ---------------
    "seed": 42,
    "reproducible": False,
    "experiment_name": None,
}

# === AUTO SEEDING ===
# Feed the same seed to every RNG used here: Python, NumPy, torch CPU, torch CUDA.
for _seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(CONFIG["seed"])

# cudnn autotuning stays on unless a reproducible run was requested, in which
# case it is turned off and deterministic cudnn kernels are forced.
torch.backends.cudnn.benchmark = not CONFIG["reproducible"]
if CONFIG["reproducible"]:
    torch.backends.cudnn.deterministic = True

# === AUTO DEVICE ===
# Preference order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [None]:
### [Load data] TODO
# Placeholders: replace each `None` with the actual dataset before running.
# `train_data` feeds the training BatchIterator below; `eval_data` is consumed
# by the validation pass in the training loop; `test_data` is not referenced
# anywhere else in the visible cells.
train_data = None
eval_data = None
test_data = None

# Training batches for all epochs. The loop indexes the yielded `step` as
# step[0] / step[1], and `len(batch_iterator)` is used as the scheduler length.
batch_iterator = BatchIterator(
    df=train_data,
    num_epochs=CONFIG["num_epochs"],
    batch_size=CONFIG["batch_size"],
    mode="train",
)

print("Data Loaded.")

#### [Define model] TODO
# Placeholder: while `model` is still None the `model.train()` call below
# raises AttributeError, so this cell only runs once a real model is assigned.
# NOTE(review): nothing in the visible cells moves the model to `device` --
# presumably that belongs in this TODO; confirm.
model = None
model.train()
print("Model Loaded.")
inspect_model(model)

# Only parameters that require gradients are handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]

# Request the fused AdamW kernel only when every trainable parameter is a
# floating-point CUDA tensor. The previous `fused="True"` was a (truthy) string,
# which asked for the fused path regardless of where the parameters live.
# Passing None otherwise lets PyTorch choose its default implementation.
use_fused = bool(trainable_params) and all(
    p.is_cuda and p.is_floating_point() for p in trainable_params
)

optim = AdamW(
    trainable_params,
    lr=CONFIG["lr"],
    betas=CONFIG["betas"],
    eps=CONFIG["eps"],  # was configured in CONFIG but never passed through
    weight_decay=CONFIG["weight_decay"],
    fused=True if use_fused else None,
)

# The training loop calls `scheduler.step()` once per optimizer update, i.e.
# once every `gradient_accumulation_steps` iterations, so the schedule length
# is the number of updates rather than the number of batches. With the default
# accumulation of 1 this equals len(batch_iterator), as before.
optimizer_updates = max(
    1, len(batch_iterator) // CONFIG["gradient_accumulation_steps"]
)

scheduler = LRConsineAnnealingWithLinearWarmup(
    optimizer=optim,
    max_steps=optimizer_updates,
    warmup_steps_ratio=CONFIG["lr_warm"],
)

# === AUTO RESUMING ===
# Resume only when a resume config is present AND it names a run_id.
_resume_cfg = CONFIG["resume_training"]
if _resume_cfg and _resume_cfg["run_id"]:
    ckpt = load_checkpoint(
        run_id=_resume_cfg["run_id"],
        version=_resume_cfg["version"],
        experiment_name=CONFIG["experiment_name"],
    )

    # Restore each stateful object from its checkpoint entry, in this order.
    for _stateful, _ckpt_key in (
        (model, "model_state_dict"),
        (optim, "optim_state_dict"),
        (scheduler, "scheduler_state_dict"),
        (batch_iterator, "batch_iterator_state_dict"),
        #### [OPTIONAL] TODO append other (object, key) pairs
    ):
        _stateful.load_state_dict(ckpt[_ckpt_key])

In [None]:
# Main loop: one iteration per training batch, with periodic validation and
# checkpointing driven by the frequencies in CONFIG.
# NOTE(review): `step` is indexed as step[0] / step[1] below -- presumably
# (current step, total steps); confirm against BatchIterator.
for step, batch in batch_iterator:
    ### ============================== Train ==============================
    model.train()

    ### [] TODO forward backward
    loss = None

    # Apply an optimizer + scheduler update only once every
    # `gradient_accumulation_steps` iterations.
    if (step[0] + 1) % CONFIG["gradient_accumulation_steps"] == 0:
        optim.step()
        optim.zero_grad()
        scheduler.step()

    log_metrics(
        {"loss": loss.item(), "lr": scheduler.current_lr},
        step=step,
        hyperparams=CONFIG,
        experiment_name=CONFIG["experiment_name"],
    )

    ### ============================ Validation ===========================
    # Runs every `validation_freq` steps (never at step 0) and when
    # step[0] == step[1]; a frequency <= 0 disables validation.
    # NOTE(review): the `step[0] == step[1]` branch presumably targets the final
    # step -- verify it can actually be reached with BatchIterator's indexing.
    if CONFIG["validation_freq"] > 0 and (
        (step[0] > 0 and step[0] % CONFIG["validation_freq"] == 0) or step[0] == step[1]
    ):
        model.eval()
        with torch.no_grad():
            preds = []
            targs = []

            # Single pass over the eval set with a doubled batch size.
            for step_ev, batch_ev in BatchIterator(
                df=eval_data,
                num_epochs=1,
                batch_size=CONFIG["batch_size"] * 2,
                mode="eval",
            ):
                pass  ### [] TODO eval

            eval_metrics = None  # compute_metrics(preds, targs)

            log_metrics(
                metrics=eval_metrics,
                step=step,
                experiment_name=CONFIG["experiment_name"],
            )

    ### =========================== Checkpointing =========================
    # Same guard as validation: a frequency <= 0 disables checkpointing instead
    # of raising ZeroDivisionError on the modulo.
    if (
        CONFIG["checkpoint_freq"] > 0
        and step[0] > 0
        and step[0] % CONFIG["checkpoint_freq"] == 0
    ):  # or check for newer best result
        # Keys here must match the ones read back in the auto-resume cell.
        # NOTE(review): `load_checkpoint` is given `experiment_name` but this
        # call is not -- confirm `log_checkpoint` targets the same experiment.
        log_checkpoint(
            state_dict={
                "model_state_dict": model.state_dict(),
                "optim_state_dict": optim.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "batch_iterator_state_dict": batch_iterator.state_dict(),
            }
        )

In [None]:
### HOST MLFLOW (without logging anything) ###
# `host_mlflow` is imported in this cell rather than in the first cell's import
# list, so the cell carries the only name it calls.
# NOTE(review): presumably this starts/serves the MLflow UI -- confirm in flashml.
from flashml import host_mlflow

host_mlflow()


In [1]:
import os

import numpy as np

# Set before the clustering helper is imported so the variable is already in
# place for whatever reads it (presumably joblib's loky backend -- confirm).
os.environ["LOKY_MAX_CPU_COUNT"] = "4"

from flashml.clustering import run_kmeans

# generate random data: three 2-D Gaussian blobs of 50 points each, drawn in
# the same order as before so the seeded sample is unchanged.
# np.concatenate is used because np.concat only exists in NumPy >= 2.0.
np.random.seed(0)
x = np.concatenate(
    [
        np.random.normal(size=(50, 2), loc=-3, scale=2),
        np.random.normal(size=(50, 2), loc=0.5, scale=0.5),
        np.random.normal(size=(50, 2), loc=3, scale=0.01),
    ],
    axis=0,
)
# run kmeans
kmeans = run_kmeans(x)

# print the results
print(kmeans.cluster_centers_)

100%|██████████| 19/19 [00:00<00:00, 82.98it/s]


[34mKMeans (Elbow Method) optimal number of clusters: 6[37m
[[ 2.99906927  2.99974609]
 [-0.71105224 -3.97219423]
 [ 0.4257793   0.57751349]
 [-5.13928656 -0.71943691]
 [-4.11785108 -6.13758915]
 [-3.57020212 -2.82534232]]


In [4]:
# 1-D bimodal sample: 1000 draws centred at -3 stacked on top of 1000 draws
# centred at 10 (both with scale 2), giving an array of shape (2000, 1).
# np.concatenate is used because np.concat only exists in NumPy >= 2.0.
x = np.concatenate(
    [
        np.random.normal(size=(1000, 1), loc=-3, scale=2),
        np.random.normal(size=(1000, 1), loc=10, scale=2),
    ],
    axis=0,
)