In [None]:
import os

import polars as pl
import torch
from bitsandbytes.optim import PagedLion8bit
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

from flashml import (
    inspect_model,
)
from flashml.schedulers import LRConsineAnnealingWithLinearWarmup

# Export CUDA_LAUNCH_BLOCKING=1 into this process's environment.
# NOTE(review): presumably a debugging aid for CUDA errors — confirm it is still wanted.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")


# Run configuration for this notebook, gathered in a single mapping.
HYPERPARAMS = dict(
    # Hugging Face model id; the original comment lists
    # "tiiuae/Falcon-H1-0.5B-Base" as an alternative.
    model="Qwen/Qwen3-0.6B",
    continue_from_index=-1,
    seed=42,
    batch_size=2,
    gradient_accumulation=8,
    # Two float weights that sum to 1.0.
    # NOTE(review): presumably per-class weights for a cross-entropy loss —
    # confirm where this entry is consumed.
    cross_entropy_weight=torch.tensor(
        [0.0785904383236605, 0.9214095616763395],
        dtype=torch.float,
    ),
    epochs=1,
    lr=2e-5,
    betas=(0.9, 0.999),
    weight_decay=0.005,
    # 4-bit NF4 loading with double quantisation and bfloat16 compute dtype;
    # `load_in_8bit=True` is the alternative that was left commented out.
    quant_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    # LoRA settings targeting the "q_proj" and "v_proj" modules; the original
    # comments list 8 (for r) and 16 (for lora_alpha) as alternative values.
    lora_config=LoraConfig(
        task_type="CAUSAL_LM",
        r=32,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        target_modules=["q_proj", "v_proj"],
    ),
)

In [None]:
import torch  # not referenced in this cell; kept so the cell binds the same names
from transformers import AutoModel, AutoTokenizer

from flashml import inspect_model

# Load the ModernBERT tokenizer, encode one short sentence as PyTorch tensors,
# then load the matching model checkpoint.
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
inputs = tokenizer("Hello world!", return_tensors="pt")
model = AutoModel.from_pretrained("answerdotai/ModernBERT-base")

# Positional inputs handed to `inspect_model`: ids, attention mask, and the
# token type ids when the tokenizer produced them (otherwise None).
# NOTE(review): the third slot is positional — confirm that the model's forward
# signature really expects token type ids in that position.
input_tuple = (
    inputs["input_ids"],
    inputs["attention_mask"],
    None if "token_type_ids" not in inputs else inputs["token_type_ids"],
)

# Inspect the model using the positional inputs built above.
inspect_model(model, input_data=input_tuple)

In [None]:
from classification import (
    make_dummy_classification_dataset,
    run_dummy_classifiers,
    run_linear_classifier,
)

# Build the dummy classification dataset once and reuse it for both runs.
x = make_dummy_classification_dataset()
# Bare expression in the middle of the cell: under IPython's default settings
# only the last expression of a cell is rendered, so this line shows nothing.
x

# The dataset is unpacked into positional arguments for each runner.
run_dummy_classifiers(*x)

run_linear_classifier(*x)

In [None]:
from regression import (
    make_dummy_regression_dataset,
    run_dummy_regressors,
    run_linear_regressor,
)

# Build the dummy regression dataset once and reuse it for both runs.
x = make_dummy_regression_dataset()
# Bare expression in the middle of the cell: under IPython's default settings
# only the last expression of a cell is rendered, so this line shows nothing.
x

# The dataset is unpacked into positional arguments for each runner.
run_dummy_regressors(*x)

run_linear_regressor(*x)

In [None]:
import numpy as np

# Synthetic binary labels and predictions for the confusion-matrix demo.
np.random.seed(42)  # fixed seed so the two draws below are repeatable

size = 100
# Ground truth: each label is 0 with probability 0.6 and 1 with probability 0.4.
target = np.random.choice(a=[0, 1], size=size, p=[0.6, 0.4])
# Predictions: drawn independently of `target`, 50/50 between the two classes.
predicted = np.random.choice(a=[0, 1], size=size, p=[0.5, 0.5])


from classification import plot_confusion_matrix

# Note the argument order used here: predictions first, ground truth second.
# As the last expression of the cell, the call's return value is what gets rendered.
plot_confusion_matrix(predicted, target)

In [None]:
import numpy as np
from classification import (
    compute_binary_classification_metrics,
    compute_multiclass_classification_metrics,
)

from classification import find_best_threshold

# Binary example inputs: four samples, one score column and one label column each.
binary_scores = np.array(
    [
        [0.9],
        [0.7],
        [0.4],
        [0.8],
    ]
)
binary_target = np.array(
    [
        [0],
        [1],
        [0],
        [1],
    ]
)

# Multiclass example inputs: three samples with two score columns, and one
# integer label per sample.
multi_scores = np.array(
    [
        [0.1, 0.9],
        [0.6, 0.4],
        [0.3, 0.7],
    ]
)
multi_target = np.array([1, 0, 1])

# Metrics for both examples; the binary helper is called with threshold=0.5.
binary_metrics = compute_binary_classification_metrics(
    binary_scores,
    binary_target,
    threshold=0.5,
)
multi_metrics = compute_multiclass_classification_metrics(
    multi_scores,
    multi_target,
)

# Threshold search over the binary example, called with bins=100.
t = find_best_threshold(binary_scores, binary_target, bins=100)

# Bare expression in the middle of the cell: under IPython's default settings
# only the last expression of a cell is rendered, so this line shows nothing.
binary_metrics

t

In [None]:
# Re-display `t`; this cell defines nothing itself and relies on the previous
# cell (the `find_best_threshold` call) having been run first.
t

In [None]:
import numpy as np
from classification import plot_roc_curve

# Synthetic targets and scores for the ROC demo (seeded so the draws repeat).
np.random.seed(42)
n_samples = 1000

# Binary targets: one Bernoulli(0.3) draw per sample.
target = np.random.binomial(n=1, p=0.3, size=n_samples)

# Base scores come from a Beta(2, 5) distribution; samples whose target is 1
# then receive an extra Normal(0.3, 0.2) offset.
scores = np.random.beta(a=2, b=5, size=n_samples)
scores[target == 1] += np.random.normal(
    loc=0.3,
    scale=0.2,
    size=(target == 1).sum(),
)

# Keep every score inside [0, 1] after the offset.
scores = scores.clip(0, 1)
# Plot ROC curve: scores are passed first, targets second.
fig = plot_roc_curve(scores, target)
# Ending the cell with `fig` makes the returned object the cell's rendered output.
fig

In [None]:
from transformers import AutoTokenizer
from flashml.inspect import inspect_tokenizer
# Rebinds the notebook-level `tokenizer` name to the "Qwen/Qwen3-4B" tokenizer
# (an earlier cell bound the same name to the ModernBERT tokenizer).
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B")
# Hand the freshly loaded tokenizer to flashml's inspection helper.
inspect_tokenizer(tokenizer)    

In [None]:
# Repeats the inspection from the previous cell; relies on `tokenizer` and
# `inspect_tokenizer` already being defined in the kernel by that cell.
inspect_tokenizer(tokenizer) 

In [2]:
import time
import math
import random
from flashml import log_metrics
# Trimmed-down run configuration for the logging demo below.
# NOTE(review): this rebinds the notebook-level HYPERPARAMS name, replacing the
# fuller mapping from the first cell (which also carries "cross_entropy_weight",
# "quant_config" and "lora_config") — confirm that is intended.
HYPERPARAMS = dict(
    model="Qwen/Qwen3-0.6B",
    continue_from_index=-1,
    seed=42,
    batch_size=2,
    gradient_accumulation=8,
    epochs=1,
    lr=2e-5,
    betas=(0.9, 0.999),
    weight_decay=0.005,
)
# Simulated training run that exercises `log_metrics` with synthetic values.
#
# NOTE(review): the output saved with this cell shows MLflow raising
# MissingConfigException for a missing `mlruns\0\meta.yaml`; that points at the
# local tracking directory rather than at this loop — verify the store before
# re-running.

# Seed the stdlib RNG from the config so the synthetic "acc" series is the same
# on every run (the seed was defined in HYPERPARAMS but never used before).
random.seed(HYPERPARAMS["seed"])

loss = 10.0
epochs = 3
batches = 300
total_steps = batches * epochs  # one logged point per simulated batch

# Single pass; raise the range to repeat the simulated run. `loss` is not reset
# between passes.
for _ in range(1):
    for step in range(total_steps):
        # `loss` starts at 10.0 and drops by 0.01 per step, so it is still
        # positive after the 900 steps configured above.
        log2_loss = math.log2(abs(loss))
        # Synthetic value logged under "acc": current loss plus uniform noise
        # in [0, 1). It is not a real accuracy.
        acc = loss + random.random()
        log_metrics(
            {"loss": log2_loss, "acc": acc},
            # step=(step, total_steps),
            # experiment_name=None,
        )
        loss -= 1e-2
        time.sleep(0.0002)  # brief pause between logged points
    print("\n\n\n")

[90mStarting MLFlow UI.[0m [90mMLFlow UI started.[0m 

MissingConfigException: Yaml file 'E:\Development\flashml\flashml\mlruns\0\meta.yaml' does not exist.