In [1]:
!pip install wandb
!pip install scikit-learn
!pip install pandas
!pip install numpy
!pip install simpletransformers
!pip install torch







In [None]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
import wandb
import pandas as pd
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np
from simpletransformers.classification import MultiLabelClassificationArgs, MultiLabelClassificationModel

# Loading in data

In [None]:
# Fetch the 20 Newsgroups corpus. Only the training subset is requested,
# which is also what the loader returns when `subset` is not given.
data = fetch_20newsgroups(subset="train")

In [None]:
# Display the category names as the cell's output value; the position of a
# name in this list is the integer class id stored in `data.target`.
data.target_names

In [None]:
# Pull the inputs and targets out of the dataset container:
# X holds the documents, y the corresponding class ids.
X, y = data.data, data.target

In [None]:
# Hold out 20% of the documents for evaluation. The fixed random_state
# makes the split identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Wrap each split in a two-column frame: "X" carries the document text and
# "y" the class id. Both frames share the same layout so they can be passed
# interchangeably to the training / evaluation code further down.
train_df = pd.DataFrame({"X": X_train, "y": y_train})
eval_df = pd.DataFrame({"X": X_test, "y": y_test})

# Setup Code

In [None]:
# W&B sweep definition: Bayesian search that tries to minimize the logged
# "train_loss" metric while varying the number of epochs (discrete choices)
# and the learning rate (continuous range between `min` and `max`).
sweep_config = dict(
    method="bayes",
    metric=dict(name="train_loss", goal="minimize"),
    parameters=dict(
        num_train_epochs=dict(values=[2, 3, 5]),
        learning_rate=dict(min=5e-5, max=4e-4),
    ),
)

In [None]:
# Settings shared by every run of the sweep. The hyperparameters being
# searched (epochs, learning rate) are NOT set here; they are supplied per
# run through `wandb.config` when the model is constructed.
model_args = MultiLabelClassificationArgs()
model_args.manual_seed = 4  # fixed seed so runs differ only by hyperparameters
model_args.train_batch_size = 16
model_args.eval_batch_size = 8
model_args.labels_list = data.target_names
model_args.overwrite_output_dir = True  # each sweep run reuses the same output dir
# The sweep optimizes the "train_loss" metric. Simple Transformers only logs
# that key to W&B as part of its evaluate-during-training progress scores, so
# without this flag the Bayesian search has no metric to optimize.
# This requires an eval_df to be passed to train_model().
model_args.evaluate_during_training = True
model_args.wandb_project = "example_simple_transformers_sweep"

In [None]:
def to_multi_hot_frame(df, num_labels):
    """Convert an ("X", "y") frame into the layout a multi-label model expects.

    Args:
        df: DataFrame with a text column "X" and an integer class-id column "y".
        num_labels: Total number of classes; the length of every label vector.

    Returns:
        A new DataFrame with a "text" column and a "labels" column in which
        each entry is a list of `num_labels` ints with a single 1 at the
        position of the row's class id.

    Raises:
        ValueError: If a class id falls outside ``[0, num_labels)``.
    """
    multi_hot_rows = []
    for raw_index in df["y"]:
        class_index = int(raw_index)
        if not 0 <= class_index < num_labels:
            raise ValueError(
                f"class id {class_index} is outside the range [0, {num_labels})"
            )
        row = [0] * num_labels
        row[class_index] = 1
        multi_hot_rows.append(row)
    return pd.DataFrame({"text": df["X"].tolist(), "labels": multi_hot_rows})


def objective():
    """Train and evaluate one model for a single run of the W&B sweep.

    Called by the sweep agent with no arguments. The hyperparameters chosen
    for this run are read from `wandb.config`; everything else comes from the
    notebook-level `model_args`, `train_df` and `eval_df`.
    """
    # Initialize a new wandb run
    wandb.init()
    try:
        # One output per category; without this the model is built with the
        # library's default number of labels instead of one per newsgroup.
        num_labels = len(model_args.labels_list)

        # The frames hold one integer class id per document, but the
        # multi-label model trains on multi-hot label vectors.
        train_frame = to_multi_hot_frame(train_df, num_labels)
        eval_frame = to_multi_hot_frame(eval_df, num_labels)

        # Create a TransformerModel
        model = MultiLabelClassificationModel(
            "roberta",
            "roberta-base",
            num_labels=num_labels,
            args=model_args,
            sweep_config=wandb.config,
        )

        # Train the model
        model.train_model(train_frame, eval_df=eval_frame)

        # Evaluate the model
        model.eval_model(eval_frame)
    finally:
        # Always close the run, even when training fails, so the next sweep
        # run does not inherit a half-open one. `wandb.finish()` replaces the
        # deprecated `wandb.join()`.
        wandb.finish()

# Run Sweeps

In [None]:
# Register the sweep with W&B, then start an agent in this kernel that calls
# `objective` once per set of sampled hyperparameters.
# NOTE(review): no `count` is passed to the agent, so this cell presumably
# keeps launching runs until it is interrupted - confirm that is intended.
sweep_id = wandb.sweep(
    sweep=sweep_config,
    project="example_simple_transformers_sweep",
)
wandb.agent(sweep_id, function=objective)