In [1]:
!pip install -q datasets evaluate

In [2]:
!pip install -q wandb

If running this notebook in Colab, please ensure that your Hugging Face `HF_TOKEN` and your Weights & Biases `WANDB_API_KEY` are added to your Colab secrets.

Alternatively, please login to Hugging Face and Weights & Biases by running the following two cells.

In [3]:
# !huggingface-cli login

In [4]:
# !wandb login

In [5]:
import os
import random
import numpy as np
import torch

def seed_everything(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [6]:
from datasets import load_dataset

iris = load_dataset("scikit-learn/iris")
iris

DatasetDict({
    train: Dataset({
        features: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'],
        num_rows: 150
    })
})

In [7]:
features = iris['train'].features
features

{'Id': Value(dtype='int64', id=None),
 'SepalLengthCm': Value(dtype='float64', id=None),
 'SepalWidthCm': Value(dtype='float64', id=None),
 'PetalLengthCm': Value(dtype='float64', id=None),
 'PetalWidthCm': Value(dtype='float64', id=None),
 'Species': Value(dtype='string', id=None)}

In [8]:
iris.set_format("pandas")
iris_df = iris['train'][:]
iris_df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [9]:
iris_df['Species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

In [10]:
iris_df = iris_df.sample(frac=1, replace=False, random_state=42).reset_index(drop=True)
iris_df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,74,6.1,2.8,4.7,1.2,Iris-versicolor
1,19,5.7,3.8,1.7,0.3,Iris-setosa
2,119,7.7,2.6,6.9,2.3,Iris-virginica
3,79,6.0,2.9,4.5,1.5,Iris-versicolor
4,77,6.8,2.8,4.8,1.4,Iris-versicolor
...,...,...,...,...,...,...
145,72,6.1,2.8,4.0,1.3,Iris-versicolor
146,107,4.9,2.5,4.5,1.7,Iris-virginica
147,15,5.8,4.0,1.2,0.2,Iris-setosa
148,93,5.8,2.6,4.0,1.2,Iris-versicolor


In [11]:
X = iris_df[['PetalLengthCm', 'PetalWidthCm']]
X.describe()

Unnamed: 0,PetalLengthCm,PetalWidthCm
count,150.0,150.0
mean,3.758667,1.198667
std,1.76442,0.763161
min,1.0,0.1
25%,1.6,0.3
50%,4.35,1.3
75%,5.1,1.8
max,6.9,2.5


In [12]:
y = (iris_df['Species'] == "Iris-setosa").astype(int)
y.value_counts()

0    100
1     50
Name: Species, dtype: int64

In [13]:
X.values.dtype, y.values.dtype

(dtype('float64'), dtype('int64'))

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X.values, y.values, test_size=0.3, stratify=y, random_state=42)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((105, 2), (45, 2), (105,), (45,))

In [15]:
X_means, X_stds = X_train.mean(axis=0), X_train.std(axis=0)
X_means, X_stds

(array([3.74190476, 1.19142857]), array([1.75958767, 0.75883548]))

In [16]:
X_train = (X_train - X_means) / X_stds
X_train.mean(axis=0), X_train.std(axis=0)

(array([ 5.07794864e-16, -6.70891913e-16]), array([1., 1.]))

In [17]:
X_val = (X_val - X_means) / X_stds
X_val.mean(axis=0), X_val.std(axis=0)

(array([0.03175347, 0.03179475]), array([0.99763975, 1.0074341 ]))

**Note:** For tabular datasets, performing normalization outside the custom dataset class has an advantage. We can quickly verify that `X_train` and `X_val` have roughly the same distribution (post normalization) - as we've done above. If not, we can try a few different values of `test_size` to get as close as possible.

In [18]:
from torch.utils.data import Dataset, DataLoader

class IrisDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y).float()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

**Note:** Unlike `nn.CrossEntropyLoss`, `nn.BCEWithLogitsLoss` expects (i) the logits to have the same shape as the labels and (ii) the labels to be floats (not integers).

In [19]:
train_ds = IrisDataset(X_train, y_train)
len(train_ds)

105

In [20]:
# Sanity check:
train_ds[0]

(tensor([0.4308, 0.4066]), tensor(0.))

In [21]:
val_ds = IrisDataset(X_val, y_val)
len(val_ds)

45

In [22]:
# Sanity check:
val_ds[0]

(tensor([-1.2173, -1.0430]), tensor(1.))

In [23]:
batch_size = 16
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=2)
len(train_dl), len(val_dl)

(7, 3)

In [24]:
# Sanity check:
X_batch, y_batch = next(iter(train_dl))
X_batch.shape, y_batch.shape

(torch.Size([16, 2]), torch.Size([16]))

In [25]:
# Sanity check:
X_batch, y_batch = next(iter(val_dl))
X_batch.shape, y_batch.shape

(torch.Size([16, 2]), torch.Size([16]))

In [26]:
device = torch.device("cpu")
device

device(type='cpu')

In [27]:
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin

class LinearModel(nn.Module, PyTorchModelHubMixin):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(2, 1)

    def forward(self, x):
        out = self.fc(x)
        return out

In [28]:
model = LinearModel().to(device)
model

LinearModel(
  (fc): Linear(in_features=2, out_features=1, bias=True)
)

In [29]:
# Sanity check:
X_batch = X_batch.to(device)
with torch.no_grad():
    logits = model(X_batch)
logits.shape

torch.Size([16, 1])

In [30]:
loss_fn = nn.BCEWithLogitsLoss()

In [31]:
# Sanity check:
y_batch = y_batch.to(device)
with torch.no_grad():
    loss = loss_fn(logits.squeeze(), y_batch)
loss

tensor(0.7773)

In [32]:
from torch.optim import SGD

learning_rate = 1e-1
optimizer = SGD(model.parameters(), lr=learning_rate)
n_epochs = 100

In [33]:
def train_epoch():
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_dl:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        logits = model(X_batch)
        loss = loss_fn(logits.squeeze(), y_batch)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(train_ds)
    train_loss = round(train_loss, 4)
    return train_loss

In [34]:
import evaluate

def validate_epoch():
    model.eval()
    val_loss = 0
    metric = evaluate.load("accuracy")
    for X_batch, y_batch in val_dl:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        with torch.no_grad():
            logits = model(X_batch)
            loss = loss_fn(logits.squeeze(), y_batch)
        val_loss += loss.item()
        proba = torch.sigmoid(logits.squeeze())
        preds = (proba > 0.5).float()
        metric.add_batch(predictions=preds, references=y_batch)
    val_loss /= len(val_ds)
    val_loss = round(val_loss, 4)
    results = metric.compute()
    acc = results['accuracy']
    return val_loss, acc

In [35]:
import wandb

wandb_config = {
    'architecture': "Logistic Regression",
    'dataset': "Iris",
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'n_epochs': n_epochs,
}
wandb.init(
    project="logistic-regression-iris",
    config=wandb_config,
    notes="Logging min of `train_loss` & `val_loss`, and max of `accuracy`."
)
wandb.define_metric("train_loss", summary="min")
wandb.define_metric("val_loss", summary="min")
wandb.define_metric("accuracy", summary="max")

[34m[1mwandb[0m: Currently logged in as: [33msadhaklal[0m. Use [1m`wandb login --relogin`[0m to force relogin


<wandb.sdk.wandb_metric.Metric at 0x7cd83ae1fc70>

In [36]:
float('inf')

inf

In [37]:
best_epoch = 0
best_val_loss = float('inf')
best_acc = 0
for epoch in range(n_epochs):
    train_loss = train_epoch()

    val_loss, acc = validate_epoch()

    wandb.log({'train_loss': train_loss, 'val_loss': val_loss, 'accuracy': acc})

    if epoch % 10 == 0 or epoch == n_epochs - 1:
        print(f"Epoch: {epoch}, Training Loss: {train_loss}")
        print(f"Validation Loss: {val_loss}, Accuracy: {acc}")
        print("---")

    if val_loss < best_val_loss:
        best_epoch = epoch
        best_val_loss = val_loss
        model.push_to_hub("logistic-regression-iris", commit_message=f"epoch: {epoch}, val_loss: {val_loss}, accuracy: {acc}")

    if acc > best_acc:
        best_acc = acc
wandb.finish()
print("---")
print("Done!")
print(f"Best Epoch: {best_epoch}, Best Validation Loss: {best_val_loss}, Best Accuracy: {best_acc}")

Epoch: 0, Training Loss: 0.049
Validation Loss: 0.0404, Accuracy: 0.4888888888888889
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 10, Training Loss: 0.0106
Validation Loss: 0.0098, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 20, Training Loss: 0.0068
Validation Loss: 0.006, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 30, Training Loss: 0.005
Validation Loss: 0.0044, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 40, Training Loss: 0.0041
Validation Loss: 0.0035, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 50, Training Loss: 0.0034
Validation Loss: 0.003, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 60, Training Loss: 0.003
Validation Loss: 0.0026, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 70, Training Loss: 0.0026
Validation Loss: 0.0023, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 80, Training Loss: 0.0026
Validation Loss: 0.002, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 90, Training Loss: 0.0023
Validation Loss: 0.0018, Accuracy: 1.0
---


pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Epoch: 99, Training Loss: 0.0022
Validation Loss: 0.0017, Accuracy: 1.0
---


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▇██████████████████████████████████████
train_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


---
Done!
Best Epoch: 96, Best Validation Loss: 0.0017, Best Accuracy: 1.0


In [38]:
X_new = np.array([[2.0, 0.5], [3.0, 1.0]])
X_new

array([[2. , 0.5],
       [3. , 1. ]])

In [39]:
X_new = ((X_new - X_means) / X_stds) # Normalize.
X_new

array([[-0.98995054, -0.91117058],
       [-0.42163558, -0.25226624]])

In [40]:
X_new = torch.from_numpy(X_new).float()
X_new

tensor([[-0.9900, -0.9112],
        [-0.4216, -0.2523]])

In [41]:
model.eval()
X_new = X_new.to(device)
with torch.no_grad():
    logits = model(X_new)
proba = torch.sigmoid(logits.squeeze())
proba

tensor([0.9076, 0.3601])

In [42]:
preds = (proba > 0.5).long()
preds

tensor([1, 0])