In [2]:
!wget https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv

--2024-08-10 09:01:23--  https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3858 (3.8K) [text/plain]
Saving to: ‘iris.csv’


2024-08-10 09:01:23 (22.3 MB/s) - ‘iris.csv’ saved [3858/3858]



In [3]:
import lightning as L
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [5]:
data = pd.read_csv("iris.csv")
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [24]:
data.shape

(150, 5)

In [23]:
data['species'].value_counts()

species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64

In [26]:
data.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [28]:
labels = {}
for index, element in enumerate(data['species'].unique()):
    #print(index, element)
    labels[element] = index

data.loc[:, "labels"] = data['species'].apply(lambda x: labels[x])
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,labels
0,5.1,3.5,1.4,0.2,setosa,0
1,4.9,3.0,1.4,0.2,setosa,0
2,4.7,3.2,1.3,0.2,setosa,0
3,4.6,3.1,1.5,0.2,setosa,0
4,5.0,3.6,1.4,0.2,setosa,0


In [31]:
columns = data.drop(['species', 'labels'], axis=1).columns.to_list()
columns

['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

In [34]:
class DataSet(torch.utils.data.Dataset):
    def __init__(self, data, normalizer):
        super(DataSet, self).__init__()
        self.data = data
        self.feature = normalizer.transform(self.data[columns].values)
        self.feature = torch.tensor(self.feature).float()
        self.label = torch.tensor(self.data['labels'].values)

    def __len__(self):
        return len(self.feature)
    
    def __getitem__(self, idx):
        return {"feature": self.feature[idx], "label": self.label[idx]}

In [35]:
BATCH_SIZE = 128
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
test_data, val_data = train_test_split(test_data, test_size=0.25, random_state=42)
normalizer = StandardScaler()
normalizer.fit(train_data[columns].values)

In [37]:
train_loader = torch.utils.data.DataLoader(DataSet(train_data, normalizer), batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(DataSet(val_data, normalizer), batch_size=BATCH_SIZE, num_workers=2)
test_loader = torch.utils.data.DataLoader(DataSet(test_data, normalizer), batch_size=BATCH_SIZE, num_workers=2)

In [59]:
data_loader = next(iter(train_loader))
print(len(data_loader["feature"]))
print(len(data_loader["label"]))
print(data_loader["feature"], "\n", data_loader["label"])

120
120
tensor([[-1.7177,  0.3093, -1.3906, -1.3126],
        [ 0.7202,  0.0857,  1.0162,  0.8232],
        [-0.8645,  0.9801, -1.3333, -1.1791],
        [-0.8645,  0.7565, -1.2760, -1.3126],
        [-0.9863, -0.1379, -1.2187, -1.3126],
        [-0.4988,  1.4272, -1.2760, -1.3126],
        [ 1.3297,  0.3093,  1.1308,  1.4906],
        [ 0.4764, -0.3615,  0.3286,  0.1557],
        [ 2.3049,  1.6508,  1.7039,  1.3571],
        [-1.4739,  0.3093, -1.3333, -1.3126],
        [ 0.9640, -0.1379,  0.3859,  0.2892],
        [-0.9863,  0.9801, -1.2187, -0.7787],
        [ 0.7202,  0.3093,  0.4432,  0.4227],
        [-0.0112, -0.8086,  0.2139, -0.2447],
        [-0.1331, -0.3615,  0.2712,  0.1557],
        [ 1.5735, -0.1379,  1.2454,  1.2236],
        [ 1.0859,  0.5329,  1.1308,  1.7576],
        [-0.6207,  1.4272, -1.2760, -1.3126],
        [ 1.4516,  0.3093,  0.5578,  0.2892],
        [ 0.7202, -0.3615,  0.3286,  0.1557],
        [ 1.0859,  0.0857,  0.5578,  0.4227],
        [ 1.0859,  0.5329,

In [53]:
class Model(L.LightningModule):
    def __init__(self, num_classes=len(labels), learning_rate=5e-3, input_dim=len(columns)):
        super(Model, self).__init__()
        self.learning_rate = learning_rate
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(input_dim, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, num_classes),
        )

    def forward(self, x):
        return self.mlp(x)
        
    def training_step(self, batch, batch_idx):
        x = batch["feature"]
        y = batch["label"]
        y_pred = self(x)
        train_loss = torch.nn.CrossEntropyLoss()(y_pred, y)
        self.log('train_loss', train_loss, prog_bar=True)
        return train_loss
        
    def validation_step(self, batch, batch_idx):
        x = batch["feature"]
        y = batch["label"]
        y_pred = self(x)
        val_loss = torch.nn.CrossEntropyLoss()(y_pred, y)
        self.log('val_loss', val_loss, prog_bar=True)
        return val_loss
    
    def test_step(self, batch, batch_idx):
        x = batch["feature"]
        y = batch["label"]
        y_pred = self(x)
        test_loss = torch.nn.CrossEntropyLoss()(y_pred, y)
        self.log('test_loss', test_loss, prog_bar=True)
        return test_loss
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

In [54]:
model = Model()

trainer = L.Trainer(max_epochs=200, detect_anomaly=True)
trainer.fit(model, train_loader, val_loader)

You have turned on `Trainer(detect_anomaly=True)`. This will significantly slow down compute speed and is recommended only for model debugging.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name | Type       | Params | Mode 
--------------------------------------------
0 | mlp  | Sequential | 2.5 K  | train
--------------------------------------------
2.5 K     Trainable params
0         Non-trainable params
2.5 K     Total params
0.010     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

                                                                            

/home/codespace/.python/current/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 199: 100%|██████████| 1/1 [00:00<00:00,  6.72it/s, v_num=3, train_loss=0.0135, val_loss=0.00126]

`Trainer.fit` stopped: `max_epochs=200` reached.


Epoch 199: 100%|██████████| 1/1 [00:00<00:00,  6.41it/s, v_num=3, train_loss=0.0135, val_loss=0.00126]


In [55]:
results = trainer.test(model, test_loader)
print(results)

Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 71.94it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss          0.002093326533213258
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
[{'test_loss': 0.002093326533213258}]


In [57]:
model_path = "iris_model.ckpt"
trainer.save_checkpoint(model_path)

In [58]:
model = Model.load_from_checkpoint(model_path)

In [74]:
model.eval() 
new_data = torch.Tensor([ 0.9640, -0.1379,  0.3859,  0.2892]) 
predictions = model(new_data)
print(predictions)

tensor([-5.3401,  8.7155, -5.0104], grad_fn=<ViewBackward0>)


In [75]:
predicted_index = torch.argmax(predictions).item() 
print(predicted_index)

# Map the index to the actual class label
class_labels = ['Setosa', 'Versicolor', 'Virginica']
predicted_label = class_labels[predicted_index]

print(f"Predicted Label: {predicted_label}")

1
Predicted Label: Versicolor
