In [None]:
!pip install datasets > /dev/null


In [None]:
!pip install torch torchvision pytorch-lightning > /dev/null

In [None]:
!pip install mlflow
!databricks configure --host https://community.cloud.databricks.com/

Collecting mlflow
  Downloading mlflow-2.9.2-py3-none-any.whl (19.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.1/19.1 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
Collecting databricks-cli<1,>=0.8.7 (from mlflow)
  Downloading databricks_cli-0.18.0-py2.py3-none-any.whl (150 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.3/150.3 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython<4,>=2.1.0 (from mlflow)
  Downloading GitPython-3.1.41-py3-none-any.whl (196 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.4/196.4 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker<7,>=4.0.0 (from mlflow)
  Downloading docker-6.1.3-py3-none-any.whl (148 kB)
[2K     [90m━━━━━━

In [None]:
import mlflow

# Send all tracking calls to the Databricks-hosted MLflow server.
mlflow.set_tracking_uri(uri="databricks")

# Select the experiment that subsequent runs are recorded under. Kept as the
# last expression so the cell displays the resulting Experiment object.
mlflow.set_experiment(
    experiment_name="/Users/mathieu.latournerie@epita.fr/Human action recognition"
)

<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/2713494469571469', creation_time=1705593321771, experiment_id='2713494469571469', last_update_time=1705834446652, lifecycle_stage='active', name='/Users/mathieu.latournerie@epita.fr/Human action recognition', tags={'mlflow.experiment.sourceName': '/Users/mathieu.latournerie@epita.fr/Human '
                                 'action recognition',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'mathieu.latournerie@epita.fr',
 'mlflow.ownerId': '6684753197342109'}>

In [None]:
# Dependencies for loading and preprocessing the images
from datasets import load_dataset
import torch
from torchvision.transforms import v2
from tqdm import tqdm

# Load dataset
ds = load_dataset("Bingsu/Human_Action_Recognition")

# Preprocessing pipeline applied once to every training image: convert to a
# tensor, resize to 224x224, rescale to float32 and normalise per channel
# (the mean/std values look like the standard ImageNet statistics).
transform = v2.Compose(
    [
        v2.ToTensor(),
        v2.ToDtype(torch.uint8, scale=True),  # optional, most input are already uint8 at this point
        v2.Resize(size=(224, 224), antialias=True),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Eagerly materialise the whole train split as (image_tensor, label) pairs.
train_ds = [
    (transform(sample['image']), sample['labels'])
    for sample in tqdm(ds['train'])
]
#test_ds = [(transform(el['image']), el['labels']) for el in tqdm(ds['test'])]

# The raw dataset object is no longer needed once the tensors are built.
del ds

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/4.70k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/229M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/98.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/12600 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5400 [00:00<?, ? examples/s]

100%|██████████| 12600/12600 [00:37<00:00, 337.27it/s]


In [None]:
from torch import nn
from torch.utils.data import DataLoader, random_split
from torchvision import models
import pytorch_lightning as pl
import numpy as np
# Turn on MLflow autologging. The log lines this call prints list the
# integrations it enabled (pytorch_lightning among them).
mlflow.autolog()

class HumanActionDataModule(pl.LightningDataModule):
    """Splits an in-memory dataset into train/validation parts and serves loaders.

    Args:
        train_ds_in: Sized, indexable collection of samples to split
            (anything ``random_split`` accepts).
        val_split: Fraction of ``train_ds_in`` held out for validation.
        train_batch_size: Batch size of the (shuffled) training loader.
        val_batch_size: Batch size of the validation loader.
        num_workers: Worker processes used by both loaders.
    """

    def __init__(self, train_ds_in, val_split=0.2, train_batch_size=16,
                 val_batch_size=64, num_workers=4):
        super().__init__()
        self.train_ds_in = train_ds_in
        self.val_split = val_split
        self.train_batch_size = train_batch_size
        self.val_batch_size = val_batch_size
        self.num_workers = num_workers
        # Populated by setup(); None means "not split yet".
        self.train_ds = None
        self.val_ds = None

    def setup(self, stage=None):
        """Create the train/validation split once and reuse it afterwards."""
        # setup() can run again for later stages; re-splitting at random would
        # move samples between the training and validation sets.
        if self.train_ds is not None and self.val_ds is not None:
            return
        # Sizes are derived from the dataset this module was given, not from
        # a notebook-level global.
        total = len(self.train_ds_in)
        val_size = int(total * self.val_split)
        train_size = total - val_size
        self.train_ds, self.val_ds = random_split(self.train_ds_in, [train_size, val_size])

    def train_dataloader(self):
        """Return the shuffled loader over the training subset."""
        return DataLoader(self.train_ds, batch_size=self.train_batch_size,
                          shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        """Return the loader over the validation subset."""
        return DataLoader(self.val_ds, batch_size=self.val_batch_size,
                          num_workers=self.num_workers)


class ResNetModel(pl.LightningModule):
    """ResNet-50 classifier with a frozen pretrained backbone and a new linear head.

    Only the replacement ``fc`` layer is trainable. Validation metrics are
    collected per batch in ``val_accs`` / ``val_losses`` (reset at the start of
    every validation epoch) and logged as ``val_acc`` / ``val_loss``.

    Args:
        num_classes: Number of output classes of the new head.
    """

    def __init__(self, num_classes):
        super().__init__()
        # `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1
        # is the checkpoint that flag selected.
        self.model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        # Freeze the backbone.
        # NOTE(review): the module is presumably in train mode during fit, so
        # BatchNorm running statistics may still be updated — confirm intended.
        for param in self.model.parameters():
            param.requires_grad = False
        # The fresh head is created after freezing, so it stays trainable.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
        self.loss = nn.CrossEntropyLoss()
        self.val_losses = []
        self.val_accs = []
        self.epoch = 0  # number of validation epochs completed so far
        self.transform = v2.Compose([
            v2.ToTensor(),
            v2.ToDtype(torch.uint8, scale=True),  # optional, most input are already uint8 at this point
            v2.Resize(size=(224, 224), antialias=True),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    @staticmethod
    def _batch_accuracy(logits, labels):
        """Return the fraction of rows whose argmax over dim 1 equals the label."""
        predictions = torch.argmax(logits, dim=1)
        return torch.sum(labels == predictions).item() / (len(labels) * 1.0)

    def forward(self, x):
        """Run the wrapped ResNet on ``x`` and return its raw outputs."""
        return self.model(x)

    def predict_step(self, batch, batch_idx):
        """Apply ``self.transform`` to the batch and return the argmax class indices."""
        x = self.transform(batch)
        out = self(x)
        return torch.argmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one training batch."""
        images, labels = batch[0], batch[1]
        outputs = self(images)
        loss = self.loss(outputs, labels)
        # Epoch-level training loss so it can be compared with val_loss.
        self.log('train_loss', loss, on_step=False, on_epoch=True)
        return loss

    def on_validation_epoch_start(self):
        """Drop the previous epoch's per-batch metrics.

        Without this reset the reported values are running averages over every
        validation epoch so far (sanity check included), not per-epoch metrics.
        """
        self.val_accs.clear()
        self.val_losses.clear()

    def validation_step(self, batch, batch_idx):
        """Record loss and accuracy of one validation batch."""
        images, labels = batch[0], batch[1]
        outputs = self(images)
        loss = self.loss(outputs, labels)

        self.val_accs.append(self._batch_accuracy(outputs, labels))
        self.val_losses.append(loss.item())

    def on_validation_epoch_end(self):
        """Print and log the mean of this epoch's per-batch validation metrics."""
        if not self.val_accs:
            # No validation batches ran; np.mean([]) would only produce NaN.
            return
        acc = float(np.mean(self.val_accs))
        loss = float(np.mean(self.val_losses))
        print(f"Val acc:{acc}")
        # Stable metric names: one series per metric instead of a new metric
        # key for every epoch.
        self.log('val_acc', acc)
        self.log('val_loss', loss)
        self.epoch += 1

    def test_step(self, batch, batch_idx):
        """Compute, log and return the loss of one test batch; also logs accuracy."""
        x, y = batch
        out = self(x)
        loss = self.loss(out, y)
        test_acc = self._batch_accuracy(out, y)

        # Previously computed but discarded; log them so test runs report metrics.
        self.log('test_loss', loss)
        self.log('test_acc', test_acc)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over the trainable parameters only."""
        # Frozen backbone weights never receive gradients, so leave them out.
        trainable = [p for p in self.model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(trainable, lr=1e-3)
        return optimizer



# Wrap the preprocessed training samples; the validation fraction is left at
# the data module's default.
data_module = HumanActionDataModule(train_ds_in=train_ds)



2024/01/21 13:22:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for transformers.
2024/01/21 13:22:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/01/21 13:22:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for pytorch_lightning.
2024/01/21 13:22:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


In [None]:
SEED = 42          # fixes the random split and the head initialisation across re-runs
NUM_CLASSES = 15   # size of the classification head
MAX_EPOCHS = 10

# Seed Python, NumPy and PyTorch (and DataLoader workers) before anything random
# happens: the train/validation split is drawn inside trainer.fit().
pl.seed_everything(SEED, workers=True)

with mlflow.start_run(run_name='accuracy test'):
    model = ResNetModel(num_classes=NUM_CLASSES)
    # Train the model
    trainer = pl.Trainer(max_epochs=MAX_EPOCHS)
    trainer.fit(model, data_module)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 135MB/s]
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | model     | ResNet           | 23.5 M
1 | loss      | CrossEntropyLoss | 0     
2 | transform | Compose          | 0     
-----------------------------------------------
30.7 K    Trainable params
23.5 M    Non-trainable params
23.5 M    Total params
94.155    Total

Sanity Checking: |          | 0/? [00:00<?, ?it/s]



Val acc:0.0390625


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6277281746031745


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.646214430894309


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6464737021857924


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6506237139917694


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6527949669966997


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.65642217630854


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6586694739952719


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.661523033126294


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6637891344383058


Validation: |          | 0/? [00:00<?, ?it/s]

Val acc:0.6648398631840796


INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


Uploading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]