In [1]:
!pip install torch torchvision pytorch-lightning wandb

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0mCollecting torch
  Downloading torch-2.2.2-cp39-none-macosx_10_9_x86_64.whl (150.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.8/150.8 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchvision
  Downloading torchvision-0.17.2-cp39-cp39-macosx_10_13_x86_64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pytorch-lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting

In [3]:
import os
import torch
import numpy as np
import random
import argparse
import wandb
import tempfile
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
from data_colab import DataModule
from models_colab import SparseDeepModel

In [4]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
def main(args):
    print("GPU Available:", torch.cuda.is_available())

    saving_dir = '/content/'  # Or '/content' if not using Google Drive
    os.makedirs(saving_dir, exist_ok=True)

    set_seed(args.seed)

    wandb_logger = WandbLogger(project=args.project_name, log_model=True) if args.log_wandb else None

    # Define dataset and model
    data_module = DataModule(dataset_name=args.dataset_name, batch_size=args.batch_size)
    model = SparseDeepModel(
        model_name=args.model_name,
        num_classes=data_module.num_classes,
        lr=args.lr,
        weight_decay=args.weight_decay
    )

    # Callbacks
    callbacks = []
    if args.model_checkpoint:
        callbacks.append(ModelCheckpoint(
            monitor='val_loss',
            dirpath=os.path.join(saving_dir, 'checkpoints'),
            filename='best-{epoch:02d}-{val_loss:.2f}',
            save_top_k=1,
            mode='min'
        ))
    if args.early_stopping:
        callbacks.append(EarlyStopping(monitor='val_loss', patience=10))

    trainer = Trainer(
        max_epochs=args.max_epochs,
        accelerator='gpu',  # Use GPU
        devices=1,          # Use 1 GPU
        logger=wandb_logger,
        log_every_n_steps=1,
        callbacks=callbacks
    )

    trainer.fit(model, data_module)
    trainer.test(model, dataloaders=data_module.test_dataloader())


In [7]:
args = argparse.Namespace(
            dataset_name='CIFAR10',
            model_name='AlexNet',
            batch_size=32,
            lr=0.001,
            max_epochs=10,
            weight_decay=1e-4,
            project_name='Colab_Experiment',
            log_wandb=True,
            model_checkpoint=True,
            early_stopping=True,
            seed=42
        )

In [9]:
wandb.login(key="3c5767e934e3aa77255fc6333617b6e0a2aab69f")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mriccardoconci96[0m ([33mSLT_poggio24[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/christianaagnes/.netrc


True

In [8]:
main(args)

GPU Available: False


OSError: [Errno 30] Read-only file system: '/content'