In [None]:
import os
import shutil
import subprocess
import sys

# Function to check if running in Google Colab
def in_colab():
    return 'google.colab' in sys.modules

# Define the base path for the dataset
if in_colab():
    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Assuming the dataset is in "drive/My Drive/RealWaste" in Google Drive
    dataset_path = '/content/drive/My Drive/RealWaste'
else:
    # Path in your local container
    dataset_path = input("Give the path, where the datafolder should be placed:\n")

# Ensure the dataset directory exists (specifically for local container, as Colab will have it in Drive)
if not in_colab():
    os.makedirs(dataset_path, exist_ok=True)  # Create the directory if it doesn't exist

# Clone the repository only if the dataset does not already exist
if not os.listdir(dataset_path):  # Checks if the dataset directory is empty
    !git clone https://github.com/sam-single/realwaste.git {dataset_path}
    # Remove unnecessary files and directories
    readme_path = os.path.join(dataset_path, 'README.md')
    if os.path.exists(readme_path):
        os.remove(readme_path)
    realwaste_dir = os.path.join(dataset_path, 'RealWaste')
    if os.path.isdir(realwaste_dir):
        !mv {realwaste_dir}/* {dataset_path}
        shutil.rmtree(realwaste_dir)  # Remove the now-empty RealWaste directory
    git_dir = os.path.join(dataset_path, '.git')
    if os.path.isdir(git_dir):
        shutil.rmtree(git_dir)  # Remove the .git directory
    

print("Dataset Path: ", dataset_path)


In [2]:
# Replaces whatever `dataset_path` the setup cell computed with a fixed location.
# NOTE(review): the leading "~" is stored unexpanded here -- confirm that every
# consumer of this path expands it before touching the filesystem.
dataset_path = "~/work/Sonstiges/Module/Machine_Learning/RealWaste/data"

In [3]:
import pytorch_lightning as pl
import torch
import torchmetrics
from torchvision.models import inception_v3
import torchvision.utils as vutils
import random

class WasteClassifier(pl.LightningModule):
    """Pretrained Inception v3 fine-tuned as a ``num_classes``-way image classifier.

    Both classification heads of the backbone (the main ``fc`` layer and the
    auxiliary ``AuxLogits.fc`` layer) are replaced by fresh linear layers with
    ``num_classes`` outputs. During training the loss is the main-head
    cross-entropy plus ``AUX_LOSS_WEIGHT`` times the auxiliary-head
    cross-entropy.

    Args:
        num_classes: Number of target classes.
    """

    # Weight of the auxiliary-head loss relative to the main-head loss.
    AUX_LOSS_WEIGHT = 0.4
    # Chance that a validation batch contributes one sample image to the logger.
    IMAGE_LOG_PROBABILITY = 0.1

    def __init__(self, num_classes=9):
        super().__init__()
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is so older installs keep working.
        self.model = inception_v3(pretrained=True, aux_logits=True)
        self.model.fc = torch.nn.Linear(self.model.fc.in_features, num_classes)
        # The auxiliary head must be resized as well; otherwise it keeps the
        # output size it was pretrained with while being trained against
        # labels in [0, num_classes).
        self.model.AuxLogits.fc = torch.nn.Linear(
            self.model.AuxLogits.fc.in_features, num_classes
        )

        # Initialize metrics
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes, average='macro')
        # NOTE(review): precision/recall are created but not used by any step
        # visible in this class.
        self.precision = torchmetrics.Precision(task='multiclass', num_classes=num_classes, average='weighted')
        self.recall = torchmetrics.Recall(task='multiclass', num_classes=num_classes, average='weighted')

    @staticmethod
    def _main_logits(outputs):
        """Return the main-head logits from either output form of the backbone."""
        return outputs if isinstance(outputs, torch.Tensor) else outputs.logits

    def forward(self, x):
        """Run the backbone on ``x`` and return its output unchanged.

        In train mode Inception v3 returns an ``InceptionOutputs`` object with
        ``.logits`` and ``.aux_logits``; in eval mode it returns the logits
        tensor directly.
        """
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one batch and log loss and accuracy.

        Args:
            batch: ``(x, y)`` pair of inputs and integer class targets.
            batch_idx: Index of the batch (unused).

        Returns:
            The main-head cross-entropy, plus the weighted auxiliary-head
            cross-entropy whenever the backbone produced auxiliary logits.
        """
        x, y = batch
        outputs = self(x)
        logits = self._main_logits(outputs)
        loss = torch.nn.functional.cross_entropy(logits, y)

        # A plain tensor (eval-mode output) has no `aux_logits` attribute.
        aux_logits = getattr(outputs, 'aux_logits', None)
        if aux_logits is not None:
            aux_loss = torch.nn.functional.cross_entropy(aux_logits, y)
            loss = loss + self.AUX_LOSS_WEIGHT * aux_loss  # Combine main loss and auxiliary loss

        acc = self.accuracy(torch.argmax(logits, dim=1), y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log validation loss/accuracy and occasionally one sample image.

        Args:
            batch: ``(x, y)`` pair of inputs and integer class targets.
            batch_idx: Index of the batch (unused).
        """
        x, y = batch
        # Accept both output forms so the step also works if it is invoked
        # while the module is in train mode.
        logits = self._main_logits(self(x))
        loss = torch.nn.functional.cross_entropy(logits, y)
        acc = self.accuracy(torch.argmax(logits, dim=1), y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

        # Image logging needs an experiment object exposing `add_image`;
        # skip it when there is no logger or the logger lacks that method.
        experiment = getattr(self.logger, 'experiment', None)
        if hasattr(experiment, 'add_image') and random.random() < self.IMAGE_LOG_PROBABILITY:
            # Select a random image from the batch
            idx = random.randint(0, x.size(0) - 1)
            img_grid = vutils.make_grid(x[idx], normalize=True)
            experiment.add_image('random_val_images', img_grid, self.current_epoch)

    def configure_optimizers(self):
        """Return an SGD optimizer (lr=0.001, momentum=0.9) over all parameters."""
        optimizer = torch.optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        return optimizer


In [4]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

class RealWasteDataModule(pl.LightningDataModule):
    """Serves an ``ImageFolder`` dataset as an 80/20 train/validation split.

    Args:
        data_dir: Root directory handed to ``ImageFolder``.
        batch_size: Batch size used by both dataloaders.
        seed: Seed for the generator that draws the train/validation split,
            so the same images land in the same subset on every run.
    """

    def __init__(self, data_dir: str = './', batch_size: int = 32, seed: int = 42):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.seed = seed
        # Populated by setup(); None means "not split yet".
        self.train_dataset = None
        self.val_dataset = None
        self.transform = transforms.Compose([
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def setup(self, stage=None):
        """Build the train/validation subsets once.

        ``setup`` may be invoked more than once (e.g. for different stages).
        Re-drawing the split on a later call could move images that were
        trained on into the validation subset, so an existing split is kept.
        """
        if self.train_dataset is not None and self.val_dataset is not None:
            return

        # Split dataset into train and validation sets
        full_dataset = ImageFolder(root=self.data_dir, transform=self.transform)
        train_size = int(0.8 * len(full_dataset))
        val_size = len(full_dataset) - train_size
        # A dedicated, seeded generator keeps the split reproducible without
        # touching the global RNG state.
        split_generator = torch.Generator().manual_seed(self.seed)
        self.train_dataset, self.val_dataset = random_split(
            full_dataset, [train_size, val_size], generator=split_generator
        )

    def train_dataloader(self):
        """Return a shuffled dataloader over the training subset."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return an unshuffled dataloader over the validation subset."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size)


In [5]:
# Directory passed as the first argument to TensorBoardLogger in the training cell.
# NOTE(review): hard-coded absolute path -- adjust when running on another machine.
log_dir = "/home/jovyan/logs"

In [6]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

# Logging: TensorBoard event files go under `log_dir`, in the "inception_v3" run group
logger = TensorBoardLogger(log_dir, name="inception_v3")

# Model: nine-class waste classifier
model = WasteClassifier(num_classes=9)

# Data: images are read from the dataset location chosen earlier in the notebook
data_dir = dataset_path
data_module = RealWasteDataModule(data_dir=data_dir)

# Fit for a single epoch
trainer = Trainer(max_epochs=1, logger=logger)
trainer.fit(model, datamodule=data_module)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2024-02-11 23:11:54.808296: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-11 23:11:54.917147: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory fo

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
