<a href="https://colab.research.google.com/github/maritnorli/IFCB_CNN_Classify/blob/main/CNN_transferlearning_IFCB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### ***Mount Drive manually in Colab!!!***

### Copy files from drive to colab to work in colab

In [1]:
## Import the colab libraries and set up paths
from google.colab import drive
import os

## copy files from drive to colab
import shutil
# Specify the path to your "going_modular" folder in Google Drive
drive_path = "/content/drive/MyDrive/IFCB/going_modular"


# List files in the "going_modular" folder
files = os.listdir(drive_path)

# Copy each file individually to the /content directory
for file in files:
    src = os.path.join(drive_path, file)
    dst = os.path.join("/content", file)
    try:
        shutil.copy(src, dst)
    except Exception as e:
        print(f"Failed to copy {file}: {e}")

# wait a minute or so before it pops up

## 01. Prepare data

In [12]:
from pathlib import Path

# Set data paths
data_path = Path("/content/drive/MyDrive/IFCB/data/")
image_path = Path("/content/drive/MyDrive/IFCB/data/IFCB_test_train")

# Setup Dirs
train_dir = image_path / "Train"
test_dir = image_path / "Test"


In [13]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Now try importing modules directly from the copied drive files
try:
    import data_setup
    import engine

except ModuleNotFoundError as e:
    print(f"Failed to import modules: {e}")


In [14]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [16]:
#Create transforms pipeline
simple_transform = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

In [19]:
# Create training and testing dataloaders as well as get a list of class names
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                               test_dir=test_dir,
                                                                               transform=simple_transform, # resize, convert images to between 0 & 1 and normalize them
                                                                               batch_size=32) # set mini-batch size to 32

train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x7f594051e3b0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f594051d510>,
 ['Chaetoceros_decipiens_118',
  'Guinardia_delicatula_095',
  'Tripos_muelleri_008'])

### Get and prepare a pretrained model


In [20]:
# model = torchvision.models.efficientnet_b0(pretrained=True)

# New method of creating a pretrained model (torchvision v0.13+)
#weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT # ".DEFAULT" = best available weights
model = torchvision.models.efficientnet_b0(pretrained=True).to(device)
#model #uncoment to output (its very long)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 66.5MB/s]


In [21]:
# Freeze all of the base layers in EffNetB0
for param in model.features.parameters():
  # print(param)
  param.requires_grad = False

In [22]:
# Update the classifier head of our model to suit our problem
from torch import nn

torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Get the length of class_names (one output unit for each class)
output_shape = len(class_names)

#Recreate the classifier layer and seed it to the target device

model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, # feature vector coming in
              out_features=output_shape,
              bias=True)).to(device) # how many classes do we have?

#model.classifier

### Train model

In [23]:
# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [24]:
# Set the manual seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Start the timer
from timeit import default_timer as timer
start_time = timer()

# Setup training and save the results
results = engine.train(model=model,
                       train_dataloader=train_dataloader,
                       test_dataloader=test_dataloader,
                       optimizer=optimizer,
                       loss_fn=loss_fn,
                       epochs=5,
                       device=device)

# End the timer and print out how long it took
end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.8551 | train_acc: 0.6667 | test_loss: 0.5687 | test_acc: 1.0000
Epoch: 2 | train_loss: 0.4823 | train_acc: 0.9251 | test_loss: 0.4524 | test_acc: 1.0000
Epoch: 3 | train_loss: 0.3169 | train_acc: 0.9554 | test_loss: 0.3679 | test_acc: 1.0000
Epoch: 4 | train_loss: 0.2022 | train_acc: 0.9821 | test_loss: 0.2679 | test_acc: 1.0000
Epoch: 5 | train_loss: 0.1382 | train_acc: 0.9955 | test_loss: 0.2006 | test_acc: 1.0000
[INFO] Total training time: 221.102 seconds


### Make predictions on the entire test dataset with the model

In [None]:
from tqdm.auto import tqdm
# Make predictions on the entire test dataset
test_preds = []
model.eval()
print(f"Length of test dataloader is {len(test_dataloader)} batches")

with torch.inference_mode():
  #loop through the batches in the test dataloader
  for X, y in tqdm(test_dataloader):
    X, y = X.to(device), y.to(device)
    # PAss the data through the model
    test_logits = model(X)

    #convert the pred logits to pred probs
    pred_probs = torch.softmax(test_logits, dim=1)

    #convert the pred probs into pred labels
    pred_labels = torch.argmax(pred_probs, dim=1)


