In [2]:
import h5py
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import matplotlib.pyplot as plt
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torch import nn
import torchvision

# choose the compute device: CUDA first, then Apple MPS, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


## Data Preparation

In [None]:
# read datafile (h5); the context manager closes the file handle once both
# arrays have been copied into memory. Indexing with [] (instead of .get)
# raises KeyError right here if a dataset is missing.
with h5py.File("Galaxy10_DECals.h5", "r") as f:
    img_in = np.array(f["images"])
    lab_in = np.array(f["ans"])

# write data to tensors
# Images are moved channel-first because ToPILImage reads tensors as (C, H, W)
# (assumes the file stores them as (N, H, W, C) — TODO confirm).
imgs = torch.from_numpy(img_in).permute(0, 3, 1, 2)
# nn.CrossEntropyLoss takes class-index targets as int64, so cast explicitly
# rather than relying on whatever integer dtype the file stores.
labs = torch.from_numpy(lab_in).long()
assert len(imgs) == len(labs), "images and labels must have the same length"

# data augmentation based on model input
MOD_INPUT_SIZE = 224
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# random augmentation: used for training samples only
data_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(MOD_INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# deterministic preprocessing: used for test/validation samples so the
# reported metrics do not depend on random crops or flips
eval_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((MOD_INPUT_SIZE, MOD_INPUT_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])


class GalaxyDataset(torch.utils.data.Dataset):
    """Subset of the image/label tensors that yields (img, label) tuples.

    The transform runs inside __getitem__, so random augmentations are
    re-drawn every time a sample is fetched and only the raw image tensor is
    kept in memory (no pre-transformed float copy of the whole dataset).
    """

    def __init__(self, images, targets, indices, transform):
        self.images = images
        self.targets = targets
        self.indices = indices
        self.transform = transform

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, item):
        source_idx = int(self.indices[item])
        return self.transform(self.images[source_idx]), self.targets[source_idx]


# separating training and test/validation data
# Splitting sample indices (rather than materialised tuples) lets each subset
# carry its own transform.
train_idx, test_idx = train_test_split(
    np.arange(len(labs)), test_size=0.1, random_state=10
)
train_data = GalaxyDataset(imgs, labs, train_idx, data_transform)
test_data = GalaxyDataset(imgs, labs, test_idx, eval_transform)

# wrapping dataloaders; only the training loader needs shuffling
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)

## Model Training

In [1]:
learning_rate = 1e-3
batch_size = 64
NUM_CLASSES = 10  # size of the replacement classification head

# Use the same CUDA -> MPS -> CPU fallback chain as the setup cell, so that
# re-running this cell does not silently drop the MPS option.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

# ResNet-50 initialised with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument — switch once the minimum torchvision
# version for this notebook is confirmed.
model_conv = torchvision.models.resnet50(pretrained=True)

# Swap the final fully connected layer for a fresh NUM_CLASSES-way head.
# (Architectures that expose their head as `classifier` instead of `fc` need
# the equivalent replacement on that attribute.)
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, NUM_CLASSES)

model_conv = model_conv.to(device)

# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Plain SGD over every parameter of the network (backbone and new head).
optimizer = torch.optim.SGD(model_conv.parameters(), lr=learning_rate)

def train_loop(dataloader, model, loss_fn, optimizer, device):
    """Run one optimisation pass over `dataloader`.

    Args:
        dataloader: iterable of (X, y) batches; must expose `.dataset` with a
            length (used only for the progress message).
        model: the module to train; it is switched to training mode first.
        loss_fn: callable taking (predictions, targets) and returning a
            scalar tensor.
        optimizer: optimizer already bound to `model`'s parameters.
        device: device identifier that each batch is moved to.

    Returns:
        list[float]: the loss of every batch in iteration order. Plain Python
        floats are stored (not tensors) so the history neither keeps device
        memory / autograd references alive nor needs conversion before
        plotting.
    """
    size = len(dataloader.dataset)
    # Layers such as BatchNorm and Dropout behave differently in eval mode;
    # make sure training behaviour is active even after an evaluation pass.
    model.train()
    loss_l = []
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record metrics
        batch_loss = loss.item()
        loss_l.append(batch_loss)

        # Progress report every 64 batches
        if batch % 64 == 0:
            current = (batch + 1) * len(X)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

    return loss_l
            
def test_loop(dataloader, model, loss_fn, device):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X.to(device))
            test_loss += loss_fn(pred, y.to(device)).item()
            correct += (pred.argmax(1) == y.to(device)).type(torch.float).sum().item()
    
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss, correct

NameError: name 'torch' is not defined

In [13]:
# schedule and bookkeeping containers for the run
epochs = 3
epoch_l = [*range(epochs)]
data_seen = range(epochs * len(train_dataloader.dataset))
test_l, correct_l, lost_tot = [], [], []

# one training pass followed by one evaluation pass per epoch
for t in epoch_l:
    print(f"Epoch {t+1}\n-------------------------------")

    # per-batch training losses are concatenated across epochs
    lost_l = train_loop(train_dataloader, model_conv, loss_fn, optimizer, device)
    lost_tot += lost_l

    # per-epoch evaluation metrics
    test_loss, correct = test_loop(test_dataloader, model_conv, loss_fn, device)
    test_l += [test_loss]
    correct_l += [correct]
print("Done!")

Epoch 1
-------------------------------
loss: 2.281027  [   64/15962]
loss: 2.242123  [ 4160/15962]
loss: 2.249239  [ 8256/15962]
loss: 2.232876  [12352/15962]
Test Error: 
 Accuracy: 21.3%, Avg loss: 2.200167 

Epoch 2
-------------------------------
loss: 2.193328  [   64/15962]
loss: 2.183141  [ 4160/15962]
loss: 2.157067  [ 8256/15962]
loss: 2.134340  [12352/15962]
Test Error: 
 Accuracy: 29.5%, Avg loss: 2.096284 

Epoch 3
-------------------------------
loss: 2.067110  [   64/15962]
loss: 2.035029  [ 4160/15962]
loss: 2.021489  [ 8256/15962]
loss: 2.023919  [12352/15962]
Test Error: 
 Accuracy: 35.7%, Avg loss: 1.916379 

Epoch 4
-------------------------------
loss: 1.983937  [   64/15962]
loss: 1.705292  [ 4160/15962]
loss: 1.728369  [ 8256/15962]
loss: 1.647164  [12352/15962]
Test Error: 
 Accuracy: 41.4%, Avg loss: 1.701234 

Epoch 5
-------------------------------
loss: 1.787027  [   64/15962]
loss: 1.594134  [ 4160/15962]
loss: 1.671380  [ 8256/15962]
loss: 1.590800  [12352

In [14]:
# Persist the trained network in two forms: the whole module object, and its
# state_dict (parameters/buffers only).
# NOTE(review): the file names say "effnet" although model_conv is created
# from resnet50 in this notebook — confirm the intended names.
checkpoints = {
    'effnet_tl.pth': model_conv,
    'effnetweights.pth': model_conv.state_dict(),
}
for path, payload in checkpoints.items():
    torch.save(payload, path)