In [1]:
import os
import random
from pathlib import Path

import cloudpickle
import cv2
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torchvision.models as models
from efficientnet_pytorch import EfficientNet
from PIL import Image
from pytorch_lightning.loggers import CometLogger
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm

# Seed every random number generator the notebook imports with the same value
# so that repeated runs start from the same random state.
SEED = 42
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(SEED)

# cuDNN: request deterministic algorithms and switch the autotuner off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

ModuleNotFoundError: No module named 'torchvision'

In [2]:
def R2Score(pred, true):
    """Coefficient of determination (R^2) of ``pred`` against ``true``.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    differences between ``true`` and ``pred`` and ``SS_tot`` is the sum of
    squared deviations of ``true`` from its own mean.

    Args:
        pred: Predicted values. Must support ``-``, ``**`` and ``.sum()``; the
            callers in this notebook pass ``torch.Tensor`` objects.
        true: Reference values that line up element-wise with ``pred``; must
            also provide ``.mean()``.

    Returns:
        The R^2 score, as produced by the arithmetic of the input type. No
        guard is applied when ``true`` is constant: ``SS_tot`` is then zero
        and the result is whatever dividing by zero yields for that type.
    """
    ss_res = ((true - pred) ** 2).sum()
    ss_tot = ((true - true.mean()) ** 2).sum()
    return 1 - ss_res / ss_tot


# A copy of EfficientNetDataset had been pasted onto the end of this cell and
# was fused into the return statement above, which made the cell a syntax
# error. The class is defined in its own cell further down.

In [3]:
class EfficientNetTransform():
    """Phase-keyed image preprocessing: resize, then convert to a tensor.

    Args:
        input_size: Target size, handed unchanged to ``transforms.Resize``.
    """

    def __init__(self, input_size):
        # "train" and "val" each own a separate pipeline object. Random
        # horizontal/vertical flips (train) and normalisation (both phases)
        # are currently disabled, which leaves both pipelines with the same
        # two steps.
        train_pipeline = transforms.Compose([
            transforms.Resize(input_size),
            transforms.ToTensor(),
        ])
        val_pipeline = transforms.Compose([
            transforms.Resize(input_size),
            transforms.ToTensor(),
        ])
        self.transforms = {"train": train_pipeline, "val": val_pipeline}

    def __call__(self, phase, image):
        """Run ``image`` through the pipeline registered under ``phase``.

        Args:
            phase: Key into ``self.transforms`` ("train" or "val").
            image: Image to preprocess.

        Returns:
            Whatever the selected pipeline returns for ``image``.

        Raises:
            KeyError: If ``phase`` is not a key of ``self.transforms``.
        """
        pipeline = self.transforms[phase]
        return pipeline(image)

In [4]:
class EfficientNetDataset(Dataset):
    """Map-style dataset pairing image files with regression targets.

    Args:
        dataset: Indexable collection of image file paths.
        series: Indexable collection of targets, one per path. Each element
            must provide ``.view`` (the notebook passes a ``torch.Tensor``).
        transform: Callable invoked as ``transform(phase, image)``.
        phase: Pipeline key forwarded to ``transform``; defaults to "train".
    """

    def __init__(self, dataset, series, transform, phase="train"):
        super().__init__()
        self.dataset = dataset
        self.series = series
        self.phase = phase
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the length of ``series``."""
        return len(self.series)

    def __getitem__(self, index):
        """Load, preprocess and return the ``(image, label)`` pair at ``index``."""
        path = self.dataset[index]
        # Flatten the target to 1-D so a scalar label becomes shape (1,).
        label = self.series[index].view(-1)

        # PNG files may be palette, grayscale or RGBA. Converting to RGB gives
        # every sample three channels (presumably what the backbone expects —
        # confirm), and the context manager releases the file handle as soon
        # as the pixel data has been copied.
        with Image.open(path) as handle:
            image = handle.convert("RGB")
        image = self.transform(self.phase, image)

        return image, label

In [None]:
class EfficientNetModel(nn.Module):
    """EfficientNet backbone whose ``_fc`` layer is replaced by a linear head.

    Args:
        model_name: Architecture name passed to ``EfficientNet.from_name``.
            Defaults to "efficientnet-b0", the value that used to be
            hard-coded.
        out_features: Output width of the replacement ``_fc`` layer. Defaults
            to 1, i.e. a single regression output.
    """

    def __init__(self, model_name="efficientnet-b0", out_features=1):
        super().__init__()
        # NOTE(review): from_name presumably builds the architecture without
        # loading pretrained weights — confirm against efficientnet_pytorch.
        self.model = EfficientNet.from_name(model_name)

        # Keep the backbone's feature width, swap only the final layer.
        num_ftrs = self.model._fc.in_features
        self.model._fc = nn.Linear(num_ftrs, out_features)

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        return self.model(x)

In [5]:
class BestAndLatestModelCheckpoint(object):
    """Pickle a model to a "latest" file on every call and to a "best" file
    whenever the reported score improves (lower is better).

    Args:
        save_directory: Directory that receives both files; created together
            with any missing parents.
        best_name: File name of the best-so-far checkpoint.
        latest_name: File name of the most recent checkpoint.
    """

    def __init__(self, save_directory, best_name="best.pkl", latest_name="latest.pkl"):
        # +inf rather than a large finite sentinel: any finite first score,
        # however large, counts as an improvement and produces a best file.
        self.best_value = float("inf")
        self.save_directory = Path(save_directory)
        self.save_directory.mkdir(parents=True, exist_ok=True)
        self.best_path = self.save_directory / best_name
        self.latest_path = self.save_directory / latest_name

    def __call__(self, pred, model):
        """Record ``model`` for the score ``pred``.

        Args:
            pred: Score of ``model``; smaller values are treated as better.
            model: Object to serialise with ``cloudpickle``.
        """
        # Serialise once, before touching any file: the same bytes feed both
        # checkpoints, and a pickling failure cannot leave a truncated file
        # or a stale ``best_value`` behind.
        payload = cloudpickle.dumps(model)

        if pred < self.best_value:
            self.best_value = pred
            self.best_path.write_bytes(payload)

        self.latest_path.write_bytes(payload)

In [6]:
class EfficientNetSystem(pl.LightningModule):
    """Lightning module that trains ``EfficientNetModel`` as an image regressor.

    Mean-squared error is the loss, R^2 is reported alongside it, and the
    averaged validation loss is handed to ``checkpoint`` after every
    validation pass. The hooks return dictionaries with ``log`` /
    ``progress_bar`` entries and the loaders are wrapped in
    ``@pl.data_loader``.
    NOTE(review): this looks like the older dict-based Lightning API — confirm
    the pinned pytorch_lightning version before upgrading.

    Args:
        train_dataset: Image paths used for training.
        train_series: Targets matching ``train_dataset``.
        val_dataset: Image paths used for validation.
        val_series: Targets matching ``val_dataset``.
        input_size: Size forwarded to ``EfficientNetTransform``.
        batch_size: Batch size of both data loaders.
        checkpoint: Callable invoked as ``checkpoint(val_loss, model)``.
        lr: Learning rate for Adam.
        num_workers: Worker processes per data loader.
    """

    def __init__(self, train_dataset, train_series, val_dataset, val_series, input_size, batch_size, checkpoint, lr=0.001, num_workers=6):
        super().__init__()
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.train_dataset = train_dataset
        self.train_series = train_series
        self.val_dataset = val_dataset
        self.val_series = val_series
        self.input_size = input_size
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.learning_rate = lr
        self.model = EfficientNetModel().to(self.device)
        self.loss = nn.MSELoss()
        self.checkpoint = checkpoint

    def forward(self, x):
        """Return the wrapped model's prediction for the batch ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss and the per-batch R^2 for one training batch."""
        image, label = batch
        image = image.to(self.device, dtype=torch.float)
        label = label.to(self.device, dtype=torch.float)

        pred = self.forward(image)

        loss = self.loss(pred, label)
        # Per-batch R^2 is only a rough progress indicator; it divides by the
        # label variance of the batch, which is zero when all labels agree.
        r2 = R2Score(pred, label)

        tensorboard_logs = {
            "train_loss" : loss,
            "r2" : r2
        }

        return {"loss" : loss, "log" : tensorboard_logs, "progress_bar" : tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        """Score one validation batch and keep its predictions for the epoch summary."""
        image, label = batch
        image = image.to(self.device, dtype=torch.float)
        label = label.to(self.device, dtype=torch.float)

        pred = self.forward(image)

        loss = self.loss(pred, label)
        r2 = R2Score(pred, label)

        tensorboard_logs = {
            "val_loss" : loss,
            "r2" : r2
        }

        return {
            "val_loss" : loss,
            "log" : tensorboard_logs,
            # Carried along so validation_epoch_end can compute R^2 over the
            # whole validation set instead of averaging per-batch scores.
            "pred" : pred.detach(),
            "label" : label.detach(),
        }

    def validation_epoch_end(self, outputs):
        """Aggregate the validation batches, checkpoint the model, report metrics."""
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()

        # R^2 is not additive across batches: the mean of per-batch scores is
        # not the score of the set, and one batch with identical labels makes
        # it inf/nan. Score the concatenated predictions once instead.
        val_pred = torch.cat([x["pred"] for x in outputs])
        val_true = torch.cat([x["label"] for x in outputs])
        val_r2 = R2Score(val_pred, val_true)

        self.checkpoint(avg_loss.item(), self.model)

        tensorboard_logs = {
            "val_loss" : avg_loss,
            "val_r2" : val_r2
        }
        progress_bar = dict(tensorboard_logs)

        return {"avg_val_loss" : avg_loss, "log" : tensorboard_logs, "progress_bar" : progress_bar}

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters at ``self.learning_rate``."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    @pl.data_loader
    def train_dataloader(self):
        """Build the shuffled training loader."""
        train_dataset = EfficientNetDataset(
            dataset = self.train_dataset,
            series = self.train_series,
            transform = EfficientNetTransform(self.input_size)
        )

        train_loader = DataLoader(
            train_dataset,
            shuffle = True,
            batch_size = self.batch_size,
            num_workers = self.num_workers
        )

        return train_loader

    @pl.data_loader
    def val_dataloader(self):
        """Build the validation loader (no shuffling, "val" transform phase)."""
        val_dataset = EfficientNetDataset(
            dataset = self.val_dataset,
            phase="val",
            series = self.val_series,
            transform = EfficientNetTransform(self.input_size)
        )
        val_loader = DataLoader(
            val_dataset,
            batch_size = self.batch_size,
            num_workers = self.num_workers
        )

        return val_loader
    

NameError: name 'pl' is not defined

In [7]:
# Collect the PNG files under image/ in sorted order and keep them as strings.
image_paths = sorted(Path("image").glob("*.png"))
dataset = list(map(str, image_paths))

# NOTE(review): `df` is not created in any of the visible cells — it has to be
# loaded before this cell can run on a fresh kernel.
# NOTE(review): presumably row i of `df` belongs to the i-th path in sorted
# order; verify against how the images were written.
series = torch.Tensor(df["Water Solubility"].values)

# Hold out 20% as the test set, then 20% of the remainder for validation.
# Both splits use the same fixed random state.
split_options = {"test_size": 0.2, "random_state": 42}

train_dataset, test_dataset, train_series, test_series = train_test_split(
    dataset, series, **split_options
)

train_dataset, val_dataset, train_series, val_series = train_test_split(
    train_dataset, train_series, **split_options
)

NameError: name 'Path' is not defined

In [8]:
# PIL reports an image's size as (width, height) whereas transforms.Resize
# takes (height, width), so flip the pair before it reaches the transform.
# The context manager closes the probe file once the size has been read.
with Image.open(dataset[0]) as first_image:
    input_size = first_image.size[::-1]

system = EfficientNetSystem(
    train_dataset = train_dataset,
    train_series = train_series,
    val_dataset = val_dataset,
    val_series = val_series,
    input_size = input_size,
    batch_size = 2,
    checkpoint = BestAndLatestModelCheckpoint("model")
)

# The Comet API key is a credential and must not live in the notebook; read it
# from the environment instead. The key that was committed here earlier should
# be revoked.
comet_api_key = os.environ.get("COMET_API_KEY")
if not comet_api_key:
    raise RuntimeError("Set the COMET_API_KEY environment variable before running this cell.")

comet_logger = CometLogger(
    api_key = comet_api_key,
    project_name = "Fujifilm",
    experiment_name = "EfficientNet-b0",
    save_dir = "log"
)

# NOTE(review): gpus=0 together with distributed_backend='dp' looks
# inconsistent (data parallelism needs GPUs) — confirm the intended setup.
trainer = pl.Trainer(
    num_sanity_val_steps = 10,    # was misspelt "num_sanity_val_step"
    max_epochs = 10,
    checkpoint_callback = False,  # built-in checkpointing off; `system.checkpoint` saves the model
    logger = comet_logger,
    gpus = 0,
    distributed_backend='dp'
)
trainer.fit(system)

NameError: name 'Image' is not defined

In [9]:
model_path = "model/best.pkl"
gpuid = [0, 1]

# NOTE(review): unpickling executes arbitrary code — only load checkpoint
# files that this notebook wrote itself.
with open(model_path, "rb") as f:
    model = cloudpickle.load(f)

# Use only the requested GPUs that actually exist. DataParallel is worth
# wrapping around the model only when more than one is present; with a single
# GPU or none the bare model runs on that device (or the CPU).
available_gpus = [idx for idx in gpuid if idx < torch.cuda.device_count()]
device = torch.device("cuda:{}".format(available_gpus[0]) if available_gpus else "cpu")
model = model.to(device)
if len(available_gpus) > 1:
    model = torch.nn.DataParallel(model, device_ids=available_gpus)

model.eval()
preds = []
transform = EfficientNetTransform(input_size)

# Inference only: skip autograd bookkeeping for the whole loop.
# NOTE(review): predictions are made on the training split — confirm that
# this, and not the held-out test split, is what should be scored.
with torch.no_grad():
    for path in tqdm(train_dataset):
        # Force three channels and release the file handle straight away.
        with Image.open(path) as handle:
            img = handle.convert("RGB")
        img = transform("val", img)
        img = img[None, ...].to(device)  # add the batch dimension
        pred = model(img)
        pred = pred.to("cpu").detach().numpy()
        pred = np.squeeze(pred)
        preds.append(pred)

FileNotFoundError: [Errno 2] No such file or directory: 'model/best.pkl'

In [None]:
# Stack the per-image predictions into one tensor and score them against the
# targets of the training split.
train_pred = torch.Tensor(np.array(preds))
r2 = R2Score(train_pred, train_series)
print("R2 score :", r2)