# My implementation of the EfficientNetVit

data_setup.py - create datasets and dataloaders.

In [1]:
import pandas as pd
import timm
import imageio.v3 as imageio
import math
import numpy as np
import matplotlib.pyplot as plt
import torch
import albumentations as A

from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from albumentations.pytorch import ToTensorV2
from torch import nn

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.8 (you have 1.4.7). Upgrade using: pip install --upgrade albumentations


#### Set meta-parameters

In [2]:
TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
N_VAL_SAMPLES0 = 4096
TESTING = True
MODEL_NAME = 'efficientnet_b5.sw_in12k_ft_in1k'
V_MIN = 0
V_MAX = 0
TARGET_COLUMNS_TEST = ['X4', 'X11', 'X18', 'X50', 'X26', 'X3112']
BATCH_SIZE = 24
BATCH_SIZE_VAL = 128
LOG_FEATURES = ['X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean'] # x4 not here, X4 doesnt need log-scaling?
N_TARGETS = len(TARGET_COLUMNS)

# Feature Scaler
Y_SHIFT = np.zeros(N_TARGETS)
Y_STD = np.zeros(N_TARGETS)

IMAGE_SIZE0 = 512
IMAGE_SIZE = 288

# # Dataset
# RECOMPUTE_DATAFRAMES = False
# # Training
# LR_MAX = 3e-4
# WEIGHT_DECAY = 0.01
N_EPOCHS = 12
# TRAIN_MODEL = True
# # Others
# # IS_INTERACTIVE = os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive'
# IS_INTERACTIVE = False
# SEED = 42
# EPS = 1e-6
# EPS_CUDA = torch.tensor([EPS]).to('cuda')

## 1. Get data

In [3]:
train0 = pd.read_pickle('train.pkl')

# Split train in train/val
train, val = train_test_split(train0, test_size=N_VAL_SAMPLES0, shuffle=True, random_state=42)
train = train.reset_index(drop=True)
val = val.reset_index(drop=True)

test = pd.read_pickle('test.pkl')

In [4]:
### Use 5% of data for testing
if TESTING:
    testing_train_size = math.floor(train.shape[0]*0.05)
    testing_val_size = math.floor(val.shape[0]*0.05)
    testing_test_size = math.floor(test.shape[0]*0.05)
    train = train.sample(testing_train_size)
    val = val.sample(testing_val_size)
    test = test.sample(testing_test_size)

In [73]:
train.shape, val.shape, test.shape

((2569, 178), (204, 178), (327, 166))

In [74]:
V_MIN = train[TARGET_COLUMNS].quantile(0.0005)
V_MAX = train[TARGET_COLUMNS].quantile(0.985)
pd.DataFrame(zip(V_MIN, V_MAX))

Unnamed: 0,0,1
0,-1.355158,0.897844
1,0.00729,45.738037
2,0.006107,25.881885
3,0.060027,4.230211
4,0.001201,960.389841
5,0.024205,20718.088466


In [75]:
# Feature Columns
FEATURE_COLUMNS = test.columns.values[1:-2]
N_FEATURES = len(FEATURE_COLUMNS)
print(f'N_FEATURES: {N_FEATURES}')

N_FEATURES: 163


### 1.1 Create masks with transformations

#### 1.1.1 Filter outliers

In [76]:
def get_mask(df, TARGET_COLUMNS, V_MIN, V_MAX):
    '''
    This function produces a boolean mask for target columns 
    based on value range between V_MIN and V_MAX values.
    Returns a boolean vector indicating observations with True for all six traits falling within the V_MIN - V_MAX range.
    '''
    mask = np.empty(shape = df[TARGET_COLUMNS].shape, dtype=bool)
    for idx, (t, v_min, v_max) in enumerate(zip(TARGET_COLUMNS, V_MIN, V_MAX)):
        labels = df[t].values
        mask[:, idx] = ((labels > v_min) & (labels < v_max))
    return mask.min(axis=1)

In [77]:
MASK_TRAIN = get_mask(train, TARGET_COLUMNS, V_MIN, V_MAX)
MASK_VAL = get_mask(val, TARGET_COLUMNS, V_MIN, V_MAX)
train_mask = train[MASK_TRAIN].reset_index(drop=True)
val_mask = val[MASK_VAL].reset_index(drop=True)

### 1.2 Add number of steps

In [78]:
# Add number of steps
N_TRAIN_SAMPLES = len(train_mask)
N_VAL_SAMPLES = len(val_mask)
N_STEPS_PER_EPOCH = (N_TRAIN_SAMPLES // BATCH_SIZE)
N_VAL_STEPS_PER_EPOCH = math.ceil(N_VAL_SAMPLES / BATCH_SIZE_VAL)
N_STEPS = N_STEPS_PER_EPOCH * N_EPOCHS + 1
print(f"N_TRAIN_SAMPLES: {N_TRAIN_SAMPLES} | N_VAL_SAMPLES: {N_VAL_SAMPLES} | N_STEPS_PER_EPOCH: {N_STEPS_PER_EPOCH} | N_VAL_STEPS_PER_EPOCH: {N_VAL_STEPS_PER_EPOCH} | N_STEPS: {N_STEPS}")

N_TRAIN_SAMPLES: 2378 | N_VAL_SAMPLES: 195 | N_STEPS_PER_EPOCH: 99 | N_VAL_STEPS_PER_EPOCH: 2 | N_STEPS: 1189


### 1.3 Label normalization

In [79]:
def fill_y(TARGET_COLUMNS, LOG_FEATURES, Y_SHIFT, Y_STD, df, normalize = False):
    '''
    Args:
    y - an empty array
    '''
    y = np.zeros_like(df[TARGET_COLUMNS], dtype=np.float32)
    for target_idx, target in enumerate(TARGET_COLUMNS):
        v = df[target]
        if normalize:
            # Log10 transform
            if target in LOG_FEATURES:
                v = np.log10(v)
            # Shift to have zero median
            Y_SHIFT[target_idx] = np.mean(v)
            v = v - np.median(v)
            # Uniform variance
            Y_STD[target_idx] = np.std(v)
            v = v/np.std(v)
        # Assign to y_train
        y[:, target_idx] = v
    return y 
        

In [80]:
y_train_mask_raw = fill_y(TARGET_COLUMNS, LOG_FEATURES, Y_SHIFT, Y_STD, train_mask, normalize=False)
y_train_mask = fill_y(TARGET_COLUMNS, LOG_FEATURES, Y_SHIFT, Y_STD, train_mask, normalize=True)
y_val_mask = fill_y(TARGET_COLUMNS, LOG_FEATURES, Y_SHIFT, Y_STD, train_mask, normalize=True)

In [81]:
# Values
display(pd.DataFrame({
    'y_shift': Y_SHIFT,
    'y_std': Y_STD
}, index=TARGET_COLUMNS))

Unnamed: 0,y_shift,y_std
X4_mean,0.515449,0.140351
X11_mean,1.149262,0.224978
X18_mean,-0.035369,0.656117
X50_mean,0.1725,0.160889
X26_mean,0.392586,1.023566
X3112_mean,2.817999,0.670438


In [82]:
def plot_target_distribution():
    fig, axes = plt.subplots(N_TARGETS, 3, figsize=(20, N_TARGETS*4))
    v_raw = train[TARGET_COLUMNS].values
    for (ax_raw, ax_mask, ax_norm), target, v_r, v_n in zip(axes, TARGET_COLUMNS, v_raw.T, y_train_mask.T):
        # Raw
        ax_raw.hist(v_r, bins=128)
        ax_raw.set_title(f'{target} Raw min: {v_r.min():.3f}, max: {v_r.max():.2e}, µ: {v_r.mean():.2e}, σ: {v_r.std():.2f}', size=10)
        # Masked
        v_m = v_r[MASK_TRAIN]
        ax_mask.hist(v_r, bins=128)
        ax_mask.set_title(f'{target} Masked min: {v_m.min():.3f}, max: {v_m.max():.2e}, µ: {v_m.mean():.2e}, σ: {v_m.std():.2f}', size=10)
        # Normalized
        ax_norm.hist(v_n, bins=128)
        ax_norm.set_title(f'{target} Norm min: {v_n.min():.3f}, max: {v_n.max():.2f}, µ: {v_n.mean():.2f}, σ: {v_n.std():.2f}', size=10)
    plt.subplots_adjust(hspace=0.25, wspace=0.30)
    plt.show()
    
# plot_target_distribution()

### 1.4 Standardize the fratures

In [83]:
FEATURE_SCALER = StandardScaler()

# Fit and transform on training features
train_features_mask = FEATURE_SCALER.fit_transform(train_mask[FEATURE_COLUMNS].values.astype(np.float32))

# Transform val/test
val_features_mask = FEATURE_SCALER.transform(val_mask[FEATURE_COLUMNS].values.astype(np.float32))
test_features = FEATURE_SCALER.transform(test[FEATURE_COLUMNS].values.astype(np.float32))

# Convert features to torch tensors
train_features_mask = torch.tensor(train_features_mask)
val_features_mask = torch.tensor(val_features_mask)
test_features = torch.tensor(test_features)

### 1.5 Transforms and augmentations

In [117]:
# Attempt to joint augmentations and model-specific transformations
# TRAIN_AUGMENTATIONS = A.Compose([
#     A.RandomSizedCrop(
#         [int(0.85*IMAGE_SIZE), IMAGE_SIZE],
#         IMAGE_SIZE0, IMAGE_SIZE0, w2h_ratio=1.0, p=1
# 	),
#     A.HorizontalFlip(p=0.5),
#     A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p = 0.5),
#     A.ImageCompression(quality_lower=75, quality_upper=100, p = 0.5),
#     # Model specific transformations
#     A.Resize(448, 448),
#     A.Normalize(mean=(0.4850, 0.4560, 0.4060), std=(0.2290, 0.2240, 0.2250)),
# 	ToTensorV2(),
# ])

# VAL_TEST_TRANSFORMS = A.Compose([
#     # Model specific transformations
#     A.Resize(448, 448),
#     A.Normalize(mean=(0.4850, 0.4560, 0.4060), std=(0.2290, 0.2240, 0.2250)),
#     ToTensorV2(),
# ])

# Training Augmentations
TRAIN_TRANSFORMS = A.Compose([
        A.RandomSizedCrop(
            [int(0.85*IMAGE_SIZE0), IMAGE_SIZE0],
            IMAGE_SIZE, IMAGE_SIZE, w2h_ratio=1.0, p=1.0
        ),
        A.HorizontalFlip(p=0.50),
        A.RandomBrightnessContrast(brightness_limit=0.10, contrast_limit=0.10, p=0.50),
        A.ImageCompression(quality_lower=75, quality_upper=100, p=0.5),
        ToTensorV2(),
    ])

# Test Augmentations
VAL_TEST_TRANSFORMS = A.Compose([
        A.Resize(IMAGE_SIZE,IMAGE_SIZE),
        ToTensorV2(),
    ])


## 2. Create Datasets and DataLoaders

### 2.1 Create model specific transformations

These model specific transofrmations need to be applied to all train, validation and test data. These are basic transformations that need to be applied for a given model to work.

In [118]:
model = timm.create_model(MODEL_NAME, pretrained=True)

# get model specific transforms (normalization, resize)
model = model.eval()
data_config = timm.data.resolve_model_data_config(model)
image_transforms = timm.data.create_transform(**data_config, is_training=False)
image_transforms

INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b5.sw_in12k_ft_in1k)
INFO:timm.models._hub:[timm/efficientnet_b5.sw_in12k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.


Compose(
    Resize(size=(448, 448), interpolation=bicubic, max_size=None, antialias=True)
    CenterCrop(size=(448, 448))
    ToTensor()
    Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
)

Here I mixed the two: `albumentations` and `torchvidsion.Compose`

In [119]:
# print(type(image_transforms), type(TRAIN_AUGMENTATIONS))

The `torchvision` transformations work if I use `Image.fromarray`

### Figuring out the transformations

The data from the dataset is in the form of `bytes`. In the dataset the resulting `"image'` element of the result list is a `torch.Tensor`. 
I want to:
1. Transform `bytes` to `nd.array`
2. Augment `nd.array` and return `torch.Tensor`

In [120]:
IMAGE_SIZE = 448
img_bytes = train_mask['jpeg_bytes'].values[0]

In [121]:
def model_specific_transforms(transforms, image_bytes):
       img = Image.fromarray(imageio.imread(image_bytes))
       trans_img = transforms(img)
       print(f"Image spec trans: {trans_img.shape}")
       return trans_img

def augment(aug_pipeline, image):
       image_array = np.array(image.permute(1,2,0))
       print(f"Permuted shape: {image_array.shape}")
       augmented_img = aug_pipeline(image=image_array)['image']
       print(f"Augmented shape: {augmented_img.shape}")
       return augmented_img

Now model specific transformation will be included in the dataloader.
In the model I will use augmentations.

In [122]:
augment(TRAIN_AUGMENTATIONS, model_specific_transforms(image_transforms, img_bytes)).shape
# imageio.imread(self.X_jpeg_bytes[index])
# mst = model_specific_transforms(image_transforms, image_bytes=img_bytes)
# There is a problem with the rang of the RGB
# plt.imshow(mst.permute(1,2,0))

Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])


torch.Size([3, 448, 448])

In [123]:
print(type(TRAIN_AUGMENTATIONS), type(image_transforms))

<class 'albumentations.core.composition.Compose'> <class 'torchvision.transforms.transforms.Compose'>


In [124]:
import os
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

NUM_WORKERS = os.cpu_count()

# Create the Plant Traits Photo and Tabular Dataset
class PTFTDataset(Dataset):
	def __init__(self, X_jpeg_bytes, y, features, transforms=None, augmentations=None):
		self.X_jpeg_bytes = X_jpeg_bytes
		self.y = y
		self.features = features
		self.transforms = transforms
		self.augmentations = augmentations

	def __len__(self):
		return len(self.X_jpeg_bytes)
	
	def __getitem__(self, index):
		X_sample = {
			'image': augment(self.augmentations, model_specific_transforms(self.transforms,
				self.X_jpeg_bytes[index])),
			'feature': self.features[index],
		}
		y_sample = self.y[index]

		return X_sample, y_sample

In [125]:
# Dataloder function
def create_dataloaders(
	train_data: pd.DataFrame,
	train_y: torch.tensor,
	train_features: torch.tensor,
	val_data: pd.DataFrame,
	val_y: torch.tensor,
	val_features: torch.tensor,
	test_data: pd.DataFrame,
	test_y: torch.tensor,
	test_features: torch.tensor,
	transforms: transforms.Compose,
	batch_size: int,
	num_workers: int=0
):
	
	# Train dataset and dataloader
	train_dataset = PTFTDataset(
		train_data['jpeg_bytes'].values,
		train_y,
		train_features,
		transforms=transforms,
		augmentations=TRAIN_AUGMENTATIONS
	)
    
	train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        drop_last=True,
        num_workers=num_workers,
    )
	
	# Validataion dataset and dataloader
	val_dataset = PTFTDataset(
		val_data['jpeg_bytes'].values,
		val_y,
		val_features,
		transforms=transforms
	)
    
	val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        drop_last=True,
        num_workers=num_workers,
    )
	
	# Test dataset
	test_dataset = PTFTDataset(
		test_data['jpeg_bytes'].values,
		test_y,
		test_features,
		transforms=transforms
	)

	return train_dataloader, val_dataloader, test_dataset

!!! Use autotransforms in dataloaders and augmentations in the model or during trainig

In [126]:
train_dataloader, val_dataloader, test_dataset = create_dataloaders(
    train_data=train_mask,
    train_y=y_train_mask,
	train_features=train_features_mask,
	val_data=val_mask,
	val_y=y_val_mask,
	val_features=val_features_mask,
	test_data=test,
	test_y=test['id'].values,
	test_features=test_features,
	transforms=image_transforms,
	batch_size=BATCH_SIZE,
	num_workers=0
)

train_dataloader, val_dataloader, test_dataset

(<torch.utils.data.dataloader.DataLoader at 0x221812e5c30>,
 <torch.utils.data.dataloader.DataLoader at 0x221812e5d50>,
 <__main__.PTFTDataset at 0x221812e6470>)

## 3. Model

In [127]:
train_dataloader_iter = iter(train_dataloader)
# X_batch, y_batch = next(train_dataloader_iter)
# for k, v in X_batch.items():
#     print(f'X_batch {k} shape: {v.shape}, dtype: {v.dtype}')
#     print(f'X_batch {k} min: {v.min():.3f}, max: {v.max():.3f}')
#     print(f'X_batch {k} µ: {v.float().mean():.3f}, σ: {v.float().std():.3f}')
# # Label
# print(f'y_batch shape: {y_batch.shape}, dtype: {y_batch.dtype}')
# print(f'y_batch min: {y_batch.min():.3f}, max: {y_batch.max():.3f}')
# print(f'y_batch µ: {y_batch.mean():.3f}, σ: {y_batch.std():.3f}')

In [128]:
next(train_dataloader_iter)

Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448

  warn(


Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448, 448, 3)
Augmented shape: torch.Size([3, 448, 448])
Image spec trans: torch.Size([3, 448, 448])
Permuted shape: (448

[{'image': tensor([[[[-2.1071, -2.1084, -2.1107,  ..., -2.1481, -2.1462, -2.1335],
            [-2.1037, -2.1063, -2.1101,  ..., -2.1485, -2.1466, -2.1350],
            [-2.1032, -2.1063, -2.1103,  ..., -2.1485, -2.1472, -2.1382],
            ...,
            [-2.1176, -2.1246, -2.1322,  ..., -2.1151, -2.1158, -2.1181],
            [-2.1189, -2.1265, -2.1340,  ..., -2.1157, -2.1153, -2.1164],
            [-2.1202, -2.1280, -2.1347,  ..., -2.1189, -2.1174, -2.1164]],
  
           [[-2.0237, -2.0256, -2.0284,  ..., -2.0604, -2.0587, -2.0466],
            [-2.0209, -2.0238, -2.0280,  ..., -2.0609, -2.0593, -2.0482],
            [-2.0210, -2.0244, -2.0289,  ..., -2.0609, -2.0600, -2.0515],
            ...,
            [-2.0276, -2.0351, -2.0436,  ..., -2.0257, -2.0263, -2.0286],
            [-2.0282, -2.0366, -2.0449,  ..., -2.0251, -2.0249, -2.0263],
            [-2.0295, -2.0377, -2.0452,  ..., -2.0273, -2.0263, -2.0257]],
  
           [[-1.7903, -1.7928, -1.7969,  ..., -1.8329, -1.830

In [83]:
example_batch = next(train_dataloader_iter)[0]['image'].shape
example_batch

torch.Size([24, 3, 448, 448])

In [28]:
class Model(nn.Module):
	def __init__(self,model_name=MODEL_NAME, augmentations = TRAIN_AUGMENTATIONS):
		
		super().__inti__()
		
		self.augmentations = augmentations
		self.backbone = timm.create_model(model_name,pretrained=True, num_classes=0)
		self.transforms = None
		self.features = nn.Sequential(
			nn.Linear(N_FEATURES, 256),
			nn.GELU(),
			nn.Linear(256, 256),
		)
		self.label = nn.Sequential(
			nn.Linear(256, 256),
			nn.GELU(),
			nn.Linear(256, N_TARGETS, bias=False),
		)

		self.initialize_weights()

	def initialize_weights(self):
		nn.init.kaiming_uniform_(self.features[2].weight)
		nn.init.zeros_(self.label[2].weight)

	def forward(self, inputs):
		return {'label': self.label(
			self.backbone(self.augmentations(inputs['image'].float())) + self.features(inputs['frature'])
		)}