# Data Preprocessing

> Functions and definitions for preprocessing steps, including normalization stats, tokenization, and template formatting.

In [None]:
#| default_exp data.preprocessing

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor
from fastai.vision.augment import Normalize
import torch

from Adaptive_Patching_VIT_fastai.utils import load_config

## Step 1.2 (Continued): Image Normalization Setup

Load the CLIP image processor to get the correct normalization statistics (mean and standard deviation) required for the vision encoder.

In [None]:
#| export
# Resolve the vision encoder name from the project config. If the config file
# is missing or does not contain the expected entry, fall back to a known
# default so the rest of the notebook/module can still run.
CONFIG_PATH = 'configs/config.yaml'
_DEFAULT_VISION_ENCODER_NAME = 'openai/clip-vit-large-patch14-336'  # Single source for the fallback
try:
    config = load_config(CONFIG_PATH)
    VISION_ENCODER_NAME = config['model']['vision_encoder_name_or_path']
except FileNotFoundError:
    print(f"Warning: Config file not found at {CONFIG_PATH}. Using default vision encoder name.")
    VISION_ENCODER_NAME = _DEFAULT_VISION_ENCODER_NAME
except (KeyError, TypeError):
    # KeyError: the 'model' section or the encoder key is absent.
    # TypeError: the config (or its 'model' entry) is not subscriptable, e.g. None --
    # presumably what an empty config file yields; verify against load_config.
    print(f"Warning: 'model.vision_encoder_name_or_path' not found in {CONFIG_PATH}. Using default.")
    VISION_ENCODER_NAME = _DEFAULT_VISION_ENCODER_NAME

# Fetch the image processor that belongs to the configured vision encoder.
# Loading is best-effort: on any failure the error is reported and
# `clip_image_processor` stays None so later code can pick fallback stats.
clip_image_processor = None
try:
    clip_image_processor = AutoImageProcessor.from_pretrained(VISION_ENCODER_NAME)
except Exception as e:
    print(f"Error loading CLIP image processor for {VISION_ENCODER_NAME}: {e}")
else:
    print(f"Successfully loaded CLIP image processor for: {VISION_ENCODER_NAME}")

# Per-channel normalization statistics for the vision encoder.
#
# Fallback values are the CLIP statistics that the
# openai/clip-vit-large-patch14-336 processor reports. They replace the former
# ImageNet fallback, which did not match the preprocessing the CLIP encoder
# was trained with.
_CLIP_FALLBACK_MEAN = [0.48145466, 0.4578275, 0.40821073]
_CLIP_FALLBACK_STD = [0.26862954, 0.26130258, 0.27577711]

# getattr with a default covers both "processor failed to load" (None) and
# "processor loaded but exposes no stats".
image_mean = getattr(clip_image_processor, 'image_mean', None)
image_std = getattr(clip_image_processor, 'image_std', None)
if image_mean is None or image_std is None:
    print("Warning: Using default OpenAI CLIP stats as fallback for normalization.")
    # Copy so later mutation of image_mean/image_std cannot alter the constants.
    image_mean = list(_CLIP_FALLBACK_MEAN)
    image_std = list(_CLIP_FALLBACK_STD)

# Create the fastai Normalize transform from the resolved stats
clip_normalize = Normalize.from_stats(image_mean, image_std)

In [None]:
# Sanity check: show the resolved normalization statistics and the transform built from them.
print(
    f"CLIP Mean: {image_mean}",
    f"CLIP Std: {image_std}",
    f"Fastai Normalize Transform: {clip_normalize}",
    sep="\n",
)

Successfully loaded CLIP image processor for: openai/clip-vit-large-patch14-336
CLIP Mean: [0.48145466, 0.4578275, 0.40821073]
CLIP Std: [0.26862954, 0.26130258, 0.27577711]
Fastai Normalize Transform: Normalize -- Tries to normalize batch with `mean` and `std` specified on `axes`


---

## Step 1.3: Text Tokenization and Template Handling (Stage 1 - Plain - Placeholder)

This section will be implemented later.

In [None]:
# Placeholder for loading tokenizer
# Placeholder for plain template formatting function
# Placeholder for tokenizer transform

---

## Step 1.5: Implement Custom Batch Transform / Collate Function (Stage 1 - Placeholder)

This section will be implemented later. It will include padding, attention mask creation, replacing the image token marker with the placeholder index -200, and label masking.

In [None]:
# Placeholder for LLaVABatchTransform class definition

---

## Step 4.1: Update Data Handling for Stage 2 (Placeholder)

This section will adapt text processing for the Vicuna v1 template and update label masking logic.

In [None]:
# Placeholder for format_v1_template function
# Placeholder for updated LLaVABatchTransform logic

In [None]:
#| hide
# Run nbdev's export step so the `#| export` cells are written out to the package.
import nbdev
nbdev.nbdev_export()