In [7]:
import yaml
import os
config_data = {
    "paths": {
        "data_dir": "/kaggle/working/",
        "log_dir": "logs",
        "cache_dir": "/kaggle/working/",
        "checkpoint_dir": "/kaggle/working/"
    },
    "wandb": {
        "using": False,
        "api_key": "your_wandb_api_key",
        "project": "project_name",
        "run_name_template": "{hidden_dim}x{num_hidden_layers}_training"
    },
    # Model: convnext_base_w, Pretrained: laion2b_s13b_b82k

    "training": {
        "batch_size":4 ,
        "num_epochs": 10,
        "accumulation_steps": 2
    },
    "model": {
        "name": "ViT-B-16-quickgelu", # Replace the Model with desired CLIP Model
        "pretrained": "openai", # Corresponding Pretrained Dataset
        "clip_dim": 512, # Don't forget to change this
        "hidden_dim": [160],
        "dropout_rate": [0.15],
        "num_hidden_layers": [1],
    },
    "optimizer": {
        "clip_lr": [1e-5],
        "predictor_lr": [5e-5],
        "weight_decay": [0.01],
        "beta1": [0.9],
        "beta2": [0.999]
    },
    "scheduler": {
        "gamma": 0.1,
        "milestones": [4, 6, 10]
    }
}

output_dir = "/teamspace/studios/this_studio/"  # Replace with the desired directory
file_name = "config.yml"

# Ensure the directory exists
os.makedirs(output_dir, exist_ok=True)

# Full file path
file_path = os.path.join(output_dir, file_name)

# Write the YAML content to the file
with open(file_path, "w") as file:
    yaml.dump(config_data, file, default_flow_style=False)

print(f"YAML file saved to {file_path}")


YAML file saved to /teamspace/studios/this_studio/config.yml


In [6]:
import gradio as gr
import torch
import torch.nn.functional as F
from torch import nn
from PIL import Image
import open_clip
import yaml

##################
# CATEGORY_MAPPING
##################
CATEGORY_MAPPING = {
    "defect": {
        "fresh": "class"
    }
}

##################
# CategoryAwareAttributePredictor
##################
class CategoryAwareAttributePredictor(nn.Module):
    def __init__(self, clip_dim=768, category_attributes=None, attribute_dims=None, hidden_dim=512, dropout_rate=0.2, num_hidden_layers=1):
        super(CategoryAwareAttributePredictor, self).__init__()
        self.category_attributes = category_attributes
        self.attribute_predictors = nn.ModuleDict()
        
        for category, attributes in category_attributes.items():
            for attr_name in attributes.keys():
                key = f"{category}_{attr_name}"
                if key in attribute_dims:
                    layers = []
                    # Input layer
                    layers.append(nn.Linear(clip_dim, hidden_dim))
                    layers.append(nn.LayerNorm(hidden_dim))
                    layers.append(nn.ReLU())
                    layers.append(nn.Dropout(dropout_rate))
                    
                    # Additional hidden layers
                    current_dim = hidden_dim
                    for _ in range(num_hidden_layers - 1):
                        layers.append(nn.Linear(current_dim, current_dim // 2))
                        layers.append(nn.LayerNorm(current_dim // 2))
                        layers.append(nn.ReLU())
                        layers.append(nn.Dropout(dropout_rate))
                        current_dim = current_dim // 2

                    # Output layer
                    layers.append(nn.Linear(current_dim, attribute_dims[key]))
                    
                    self.attribute_predictors[key] = nn.Sequential(*layers)
    
    def forward(self, clip_features, category):
        results = {}
        category_attrs = self.category_attributes[category]
        clip_features = clip_features.float()
        
        for attr_name in category_attrs.keys():
            key = f"{category}_{attr_name}"
            if key in self.attribute_predictors:
                results[key] = self.attribute_predictors[key](clip_features)
        
        return results

##################
# Helper Functions
##################

def load_config(config_path):
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)

def load_models(config, checkpoint_path, device):
    # Create CLIP model and transforms
    clip_model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
        config['model']['name'],
        pretrained=config['model']['pretrained'],
        device=device
    )
    clip_model = clip_model.float()
    
    # Define attribute_dims (binary classification: 2 classes)
    attribute_dims = {"defect_fresh": 2}
    
    model = CategoryAwareAttributePredictor(
        clip_dim=config['model']['clip_dim'],
        category_attributes=CATEGORY_MAPPING,
        attribute_dims=attribute_dims,
        hidden_dim=config['model']['hidden_dim'][0],
        dropout_rate=config['model']['dropout_rate'][0],
        num_hidden_layers=config['model']['num_hidden_layers'][0]
    ).to(device)

    # Load checkpoint
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    clip_model.load_state_dict(checkpoint['clip_model_state_dict'])

    model.eval()
    clip_model.eval()
    
    return clip_model, model, preprocess_val

def infer_image(clip_model, model, preprocess, image, device):
    image_tensor = preprocess(image).unsqueeze(0).to(device)
    category = "defect"  # known from training
    
    with torch.no_grad():
        image_features = clip_model.encode_image(image_tensor)
        predictions = model(image_features, category)
        logits = predictions["defect_fresh"]  # shape [1, 2]
        probs = F.softmax(logits, dim=1)
        pred_class = torch.argmax(probs, dim=1).item()

        class_names = ["rotten", "fresh"]
        pred_label = class_names[pred_class]
        
        return pred_label, probs.cpu().numpy()

##################
# Initialization
##################
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config_path = "config.yml"  # Replace with actual path
checkpoint_path = "/kaggle/working/binary_checkpoint_epoch1.pth"  # Replace with actual path

config = load_config(config_path)
clip_model, model, preprocess_val = load_models(config, checkpoint_path, device)

def process_image(image):
    image_pil = Image.fromarray(image)
    pred_label, probs = infer_image(clip_model, model, preprocess_val, image_pil, device)
    return f"Predicted Label: {pred_label}\nProbabilities: {probs}"

##################
# Gradio Integration
##################

with gr.Blocks() as demo:
    gr.Markdown("## Fruit Freshness Detection")
    gr.Markdown(
        """
        **Upload an image of a fruit**, and the model will predict whether the fruit is fresh or rotten.
        """
    )

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="numpy", label="Upload Image")
            submit_button = gr.Button("Run Inference")

        with gr.Column():
            output_box = gr.Textbox(label="Prediction", lines=3, max_lines=5)

    submit_button.click(fn=process_image, inputs=image_input, outputs=[output_box])

demo.launch(share=True)


open_clip_model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

  checkpoint = torch.load(checkpoint_path, map_location=device)


RuntimeError: Error(s) in loading state_dict for CategoryAwareAttributePredictor:
	Missing key(s) in state_dict: "attribute_predictors.defect_fresh.0.weight", "attribute_predictors.defect_fresh.0.bias", "attribute_predictors.defect_fresh.1.weight", "attribute_predictors.defect_fresh.1.bias", "attribute_predictors.defect_fresh.4.weight", "attribute_predictors.defect_fresh.4.bias". 
	Unexpected key(s) in state_dict: "attribute_predictors.defect_scratch.0.weight", "attribute_predictors.defect_scratch.0.bias", "attribute_predictors.defect_scratch.1.weight", "attribute_predictors.defect_scratch.1.bias", "attribute_predictors.defect_scratch.4.weight", "attribute_predictors.defect_scratch.4.bias". 