# Skin Disease Diagnosis - LLaMA Factory (Cloud)


In [None]:
# Check system and install dependencies
import torch
import subprocess
import os

# Report the PyTorch build and every CUDA device it can see
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    gpu_count = torch.cuda.device_count()
    print(f"GPU count: {gpu_count}")
    for device_index in range(gpu_count):
        # total_memory is reported in bytes; show it in decimal gigabytes
        total_gb = torch.cuda.get_device_properties(device_index).total_memory / 1e9
        device_label = torch.cuda.get_device_name(device_index)
        print(f"GPU {device_index}: {device_label} ({total_gb:.1f} GB)")

# Install LLaMA Factory
%pip install llamafactory[torch,metrics]


In [None]:
# Setup environment for 2x RTX 4090
import os
import json
import pandas as pd
from PIL import Image

# Expose the first two GPUs when at least two are present, otherwise only GPU 0
gpu_count = torch.cuda.device_count()
if gpu_count < 2:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    print(f"Single GPU setup: {gpu_count}")
else:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    print(f"Configured for {gpu_count} GPUs")

# Turn off Weights & Biases logging
os.environ.update({'WANDB_DISABLED': 'true'})


In [None]:
# Prepare the ISIC dataset directory
# Nothing is downloaded automatically: this cell creates the target directory
# and prints the layout you are expected to upload into it

import wget
import zipfile

# Source location for the ISIC 2018 dataset (or your preferred dataset)
dataset_url = "https://challenge.isic-archive.com/data/"  # Update with actual URL

# Make sure the upload target exists on the cloud instance
data_dir = "/workspace/skin_data"  # Common path for cloud instances
os.makedirs(data_dir, exist_ok=True)

print("Dataset directory created. Please upload your ISIC dataset to:", data_dir)
# Describe the layout the later cells expect to find
for layout_line in (
    "Expected structure:",
    "  /workspace/skin_data/images/",
    "  /workspace/skin_data/metadata.csv",
):
    print(layout_line)

# Alternative: Use a smaller test dataset
# You can modify this to download from your preferred source


In [None]:
# Prepare dataset for LLaMA Factory
def _clean_value(value):
    """Return a metadata field as display text, or None when it is missing.

    Missing means ``None``, a float NaN (how pandas represents an empty CSV
    cell) or a string that is empty after stripping whitespace. Floats that
    hold a whole number are rendered without the trailing ``.0`` because
    pandas promotes integer columns containing blanks to float.
    """
    if value is None:
        return None
    if isinstance(value, float):
        if value != value:  # NaN is the only value not equal to itself
            return None
        if value.is_integer():
            value = int(value)
    text = str(value).strip()
    return text or None


def _first_present(item, keys):
    """Return the cleaned value of the first key in *keys* that is present in *item*."""
    for key in keys:
        text = _clean_value(item.get(key))
        if text is not None:
            return text
    return None


def prepare_skin_dataset(image_dir, metadata_file, output_dir):
    """Convert skin dataset to LLaMA Factory format.

    Reads the metadata (a JSON list of records when the name ends in
    ``.json``, otherwise a CSV loaded with pandas), pairs each record with its
    image file and writes ``skin_dataset.json`` in ShareGPT layout to
    ``output_dir``.

    Args:
        image_dir: Directory that holds the image files.
        metadata_file: Path to the ``.json`` or CSV metadata file.
        output_dir: Directory for the generated dataset; created if missing.

    Returns:
        A ``(dataset_file, sample_count)`` tuple: the path of the written JSON
        file and the number of samples it contains.

    Records are skipped when they carry no usable image identifier or when the
    image file does not exist. Missing fields (``None``, NaN, blank) are left
    out of the response instead of being rendered as ``nan``.
    """
    # Load metadata
    if metadata_file.endswith('.json'):
        with open(metadata_file, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    else:
        metadata = pd.read_csv(metadata_file).to_dict('records')

    image_cols = ('image_name', 'isic_id', 'image_id', 'filename')
    known_extensions = ('.jpg', '.jpeg', '.png')
    llamafactory_data = []

    for item in metadata:
        if not isinstance(item, dict):
            continue

        # Handle different column names
        image_filename = _first_present(item, image_cols)
        if image_filename is None:
            continue

        # Add extension if missing
        if not image_filename.lower().endswith(known_extensions):
            image_filename += '.jpg'

        # Store an absolute path: the dataset file is written to output_dir,
        # which is generally not the image directory, so a bare file name
        # would not resolve when the dataset is loaded.
        image_path = os.path.abspath(os.path.join(image_dir, image_filename))
        if not os.path.isfile(image_path):
            continue

        # Build response
        diagnosis = _first_present(item, ('diagnosis', 'dx')) or 'skin lesion'
        sentences = [f"This image shows {diagnosis}."]

        location = _first_present(item, ('localization', 'location'))
        if location is not None:
            sentences.append(f"Located on the {location}.")

        age = _clean_value(item.get('age'))
        if age is not None:
            sentences.append(f"Patient age: {age}.")

        sex = _clean_value(item.get('sex'))
        if sex is not None:
            sentences.append(f"Patient gender: {sex}.")

        llamafactory_data.append({
            "conversations": [
                {
                    "from": "human",
                    # A real newline separates the image placeholder from the prompt
                    "value": "<image>\nAnalyze this skin lesion and provide a diagnosis."
                },
                {
                    "from": "gpt",
                    "value": " ".join(sentences)
                }
            ],
            "images": [image_path]
        })

    # Save dataset
    os.makedirs(output_dir, exist_ok=True)
    dataset_file = os.path.join(output_dir, 'skin_dataset.json')
    with open(dataset_file, 'w', encoding='utf-8') as f:
        json.dump(llamafactory_data, f, indent=2)

    return dataset_file, len(llamafactory_data)

# Paths shared by the cells below: raw images, their metadata, and the
# directory that receives the LLaMA Factory dataset files
config = dict(
    image_dir="/workspace/skin_data/images",
    metadata_file="/workspace/skin_data/metadata.csv",
    output_dir="/workspace/llamafactory_data",
)


In [None]:
# Build the LLaMA Factory dataset once the metadata has been uploaded;
# otherwise tell the user which file is still missing
if not os.path.exists(config["metadata_file"]):
    print("Metadata file not found. Please upload your dataset first.")
    print(f"Looking for: {config['metadata_file']}")
else:
    dataset_file, num_samples = prepare_skin_dataset(
        config["image_dir"],
        config["metadata_file"],
        config["output_dir"],
    )
    print(f"Dataset prepared: {num_samples} samples")
    print(f"Saved to: {dataset_file}")


In [None]:
# Create dataset configuration for LLaMA Factory
# Registers skin_dataset.json under the name "skin_diagnosis" and maps the
# keys used in that file onto the ShareGPT roles and columns.
dataset_info = {
    "skin_diagnosis": {
        "file_name": "skin_dataset.json",
        "formatting": "sharegpt",
        "columns": {
            "messages": "conversations",
            "images": "images"
        },
        "tags": {
            "role_tag": "from",
            "content_tag": "value",
            "user_tag": "human", 
            "assistant_tag": "gpt"
        }
    }
}

# Save dataset config
# The output directory only exists if the conversion cell actually ran, so
# create it here to keep this cell from failing with FileNotFoundError.
os.makedirs(config["output_dir"], exist_ok=True)
dataset_config_file = os.path.join(config["output_dir"], 'dataset_info.json')
with open(dataset_config_file, 'w', encoding='utf-8') as f:
    json.dump(dataset_info, f, indent=2)

print(f"Dataset config saved: {dataset_config_file}")


In [None]:
# Create optimized training script for 2x RTX 4090
def create_cloud_training_script(
    data_dir,
    script_path="/workspace/train_skin_model.py",
    model_output_dir="/workspace/skin_model_output",
):
    """Create training script optimized for dual RTX 4090.

    Writes a standalone Python script that picks batch size, gradient
    accumulation, sample cap, epochs and dataloader workers from the number of
    visible GPUs and then runs ``llamafactory-cli train`` for a LoRA fine-tune
    of ``Qwen/Qwen2-VL-2B-Instruct`` on the ``skin_diagnosis`` dataset.

    Args:
        data_dir: Directory holding ``dataset_info.json`` and the dataset
            file; passed to the trainer as ``--dataset_dir``.
        script_path: Where the generated script is written. Missing parent
            directories are created.
        model_output_dir: Directory the trainer writes checkpoints to.

    Returns:
        The path of the generated script (``script_path``).
    """
    # Paths are embedded with repr() so quotes or backslashes in them cannot
    # break the generated source; fspath() makes pathlib objects usable too.
    data_dir = os.fspath(data_dir)
    model_output_dir = os.fspath(model_output_dir)
    script_path = os.fspath(script_path)

    # Notes on the command line below:
    #  * --eval_strategy is the current name of the deprecated
    #    --evaluation_strategy option.
    #  * --visual_inputs is intentionally absent: releases that provide the
    #    qwen2_vl template no longer accept it and reject unknown arguments.
    training_script = f"""
import os
import subprocess
import torch

# Environment setup
gpu_count = torch.cuda.device_count()
print(f"Detected {{gpu_count}} GPUs")

# RTX 4090 optimized settings
if gpu_count >= 2:
    # Dual RTX 4090 setup (48GB total VRAM)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    per_device_batch_size = "4"  # Higher batch size for RTX 4090
    gradient_accumulation = "2"   # Lower accumulation needed
    max_samples = "2000"         # More samples
    epochs = "5"                 # More epochs
    dataloader_workers = "8"     # More workers for faster loading
else:
    # Single GPU fallback
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    per_device_batch_size = "2"
    gradient_accumulation = "4"
    max_samples = "1000"
    epochs = "3"
    dataloader_workers = "4"

os.environ['WANDB_DISABLED'] = 'true'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Training command
cmd = [
    "llamafactory-cli", "train",
    "--stage", "sft",
    "--do_train",
    "--model_name_or_path", "Qwen/Qwen2-VL-2B-Instruct",
    "--dataset", "skin_diagnosis",
    "--dataset_dir", {data_dir!r},
    "--template", "qwen2_vl", 
    "--finetuning_type", "lora",
    "--lora_target", "all",
    "--lora_r", "64",              # Higher rank for RTX 4090
    "--lora_alpha", "128",
    "--output_dir", {model_output_dir!r},
    "--overwrite_output_dir",
    "--per_device_train_batch_size", per_device_batch_size,
    "--gradient_accumulation_steps", gradient_accumulation,
    "--lr_scheduler_type", "cosine",
    "--logging_steps", "10",
    "--warmup_ratio", "0.1",
    "--save_steps", "200",
    "--learning_rate", "2e-4",     # Higher LR for faster training
    "--num_train_epochs", epochs,
    "--max_samples", max_samples,
    "--val_size", "0.1",
    "--eval_strategy", "steps",
    "--eval_steps", "200",
    "--plot_loss",
    "--fp16",
    "--freeze_vision_tower",
    "--dataloader_num_workers", dataloader_workers,
    "--save_total_limit", "5",
    "--load_best_model_at_end",
    "--metric_for_best_model", "eval_loss"
]

# Multi-GPU optimizations
if gpu_count >= 2:
    cmd.extend([
        "--ddp_find_unused_parameters", "false",
        "--dataloader_pin_memory", "true",
        "--group_by_length", "false"
    ])

print("Starting training...")
print(f"Configuration:")
print(f"  GPUs: {{gpu_count}}")
print(f"  Batch size per device: {{per_device_batch_size}}")
print(f"  Gradient accumulation: {{gradient_accumulation}}")
print(f"  Max samples: {{max_samples}}")
print(f"  Epochs: {{epochs}}")

result = subprocess.run(cmd, check=True)
print("Training completed successfully!")
"""

    parent_dir = os.path.dirname(script_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(training_script)

    return script_path

# Generate the training script, pointing it at the prepared dataset directory
train_script = create_cloud_training_script(data_dir=config["output_dir"])
print(f"Training script created: {train_script}")


In [None]:
# Start training
print("Starting training process...")
print("This will take some time depending on your dataset size")
print("For 2x RTX 4090: expect ~1-2 hours for 2000 samples")

# Run the generated script inside the notebook's namespace. The file is read
# through a context manager so the handle is closed, and compiled with its
# path so tracebacks point at the script instead of "<string>".
# NOTE: exec() runs whatever is in this file; only use it on the script
# generated by this notebook.
training_script_path = '/workspace/train_skin_model.py'
with open(training_script_path, encoding='utf-8') as training_script_file:
    training_script_source = training_script_file.read()
exec(compile(training_script_source, training_script_path, 'exec'))


In [None]:
# Test the trained model
def create_inference_script(script_path="/workspace/test_model.sh"):
    """Create script to test the trained model.

    Writes an executable bash script that opens a ``llamafactory-cli chat``
    session with the base Qwen2-VL model plus the LoRA adapter stored in
    ``/workspace/skin_model_output``.

    Args:
        script_path: Destination of the shell script.
    """
    # The shebang must be the very first bytes of the file to take effect
    # when the script is executed directly, so the text starts with it.
    inference_script = """#!/bin/bash

echo "Testing the trained skin diagnosis model..."

llamafactory-cli chat \\
    --model_name_or_path Qwen/Qwen2-VL-2B-Instruct \\
    --adapter_name_or_path /workspace/skin_model_output \\
    --template qwen2_vl \\
    --finetuning_type lora
"""

    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(inference_script)

    # rwxr-xr-x so the script can be run without an explicit interpreter
    os.chmod(script_path, 0o755)
    print(f"Inference script created: {script_path}")

# Create web interface script
def create_webui_script(script_path="/workspace/launch_webui.sh"):
    """Create script for LLaMA Factory web interface.

    Writes an executable bash script that starts ``llamafactory-cli webui``
    listening on all interfaces, port 7860.

    Args:
        script_path: Destination of the shell script.
    """
    # The shebang must be the first line to take effect. The web UI is
    # configured through Gradio's environment variables; the webui
    # subcommand does not take --host/--port options.
    webui_script = """#!/bin/bash

echo "Starting LLaMA Factory Web Interface..."

export GRADIO_SERVER_NAME=0.0.0.0
export GRADIO_SERVER_PORT=7860

llamafactory-cli webui
"""

    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(webui_script)

    # rwxr-xr-x so the script can be run without an explicit interpreter
    os.chmod(script_path, 0o755)
    print(f"Web UI script created: {script_path}")

# Create scripts
create_inference_script()
create_webui_script()

# Summarize how to try the fine-tuned model; "\n" is a real newline so the
# heading is preceded by a blank line instead of a literal backslash-n.
print("\nModel testing options:")
print("1. CLI chat: bash /workspace/test_model.sh")
print("2. Web interface: bash /workspace/launch_webui.sh")
print("3. Output model: /workspace/skin_model_output")


## Cloud Setup Instructions

### Vast.ai Setup:
1. **Choose instance**: Search for "RTX 4090" with 2+ GPUs
2. **Select template**: PyTorch or Deep Learning template
3. **Connect**: Use Jupyter or SSH
4. **Upload data**: Upload your ISIC dataset to `/workspace/skin_data/`

### Expected Performance (2x RTX 4090):
- **VRAM**: 48GB total (24GB per GPU)
- **Training speed**: ~2x faster than single GPU
- **Batch size**: 4 per GPU × 2 GPUs = 8 per step (effective batch of 16 with gradient accumulation of 2)
- **Training time**: ~1-2 hours for 2000 samples

### Commands:
- **Training**: `python /workspace/train_skin_model.py`
- **Testing**: `bash /workspace/test_model.sh`
- **Web UI**: `bash /workspace/launch_webui.sh`
