# Cattle Breed Classification Training

## EfficientNet-B0 Training on Google Colab (Free T4 GPU)

This notebook trains the breed classification model using EfficientNet-B0.

**Target:** 85%+ accuracy on Indian cattle and buffalo breeds

## 1. Setup Environment

In [None]:
# Report the TensorFlow build and whether a GPU is attached to this runtime
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

# Query the visible GPU devices once and reuse the answer below
gpu_devices = tf.config.list_physical_devices('GPU')
has_gpu = len(gpu_devices) > 0
print("GPU available:", has_gpu)

# No GPU? In Colab: Runtime > Change runtime type > GPU
if has_gpu:
    print("GPU Device:", tf.test.gpu_device_name())

In [None]:
# Install dependencies
# Each line is a shell command (leading `!`); `-q` keeps pip's output quiet.
# TensorFlow and Keras are pinned to matching 2.12.0 releases; the other
# packages are left unpinned.
# NOTE(review): TensorFlow is already imported by the GPU-check cell above.
# If this pin changes the installed version, the runtime presumably has to be
# restarted before the new version is actually used - confirm.
!pip install -q tensorflow==2.12.0
!pip install -q keras==2.12.0
!pip install -q opencv-python
!pip install -q matplotlib seaborn
!pip install -q scikit-learn
!pip install -q tqdm

In [None]:
# Attach Google Drive so the dataset and trained models live outside the VM
import os

from google.colab import drive

drive.mount('/content/drive')

# Project layout on Drive: <BASE_DIR>/data and <BASE_DIR>/models
BASE_DIR = '/content/drive/MyDrive/cattle-breed-recognition'
DATA_DIR = os.path.join(BASE_DIR, 'data')
MODEL_DIR = os.path.join(BASE_DIR, 'models')

# Make sure both working folders exist (no error if they already do)
for required_dir in (DATA_DIR, MODEL_DIR):
    os.makedirs(required_dir, exist_ok=True)

print(f"Base directory: {BASE_DIR}")
print(f"Data directory: {DATA_DIR}")
print(f"Model directory: {MODEL_DIR}")

## 2. Prepare Dataset

In [None]:
# Class labels: Indian cattle breeds first, then buffalo breeds.
# The position of a name in ALL_BREEDS is the class index used by the
# breed-to-index mapping built from it.
CATTLE_BREEDS = [
    "Gir",
    "Sahiwal",
    "Red Sindhi",
    "Tharparkar",
    "Rathi",
    "Hallikar",
    "Amritmahal",
    "Khillari",
    "Kangayam",
    "Bargur",
    "Hariana",
    "Kankrej",
    "Ongole",
    "Deoni",
    "Krishna Valley",
    "Punganur",
    "Vechur",
    "Malnad Gidda",
    "Jersey Cross",
    "HF Cross",
]

BUFFALO_BREEDS = [
    "Murrah",
    "Jaffrabadi",
    "Nili-Ravi",
    "Banni",
    "Pandharpuri",
    "Mehsana",
    "Surti",
    "Nagpuri",
    "Toda",
    "Bhadawari",
]

# Cattle occupy the leading indices, buffalo the trailing ones
ALL_BREEDS = [*CATTLE_BREEDS, *BUFFALO_BREEDS]
NUM_CLASSES = len(ALL_BREEDS)

print(f"Total breeds: {NUM_CLASSES}")
print(f"Cattle breeds: {len(CATTLE_BREEDS)}")
print(f"Buffalo breeds: {len(BUFFALO_BREEDS)}")

In [None]:
# Build the two lookup tables between breed names and class indices.
# Indices follow the order of ALL_BREEDS, so they only match the model's
# output positions if the data generators use that same class order.
import json

BREED_TO_IDX = dict(zip(ALL_BREEDS, range(len(ALL_BREEDS))))
IDX_TO_BREED = {index: name for name, index in BREED_TO_IDX.items()}

# JSON object keys must be strings, so the integer indices are stringified
mapping_payload = {
    'breed_to_idx': BREED_TO_IDX,
    'idx_to_breed': {str(index): name for index, name in IDX_TO_BREED.items()},
}

# Persist the mapping next to the model artefacts
mapping_file_path = os.path.join(MODEL_DIR, 'breed_mapping.json')
with open(mapping_file_path, 'w') as mapping_file:
    json.dump(mapping_payload, mapping_file)

print("Breed mapping saved!")

In [None]:
# Image generators: augmented for training, plain for validation/test
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input

# Image size for EfficientNet-B0
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Random transformations applied to training images only
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
)

# Training generator: EfficientNet preprocessing plus augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options
)

# Validation/test generator: EfficientNet preprocessing only
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

print("Data generators created!")

In [None]:
# Load datasets from directory structure
# Expected structure:
# data/
#   train/
#     Gir/
#     Sahiwal/
#     ...
#   val/
#     ...
#   test/
#     ...

TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VAL_DIR = os.path.join(DATA_DIR, 'val')
TEST_DIR = os.path.join(DATA_DIR, 'test')

# All three splits are needed below, so check every one of them rather than
# only the training folder.
missing_dirs = [
    split_dir
    for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR)
    if not os.path.isdir(split_dir)
]

if not missing_dirs:
    print("Loading data from directories...")

    # `classes=ALL_BREEDS` pins the label order. Without it Keras infers the
    # classes from the sub-folder names in alphanumeric order, which would
    # disagree with BREED_TO_IDX and could yield a label width different from
    # NUM_CLASSES. Sub-folder names must therefore match the breed names.
    train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        classes=ALL_BREEDS,
        shuffle=True
    )

    # Validation and test data keep a fixed order (shuffle=False) so that
    # predictions can be aligned with `generator.classes` afterwards.
    val_generator = val_datagen.flow_from_directory(
        VAL_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        classes=ALL_BREEDS,
        shuffle=False
    )

    test_generator = val_datagen.flow_from_directory(
        TEST_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        classes=ALL_BREEDS,
        shuffle=False
    )

    print(f"\nTraining samples: {train_generator.samples}")
    print(f"Validation samples: {val_generator.samples}")
    print(f"Test samples: {test_generator.samples}")

    # A misspelled or missing breed folder yields zero images for that class;
    # report it instead of training silently without that breed.
    breeds_seen = {int(class_idx) for class_idx in train_generator.classes}
    breeds_without_images = [
        breed for idx, breed in enumerate(ALL_BREEDS) if idx not in breeds_seen
    ]
    if breeds_without_images:
        print(f"Warning: no training images found for: {breeds_without_images}")

else:
    print("Data directory not found. Please upload your dataset to Google Drive.")
    for missing_dir in missing_dirs:
        print(f"Expected path: {missing_dir}")

## 3. Build Model

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import Model, layers

def build_model(num_classes, input_shape=(224, 224, 3)):
    """
    Assemble the breed classifier on top of an EfficientNet-B0 backbone.

    Args:
        num_classes: Width of the final softmax layer.
        input_shape: Shape passed to the backbone as its input shape.

    Returns:
        A ``(model, backbone)`` tuple: the full classifier and the backbone
        it was built on, so the caller can unfreeze backbone layers later.
    """
    # ImageNet-pretrained feature extractor without its original top
    backbone = EfficientNetB0(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Start with every backbone layer frozen
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head, listed in the order the layers are applied
    head_layers = (
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(num_classes, activation='softmax'),
    )

    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(backbone.input, tensor, name='efficientnet_breed_classifier')

    return classifier, backbone

# Instantiate the classifier for all breeds and show its layer summary
model, backbone = build_model(NUM_CLASSES)
model.summary()

In [None]:
# Configure the optimizer, loss and metrics for the first training phase
head_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Track top-1 accuracy and top-3 accuracy
head_metrics = ['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3)]

model.compile(
    optimizer=head_optimizer,
    loss='categorical_crossentropy',
    metrics=head_metrics
)

print("Model compiled!")

## 4. Train Model

In [None]:
# Training callbacks shared by the training phases
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau,
    ModelCheckpoint,
    TensorBoard
)

# Output folders for checkpoints and TensorBoard logs
checkpoint_path = os.path.join(MODEL_DIR, 'checkpoints')
log_path = os.path.join(MODEL_DIR, 'logs')
for output_dir in (checkpoint_path, log_path):
    os.makedirs(output_dir, exist_ok=True)

# Stop after 7 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
    verbose=1
)

# Halve the learning rate after 3 epochs without val_loss improvement
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1
)

# Keep only the weights with the highest validation accuracy seen so far
best_checkpoint = ModelCheckpoint(
    filepath=os.path.join(checkpoint_path, 'best_model.h5'),
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

tensorboard_logger = TensorBoard(log_dir=log_path)

callbacks = [early_stopping, lr_on_plateau, best_checkpoint, tensorboard_logger]

print("Callbacks ready!")

In [None]:
# Phase 1: only the classification head is trained; the backbone stays frozen
EPOCHS_PHASE1 = 10

phase1_rule = "=" * 60
print(phase1_rule)
print("Phase 1: Training classification head (frozen backbone)")
print(phase1_rule)

phase1_fit_options = dict(
    validation_data=val_generator,
    epochs=EPOCHS_PHASE1,
    callbacks=callbacks,
)
history_phase1 = model.fit(train_generator, **phase1_fit_options)

In [None]:
# Phase 2: Fine-tune last layers
EPOCHS_PHASE2 = 10
FINE_TUNE_LAYERS = 20

print("="*60)
print(f"Phase 2: Fine-tuning last {FINE_TUNE_LAYERS} layers")
print("="*60)

# Unfreeze the last N backbone layers, but leave BatchNormalization layers
# frozen: updating their statistics and scale/offset on a small dataset tends
# to wreck the pretrained features (see the Keras fine-tuning guidance).
for layer in backbone.layers[-FINE_TUNE_LAYERS:]:
    if not isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = True

# Recompile so the changed `trainable` flags take effect, with a 10x lower
# learning rate than phase 1 to avoid large updates to pretrained weights.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3)]
)

# Continue the epoch numbering after phase 1 so logs form one timeline;
# this runs at most EPOCHS_PHASE2 further epochs.
history_phase2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS_PHASE1 + EPOCHS_PHASE2,
    initial_epoch=EPOCHS_PHASE1,
    callbacks=callbacks
)

## 5. Evaluate Model

In [None]:
# Plot training history
import matplotlib.pyplot as plt

def plot_history(history, title=''):
    """
    Plot training/validation loss and accuracy curves side by side.

    Args:
        history: A single Keras ``History`` object, or a list/tuple of them
            (one per training phase). Multiple histories are concatenated in
            the given order and drawn as one continuous curve.
        title: Prefix used for both subplot titles.
    """
    phases = list(history) if isinstance(history, (list, tuple)) else [history]

    def _series(metric):
        # Concatenate one metric across all phases
        return [value for phase in phases for value in phase.history[metric]]

    # x-positions halfway between the last epoch of one phase and the first
    # epoch of the next, used to mark where a new phase starts
    boundaries = []
    epochs_so_far = 0
    for phase in phases[:-1]:
        epochs_so_far += len(phase.history['loss'])
        boundaries.append(epochs_so_far - 0.5)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    panels = (
        ('loss', 'val_loss', 'Loss'),
        ('accuracy', 'val_accuracy', 'Accuracy'),
    )
    for ax, (train_key, val_key, label) in zip(axes, panels):
        ax.plot(_series(train_key), label='Train')
        ax.plot(_series(val_key), label='Val')
        for boundary in boundaries:
            ax.axvline(boundary, color='gray', linestyle='--', linewidth=1)
        ax.set_title(f'{title} - {label}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# Plot combined history (phase 1 followed by phase 2)
plot_history([history_phase1, history_phase2], 'EfficientNet-B0 Training')

In [None]:
# Score the trained model on the held-out test split
print("Evaluating on test set...")
test_results = model.evaluate(test_generator)

# evaluate() returns the loss followed by the compiled metrics, in order
test_loss = test_results[0]
test_accuracy = test_results[1]
test_top3_accuracy = test_results[2]

print(f"\nTest Results:")
print(f"  Loss: {test_loss:.4f}")
print(f"  Accuracy: {test_accuracy:.4f}")
print(f"  Top-3 Accuracy: {test_top3_accuracy:.4f}")

In [None]:
# Generate predictions and confusion matrix
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Get predictions; reset() rewinds the generator so the prediction order
# lines up with `test_generator.classes`
test_generator.reset()
predictions = model.predict(test_generator)
y_pred = np.argmax(predictions, axis=1)
y_true = test_generator.classes

# Get class names, ordered explicitly by class index so that position i in
# the list is the name of label i
class_names = sorted(
    test_generator.class_indices,
    key=test_generator.class_indices.get
)

# Confusion matrix. `labels` forces one row/column per known class; without
# it, classes absent from both y_true and y_pred are dropped and the tick
# labels below would no longer line up with the matrix.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))

plt.figure(figsize=(15, 12))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.tight_layout()
# Save before show(): the figure is written to disk while it is still current
plt.savefig(os.path.join(MODEL_DIR, 'confusion_matrix.png'))
plt.show()

In [None]:
# Classification report
# `labels` lists every class index so the report has one row per name in
# `class_names`; without it scikit-learn raises when the classes present in
# y_true/y_pred differ in number from `target_names`.
# `zero_division=0` reports 0 for classes with no samples/predictions instead
# of emitting a warning per class.
report = classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
    zero_division=0
)
print(report)

# Save report
with open(os.path.join(MODEL_DIR, 'classification_report.txt'), 'w') as f:
    f.write(report)

## 6. Export Model to TFLite

In [None]:
# Persist the complete Keras model (HDF5 file) in the model directory
keras_model_path = os.path.join(MODEL_DIR, 'efficientnet_breed_classifier.h5')
model.save(keras_model_path)
print("Model saved!")

In [None]:
# Convert the trained Keras model to a TFLite flatbuffer
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Enable the converter's default optimizations (quantization) for a smaller file
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = converter.convert()

# Write the serialized model bytes to the model directory
tflite_path = os.path.join(MODEL_DIR, 'breed_classifier.tflite')
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

# Size of the serialized model in mebibytes
tflite_size_mb = len(tflite_model) / 1024 / 1024

print(f"TFLite model saved to: {tflite_path}")
print(f"Model size: {tflite_size_mb:.2f} MB")

In [None]:
# Load the exported TFLite file back and inspect its input/output tensors
interpreter = tf.lite.Interpreter(model_path=tflite_path)
interpreter.allocate_tensors()

input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)

# Only the first input and the first output tensor are reported
first_input = input_details[0]
first_output = output_details[0]

print("TFLite Model Details:")
print(f"Input shape: {first_input['shape']}")
print(f"Output shape: {first_output['shape']}")

## 7. Test Predictions

In [None]:
def predict_breed(image_path, model, class_names):
    """
    Return the three most likely breeds for a single image file.

    Args:
        image_path: Path of the image to classify.
        model: Object whose ``predict`` method is called on the image batch.
        class_names: Sequence indexed by class index, giving the breed name.

    Returns:
        A list of up to three dicts with keys ``'breed'`` and
        ``'confidence'``, ordered from most to least likely.
    """
    from tensorflow.keras.preprocessing import image as keras_image

    # Load at the network's input size and turn it into a batch of one
    loaded = keras_image.load_img(image_path, target_size=IMG_SIZE)
    batch = np.expand_dims(keras_image.img_to_array(loaded), axis=0)
    batch = preprocess_input(batch)

    # Scores for the single image in the batch
    scores = model.predict(batch)[0]

    # Indices of the three highest scores, best first
    best_indices = np.argsort(scores)[::-1][:3]

    return [
        {'breed': class_names[class_idx], 'confidence': float(scores[class_idx])}
        for class_idx in best_indices
    ]

# Test with sample image (replace with your image path)
# sample_image = '/content/drive/MyDrive/sample_cow.jpg'
# results = predict_breed(sample_image, model, class_names)
# print("Top 3 Predictions:")
# for r in results:
#     print(f"  {r['breed']}: {r['confidence']:.2%}")

## 8. Summary

In [None]:
# Final recap of the dataset sizes, test metrics and exported artefacts
summary_rule = "=" * 60
tflite_megabytes = len(tflite_model) / 1024 / 1024

print(summary_rule)
print("TRAINING SUMMARY")
print(summary_rule)
print(f"\nModel: EfficientNet-B0")
print(f"Number of classes: {NUM_CLASSES}")
print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {val_generator.samples}")
print(f"Test samples: {test_generator.samples}")
print(f"\nTest Accuracy: {test_results[1]:.2%}")
print(f"Test Top-3 Accuracy: {test_results[2]:.2%}")
print(f"\nModel saved to: {MODEL_DIR}")
print(f"TFLite model size: {tflite_megabytes:.2f} MB")
print(summary_rule)