In [6]:
"""

Author: Annam.ai IIT Ropar
Team Name: SoilMate
Team Members: Kshitiz Jangra, Harshal Chaudhari
Leaderboard Rank: 62

"""

# This is the notebook used for training the model.
# Cell 1: Imports
import json
import os

from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

from src.preprocessing import load_train_data
from src.postprocessing import evaluate_model, plot_training_history

# Cell 2: Load data
# Where the training images and the CSV of their labels live, relative to
# the notebook's working directory.
train_dir = '../data/soil-classification/soil_classification-2025/train'
labels_path = '../data/soil-classification/soil_classification-2025/train_labels.csv'

# load_train_data is a project helper (src.preprocessing); it hands back four
# values which are unpacked here as features, targets, a class mapping and a
# dataframe.
# NOTE(review): the exact shape/encoding of X and y is decided inside that
# helper and is not visible from this notebook -- confirm before relying on it.
X, y, mapping, train_df = load_train_data(train_dir, labels_path)

# Hold out 20% of the samples for validation; the fixed random_state makes the
# partition identical on every run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_parts

# Cell 3: Define model
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling repeat
# from run to run. The value matches the random_state used for the
# train/validation split.
set_random_seed(42)

# ImageNet-pretrained MobileNetV2 without its classification head, used as a
# frozen feature extractor: only the layers stacked on top of it are trained.
# NOTE(review): the ImageNet weights expect inputs scaled the way
# tensorflow.keras.applications.mobilenet_v2.preprocess_input does, and this
# input_shape expects 224x224 RGB images -- confirm load_train_data provides both.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# Classification head: flatten the backbone's feature maps, one hidden layer
# with dropout, then a softmax with one output unit per entry in `mapping`.
model = Sequential([
    base_model,
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(len(mapping), activation='softmax')
])

# NOTE(review): categorical_crossentropy requires one-hot encoded targets --
# confirm that is what load_train_data returns for y.
model.compile(optimizer=Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Cell 4: Train
# Halt training once validation loss has gone 10 epochs without improving,
# then put back the weights from the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Every training setting in one place: validation data, the epoch ceiling
# (early stopping may end the run sooner), batch size and per-epoch logging.
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=45,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)

# `history` is kept because the plotting step at the end of the notebook
# consumes it.
history = model.fit(X_train, y_train, **fit_options)

# Cell 5: Save model and write the metrics card
model_path = '../working/soil_classification_model.keras'
metrics_path = '../docs/cards/ml-metrics.json'

# Create both output directories up front: on a fresh checkout they may not
# exist, and open(..., 'w') raises FileNotFoundError when the parent directory
# is missing. exist_ok=True makes re-running this cell harmless.
for output_path in (model_path, metrics_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

model.save(model_path)

# NOTE(review): these F1 scores are hard-coded placeholders, written before the
# model is evaluated -- they do not describe the model trained above. Replace
# them with the real per-class scores before publishing this file. "Team Name"
# also differs from the team named in the notebook header; confirm which one
# is correct.
dummy_metrics = {
    "_comment": "This JSON file containing the ml-metrics",
    "Name": "Annam.ai",
    "Kaggle Username": "annam.ai",
    "Team Name": "soilclassifiers",
    "f1 scores": {
        "_comment": "Here are the class wise f1 scores",
        "alluvial soil": 0.48,
        "red soil": 0.48,
        "black soil": 0.48,
        "clay soil": 0.48
    }
}

# `json` is imported with the other modules at the top of the notebook.
with open(metrics_path, 'w') as f:
    json.dump(dummy_metrics, f, indent=4)


# Cell 6: Evaluate & visualize
# Both helpers are project code from src.postprocessing; what they print, plot
# or return is not visible from this notebook.
# The trained model is evaluated on the held-out validation split, with the
# class mapping passed along.
# NOTE(review): presumably the mapping is used to label per-class results --
# confirm in src.postprocessing.
evaluate_model(model, X_val, y_val, mapping)
# Passes the History object returned by model.fit to the plotting helper.
plot_training_history(history)


📁 ml-metrics.json saved successfully.
