In [None]:
# Cell 1: Install packages
!pip install -q opencv-python Pillow scikit-learn

# Cell 2: Imports
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from src.preprocessing import preprocess_training_data

# Cell 3: Define paths
# Root of the competition dataset; the training directory and the label CSV
# are both resolved relative to it.
BASE_PATH = '/kaggle/input/soil-classification-part-2/soil_competition-2025'
TRAIN_DIR, TRAIN_CSV = (
    os.path.join(BASE_PATH, entry) for entry in ('train', 'train_labels.csv')
)

# Cell 4: Load and preprocess training data
# preprocess_training_data is a project helper (src.preprocessing); its result
# is unpacked into X and y -- presumably the model inputs and their labels
# (TODO confirm against the helper).
X, y = preprocess_training_data(TRAIN_CSV, TRAIN_DIR)

# Hold out 20% of the samples for validation. Stratifying on y keeps the class
# ratio the same in both splits; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Cell 5: Build the CNN model
# Two convolution + max-pooling stages feed a small dense head that ends in a
# single sigmoid unit, so the network emits one probability per input.
model = Sequential()

# Feature extractor: expects 128x128 inputs with 3 channels.
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Classifier head: dropout is applied to the flattened features before the
# dense layers.
model.add(Flatten())
model.add(Dropout(rate=0.5))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy pairs with the single sigmoid output above.
model.compile(
    optimizer=Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation loss has not improved for 3 consecutive epochs and
# roll the weights back to the best epoch seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Cell 6: Train the model
# Train for at most 10 epochs; the early-stopping callback may end the run
# sooner based on the validation split passed here.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    callbacks=[early_stop],
)

# Cell 7: Evaluate model
# Turn the sigmoid outputs into hard 0/1 labels: anything strictly above 0.5
# becomes 1, everything else 0.
val_scores = model.predict(X_val)
y_pred = (val_scores > 0.5).astype("int32")

# Per-class precision / recall / F1 on the validation split, 4 decimals.
val_report = classification_report(y_val, y_pred, digits=4)
print("\nClassification Report:\n")
print(val_report)

# Cell 8: Save model
# Persist the trained model to the Kaggle working directory as a single
# .h5 file.
model.save(filepath='/kaggle/working/soil_model.h5')
