# 🔍 Inference - Soil Detection Challenge

This notebook fits the soil classifier (a One-Class SVM trained on previously extracted image features) and runs inference on the test set provided in `test_ids.csv`.

---


In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import recall_score
import json


# Imports Explanation
We import essential libraries: numpy for array operations, pandas for data handling, sklearn modules for scaling, modeling and evaluation, and json for saving metrics.

In [None]:
# Learn the per-feature mean/variance from the training features only, then
# apply those same statistics to both splits.
# NOTE(review): `train_features` / `test_features` are not created anywhere in
# the visible notebook — presumably they come from the .npy files named in the
# metrics cell; confirm a loading cell runs before this one.
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)


# Feature Scaling
We standardize features by fitting StandardScaler on training data and applying the same transformation to test data to ensure consistent input distribution.

In [None]:
# Fit a One-Class SVM (RBF kernel) on the scaled training features.
# Per the scikit-learn documentation, `nu` is an upper bound on the fraction
# of training samples treated as errors/outliers.
svm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.1).fit(train_features)
svm


# Model Training
We initialize a One-Class SVM with RBF kernel to learn the distribution of soil images and fit it on the scaled training features.

In [None]:
# predict() yields +1 for inliers and -1 for outliers; collapse that to the
# 1 (soil) / 0 (non-soil) encoding used in the submission.
svm_preds = svm.predict(test_features)
binary_preds = (svm_preds == 1).astype(int).tolist()

# Inference on Test Set
Using the trained SVM, we predict on test features. Raw outputs (1 for inlier, -1 for outlier) are mapped to binary labels (1 for soil, 0 for non-soil).

In [None]:
# One row per test image: its ID and the predicted 1/0 label.
submission = pd.DataFrame(dict(image_id=test_ids, label=binary_preds))
# index=False keeps the row index out of the CSV.
submission.to_csv("submission.csv", index=False)
print("✅ Submission file saved.")

# Submission File Creation
We create a pandas DataFrame mapping each image ID to its predicted label and save the results as `submission.csv` for submission.

In [None]:
# Score the model on the data it was fitted on. Every training sample is
# treated as positive (label 1), so any 0 prediction is a false negative.
train_preds = svm.predict(train_features)
binary_train_preds = (train_preds == 1).astype(int).tolist()  # +1 -> 1, -1 -> 0
train_labels = [1 for _ in binary_train_preds]
recall = recall_score(train_labels, binary_train_preds)
false_negatives = binary_train_preds.count(0)

# Evaluation on Training Data
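To gauge model performance on known data, we predict on the training set, calculate recall, and count false negatives (soil classified as non-soil). Because the model is scored on the same soil images it was fitted on, and `nu=0.1` caps the fraction of training samples treated as outliers at roughly 10%, this recall is an optimistic sanity check rather than an estimate of accuracy on unseen images.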

In [None]:
# Experiment summary that is serialized to metrics.json.
# Values describing the fitted model are read from the live objects (`svm`,
# `train_features`, `train_labels`) instead of being re-typed by hand, so the
# report cannot drift from what was actually trained. Numeric results are
# coerced to built-in int/float so they are always JSON-serializable.
metrics = {
    "task": "One-Class Soil Detection",
    "approach": "Anomaly Detection using ResNet18 + One-Class SVM",
    "model": {
        "feature_extractor": "ResNet18 (ImageNet pretrained)",
        "anomaly_model": "OneClassSVM (RBF kernel)",
        "feature_dim": int(train_features.shape[1]),
        # Taken from the estimator rather than hard-coded as 0.1.
        "nu": float(svm.nu),
        "scaler": "StandardScaler (mean=0, std=1)",
        "training_samples": len(train_labels)
    },
    "training_data_used": "Only positive class (soil images)",
    "testing_goal": "Identify non-soil images as outliers",
    "evaluation": {
        # Both recall figures come from predictions on the training set.
        "recall_on_soil_train": float(recall),
        "false_negatives_estimate": int(false_negatives),
        "recall_percent": round(float(recall) * 100, 2)
    },
    "team_info": {
        "name": "Sanskar Khandelwal",
        "kaggle_username": "sankhuz",
        "team": "TheLastTransformer"
    },
    "files": {
        "train_features": "train_features.npy",
        "test_features": "test_features.npy",
        "test_ids": "test_ids.npy",
        "metrics_file": "metrics.json"
    },
    "notes": "Model trained only on positive samples. No non-soil training data used."
}


# Metrics Construction
We build a structured metrics dictionary capturing task details, model parameters, dataset sizes, and evaluation results for reproducibility.

In [None]:
# Write the metrics dictionary to disk exactly once. The original cell wrote
# the same file twice and printed two different success messages.
with open("metrics.json", "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=4)

print("✅ metrics.json saved.")


# Saving Metrics
We serialize the metrics dictionary into `metrics.json` to document our experiment configuration and outcomes.

In [None]:


# Predict on the test set and write the submission file.
# NOTE(review): this repeats the earlier inference and submission cells and
# regenerates the same 'submission.csv'; consider keeping only one copy.
# predict() output: 1 = inlier (soil), -1 = outlier (non-soil)
svm_preds = svm.predict(test_features)
binary_preds = (svm_preds == 1).astype(int).tolist()  # 1 stays 1, -1 becomes 0

# Save submission
submission = pd.DataFrame(dict(image_id=test_ids, label=binary_preds))
submission.to_csv('submission.csv', index=False)
print(" Submission file saved as 'submission.csv'")