<a href="https://colab.research.google.com/github/Sowdarjya/bladebreakers_iotricity/blob/main/irrigation_predictor_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report, roc_auc_score
import shap

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('soil_data.csv')
df['Time'] = pd.to_datetime(df['Time'])
df = df.set_index('Time')
df.sort_index()

Unnamed: 0_level_0,Humidity,Atmospheric_Temp,Soil_Temp,Soil_Moisture,Dew_Point
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-06-11 21:00:21+00:00,25.252367,15.487149,12.457884,63.077305,23.918315
2017-06-11 22:00:21+00:00,33.304246,12.115891,8.514263,40.097357,31.542544
2017-06-11 23:00:21+00:00,24.072051,17.319203,11.913326,100.141916,21.142841
2017-06-12 00:00:21+00:00,45.026218,15.865805,14.700958,64.638742,42.217799
2017-06-12 01:00:21+00:00,41.490917,13.842739,8.495549,97.964697,38.257137
...,...,...,...,...,...
2018-06-11 17:00:21+00:00,37.667161,5.501209,5.897786,70.082583,35.622101
2018-06-11 18:00:21+00:00,31.148023,7.845985,7.859976,83.095407,29.614203
2018-06-11 19:00:21+00:00,25.695779,13.482887,10.423108,38.487019,25.477196
2018-06-11 20:00:21+00:00,48.461891,12.112883,11.219195,29.527964,43.521510


In [3]:
df.columns

Index(['Humidity', 'Atmospheric_Temp', 'Soil_Temp', 'Soil_Moisture',
       'Dew_Point'],
      dtype='object')

In [4]:
threshold = df['Soil_Moisture'].quantile(0.2)
df['irrigation_needed'] = (df['Soil_Moisture'] < threshold).astype(int)

In [5]:
print(f"Threshold for irrigation = {threshold:.2f}")
print(df["irrigation_needed"].value_counts())

Threshold for irrigation = 46.50
irrigation_needed
0    7009
1    1752
Name: count, dtype: int64


In [6]:
df['Soil_Moisture_Deep'] = df['Soil_Moisture']
df['Soil_Moisture_Shallow'] = df['Soil_Moisture'] + np.random.normal(0, 2, len(df))

In [7]:
feature_cols = [
    "Soil_Moisture_Shallow",
    "Soil_Moisture_Deep",
    "Atmospheric_Temp",
    "Humidity"
]

In [8]:
df['Hour'] = df.index.hour
df['Month'] = df.index.month
feature_cols.extend(['Hour', 'Month'])

In [9]:
x = df[feature_cols]
y = df["irrigation_needed"]

In [10]:
tscv = TimeSeriesSplit(n_splits=5)
aucs = []

In [11]:
for split, (train_idx, test_idx) in enumerate(tscv.split(x), 1):
    X_train, X_test = x.iloc[train_idx], x.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    model = RandomForestClassifier(
        n_estimators=100, random_state=42, class_weight="balanced"
    )
    model.fit(X_train, y_train)

    y_proba = model.predict_proba(X_test)[:, 1]
    roc_auc = roc_auc_score(y_test, y_proba)
    aucs.append(roc_auc)

    print(f"\nSplit {split} ROC-AUC = {roc_auc:.3f}")
    print(classification_report(y_test, model.predict(X_test)))


Split 1 ROC-AUC = 1.000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1195
           1       1.00      1.00      1.00       265

    accuracy                           1.00      1460
   macro avg       1.00      1.00      1.00      1460
weighted avg       1.00      1.00      1.00      1460


Split 2 ROC-AUC = 1.000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1171
           1       1.00      1.00      1.00       289

    accuracy                           1.00      1460
   macro avg       1.00      1.00      1.00      1460
weighted avg       1.00      1.00      1.00      1460


Split 3 ROC-AUC = 1.000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1168
           1       1.00      1.00      1.00       292

    accuracy                           1.00      1460
   macro avg       1.00      1.00      1.00      1460

In [12]:
np.mean(aucs)

np.float64(1.0)

In [13]:
explainer = shap.TreeExplainer(model)
X_for_shap = X_train
shap_values = explainer.shap_values(X_for_shap)

shap_values_class_1 = shap_values[1]

print(f"Shape of shap_values_class_1: {shap_values_class_1.shape}")
print(f"Shape of X_for_shap: {X_for_shap.shape}")

Shape of shap_values_class_1: (6, 2)
Shape of X_for_shap: (7301, 6)


In [14]:
# Ensure the shapes match before plotting
if shap_values_class_1.shape == X_for_shap.shape:
    shap.summary_plot(shap_values_class_1, X_for_shap, feature_names=feature_cols)
else:
    print("Shape mismatch between SHAP values and data used for calculation.")
    print(f"Shape of shap_values_class_1: {shap_values_class_1.shape}")
    print(f"Shape of X_for_shap: {X_for_shap.shape}")

Shape mismatch between SHAP values and data used for calculation.
Shape of shap_values_class_1: (6, 2)
Shape of X_for_shap: (7301, 6)


In [18]:
# Get the feature names and their order from the training data
training_feature_names = x.columns.tolist()

# Generate multiple hardware test cases as a list of dictionaries
test_cases = [
    {
        "Soil_Moisture_Shallow": 10.0,
        "Soil_Moisture_Deep": 15.0,
        "Atmospheric_Temp": 35.0,
        "Humidity": 14.0,
        "Hour": 5,
        "Month": 5
    },   # very dry soil → Irrigate
    {
        "Soil_Moisture_Shallow": 45.0,
        "Soil_Moisture_Deep": 50.0,
        "Atmospheric_Temp": 32.0,
        "Humidity": 14.0,
        "Hour": 5,
        "Month": 5
    },   # wet soil → No irrigation
    {
        "Soil_Moisture_Shallow": 25.0,
        "Soil_Moisture_Deep": 28.0,
        "Atmospheric_Temp": 75.0,
        "Humidity": 10.0,
        "Hour": 7,
        "Month": 7
    },   # moderate soil + high humidity → No irrigation
    {
        "Soil_Moisture_Shallow": 15.0,
        "Soil_Moisture_Deep": 18.0,
        "Atmospheric_Temp": 85.0,
        "Humidity": 16.0,
        "Hour": 12,
        "Month": 12
    },  # dry soil but very humid → maybe No irrigation
]


for i, case_dict in enumerate(test_cases, 1):
    test_input = pd.DataFrame([case_dict], columns=training_feature_names)
    prediction = model.predict(test_input)
    probability = model.predict_proba(test_input)[:,1]

    print(f"\nHardware test case {i}:")
    print("Input:", case_dict)
    print("Prediction:", "Irrigate ✅" if prediction[0]==1 else "No irrigation ❌")
    print("Confidence:", round(probability[0], 3))


Hardware test case 1:
Input: {'Soil_Moisture_Shallow': 10.0, 'Soil_Moisture_Deep': 15.0, 'Atmospheric_Temp': 35.0, 'Humidity': 14.0, 'Hour': 5, 'Month': 5}
Prediction: Irrigate ✅
Confidence: 1.0

Hardware test case 2:
Input: {'Soil_Moisture_Shallow': 45.0, 'Soil_Moisture_Deep': 50.0, 'Atmospheric_Temp': 32.0, 'Humidity': 14.0, 'Hour': 5, 'Month': 5}
Prediction: No irrigation ❌
Confidence: 0.03

Hardware test case 3:
Input: {'Soil_Moisture_Shallow': 25.0, 'Soil_Moisture_Deep': 28.0, 'Atmospheric_Temp': 75.0, 'Humidity': 10.0, 'Hour': 7, 'Month': 7}
Prediction: Irrigate ✅
Confidence: 1.0

Hardware test case 4:
Input: {'Soil_Moisture_Shallow': 15.0, 'Soil_Moisture_Deep': 18.0, 'Atmospheric_Temp': 85.0, 'Humidity': 16.0, 'Hour': 12, 'Month': 12}
Prediction: Irrigate ✅
Confidence: 1.0


In [19]:
import joblib

In [20]:
joblib.dump(model, "irrigation_model.pkl")

['irrigation_model.pkl']