<a href="https://colab.research.google.com/github/Sowdarjya/bladebreakers_iotricity/blob/main/irrigation_predictor_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report, roc_auc_score, mean_absolute_error
import shap
import joblib

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('soil_data.csv')
df['Time'] = pd.to_datetime(df['Time'])
df = df.set_index('Time')
df.sort_index()

Unnamed: 0_level_0,Humidity,Atmospheric_Temp,Soil_Temp,Soil_Moisture,Dew_Point
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-06-11 21:00:21+00:00,25.252367,15.487149,12.457884,63.077305,23.918315
2017-06-11 22:00:21+00:00,33.304246,12.115891,8.514263,40.097357,31.542544
2017-06-11 23:00:21+00:00,24.072051,17.319203,11.913326,100.141916,21.142841
2017-06-12 00:00:21+00:00,45.026218,15.865805,14.700958,64.638742,42.217799
2017-06-12 01:00:21+00:00,41.490917,13.842739,8.495549,97.964697,38.257137
...,...,...,...,...,...
2018-06-11 17:00:21+00:00,37.667161,5.501209,5.897786,70.082583,35.622101
2018-06-11 18:00:21+00:00,31.148023,7.845985,7.859976,83.095407,29.614203
2018-06-11 19:00:21+00:00,25.695779,13.482887,10.423108,38.487019,25.477196
2018-06-11 20:00:21+00:00,48.461891,12.112883,11.219195,29.527964,43.521510


In [3]:
threshold = df['Soil_Moisture'].quantile(0.2)
df['irrigation_needed'] = (df['Soil_Moisture'] < threshold).astype(int)

In [4]:
df['Soil_Moisture_Deep'] = df['Soil_Moisture']
df['Soil_Moisture_Shallow'] = df['Soil_Moisture'] + np.random.normal(0, 2, len(df))
np.random.seed(42)
df['Rainfall'] = np.random.choice([0, 1], size=len(df), p=[0.8, 0.2])
df['irrigation_amount'] = np.maximum(0, (30 - df['Soil_Moisture']) + np.random.normal(0, 2, len(df)))

In [5]:
feature_cols = [
    "Soil_Moisture_Shallow",
    "Soil_Moisture_Deep",
    "Atmospheric_Temp",
    "Humidity",
    "Rainfall"
]

In [6]:
df['Hour'] = df.index.hour
df['Month'] = df.index.month
feature_cols.extend(['Hour', 'Month'])

In [7]:
def determine_optimal_irrigation_time(temp, humidity, hour, month, rainfall=0):
    """
    Determine optimal irrigation time based on environmental conditions

    Args:
        temp: Atmospheric temperature (°C)
        humidity: Humidity percentage
        hour: Current hour (0-23)
        month: Month (1-12)
        rainfall: Rainfall indicator (0=no rain, 1=rain)

    Returns:
        tuple: (recommended_time_description, optimal_hour, reasoning)
    """

    if rainfall == 1:
        return "Defer irrigation (Rain expected)", None, "Natural precipitation available"

    et_risk = (temp - 20) * 0.1 + (100 - humidity) * 0.05
    et_risk = max(0, et_risk)  

    current_season = get_season(month)

    if et_risk < 2:  # Low risk conditions
        if 5 <= hour <= 8:
            return "Optimal time - Early Morning", hour, f"Low evaporation risk (ET score: {et_risk:.1f}), excellent absorption"
        elif 18 <= hour <= 21:
            return "Good time - Evening", hour, f"Low evaporation risk (ET score: {et_risk:.1f}), good absorption"
        elif hour < 5:
            return "Early Morning (5-8 AM) recommended", 6, f"Current time too early, wait for sunrise"
        elif 9 <= hour <= 17:
            return "Evening (6-9 PM) recommended", 18, f"Avoid midday heat, wait for evening"
        else:
            return "Early Morning (5-8 AM) recommended", 6, f"Late evening, schedule for next morning"

    elif et_risk < 4:  # Moderate risk conditions
        if 5 <= hour <= 7:
            return "Optimal time - Early Morning", hour, f"Moderate evaporation risk (ET score: {et_risk:.1f}), prioritize early irrigation"
        elif 19 <= hour <= 21:
            return "Acceptable time - Late Evening", hour, f"Moderate evaporation risk (ET score: {et_risk:.1f}), evening irrigation acceptable"
        elif hour < 5:
            return "Early Morning (5-7 AM) strongly recommended", 6, f"Wait for optimal morning window"
        else:
            return "Early Morning (5-7 AM) strongly recommended", 6, f"High evaporation risk during day, wait for morning"

    else:  # High risk conditions (hot and dry)
        if 5 <= hour <= 6:
            return "Critical - Early Morning Only", hour, f"High evaporation risk (ET score: {et_risk:.1f}), irrigate immediately"
        elif hour < 5:
            return "Critical - Early Morning (5-6 AM) ONLY", 5
        else:
            return "Critical - Wait for Early Morning (5-6 AM)", 5, f"Extreme evaporation risk (ET score: {et_risk:.1f}), avoid all daytime irrigation"


In [8]:
def get_season(month):
    """Determine season based on month (Northern Hemisphere)"""
    if month in [12, 1, 2]:
        return "Winter"
    elif month in [3, 4, 5]:
        return "Spring"
    elif month in [6, 7, 8]:
        return "Summer"
    else:
        return "Fall"

In [9]:
def get_irrigation_efficiency(hour, temp, humidity):
    """
    Calculate irrigation efficiency score (0-1, higher is better)
    Based on evapotranspiration rates throughout the day
    """
    hourly_efficiency = {
        0: 0.85, 1: 0.85, 2: 0.85, 3: 0.85, 4: 0.85,
        5: 0.95, 6: 0.95, 7: 0.90, 8: 0.80, 9: 0.70,
        10: 0.60, 11: 0.50, 12: 0.40, 13: 0.35, 14: 0.35,
        15: 0.40, 16: 0.45, 17: 0.55, 18: 0.70, 19: 0.80,
        20: 0.85, 21: 0.85, 22: 0.85, 23: 0.85
    }
    
    base_eff = hourly_efficiency.get(hour, 0.5)
    
    temp_adjustment = max(0, 1 - (temp - 25) * 0.02)
    
    humidity_adjustment = 0.8 + (humidity / 100) * 0.2
    
    return base_eff * temp_adjustment * humidity_adjustment

In [10]:
X = df[feature_cols]
y_class = df["irrigation_needed"]
y_reg = df["irrigation_amount"]

In [11]:
clf = RandomForestClassifier(n_estimators=100, random_state=42, class_weight="balanced")
reg = RandomForestRegressor(n_estimators=100, random_state=42)

In [12]:
clf.fit(X, y_class)
reg.fit(X, y_reg)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:
test_cases = [
    {
        "Soil_Moisture_Shallow": 10.0,
        "Soil_Moisture_Deep": 15.0,
        "Atmospheric_Temp": 35.0,
        "Humidity": 14.0,
        "Rainfall": 0,
        "Hour": 5,
        "Month": 5,
        "Description": "Early morning, hot and dry conditions"
    },
    {
        "Soil_Moisture_Shallow": 10.0,
        "Soil_Moisture_Deep": 15.0,
        "Atmospheric_Temp": 35.0,
        "Humidity": 14.0,
        "Rainfall": 0,
        "Hour": 14,
        "Month": 5,
        "Description": "Midday, hot and dry conditions"
    },
    {
        "Soil_Moisture_Shallow": 45.0,
        "Soil_Moisture_Deep": 50.0,
        "Atmospheric_Temp": 32.0,
        "Humidity": 60.0,
        "Rainfall": 1,
        "Hour": 5,
        "Month": 5,
        "Description": "Early morning with rain expected"
    },
    {
        "Soil_Moisture_Shallow": 10.0,
        "Soil_Moisture_Deep": 15.0,
        "Atmospheric_Temp": 28.0,
        "Humidity": 70.0,
        "Rainfall": 0,
        "Hour": 19,
        "Month": 3,
        "Description": "Evening, moderate conditions"
    },
    {
        "Soil_Moisture_Shallow": 8.0,
        "Soil_Moisture_Deep": 12.0,
        "Atmospheric_Temp": 42.0,
        "Humidity": 10.0,
        "Rainfall": 0,
        "Hour": 12,
        "Month": 7,
        "Description": "Noon, extreme heat and very dry"
    }
]


In [14]:
training_feature_names = X.columns.tolist()


In [15]:
print("=== ENHANCED IRRIGATION RECOMMENDATIONS ===\n")

for i, case in enumerate(test_cases, 1):
    model_input = {k: v for k, v in case.items() if k in training_feature_names}
    test_input = pd.DataFrame([model_input], columns=training_feature_names)
    
    prediction = clf.predict(test_input)[0]
    probability = clf.predict_proba(test_input)[:, 1][0]
    irrigation_amt = reg.predict(test_input)[0]
    
    timing_rec, optimal_hour, reasoning = determine_optimal_irrigation_time(
        case["Atmospheric_Temp"], 
        case["Humidity"], 
        case["Hour"], 
        case["Month"],
        case["Rainfall"]
    )
    
    current_efficiency = get_irrigation_efficiency(
        case["Hour"], 
        case["Atmospheric_Temp"], 
        case["Humidity"]
    )
    
    if prediction == 1 and case["Rainfall"] == 0:
        if current_efficiency > 0.7:
            decision = "✅ IRRIGATE NOW"
            action_note = f"Good timing (Efficiency: {current_efficiency:.1%})"
        else:
            decision = "⏰ SCHEDULE IRRIGATION"
            action_note = f"Poor timing now (Efficiency: {current_efficiency:.1%})"
    elif case["Rainfall"] == 1:
        decision = "❌ NO IRRIGATION"
        action_note = "Natural precipitation expected"
    else:
        decision = "❌ NO IRRIGATION"
        action_note = "Soil moisture sufficient"
    
    print(f"🌱 TEST CASE {i}: {case['Description']}")
    print(f"📊 Conditions: {case['Atmospheric_Temp']}°C, {case['Humidity']}% humidity")
    print(f"🎯 Model Decision: {decision}")
    print(f"📈 Irrigation Probability: {probability:.1%}")
    
    if prediction == 1:
        print(f"💧 Recommended Amount: {irrigation_amt:.1f} L/m²")
        print(f"⏰ Timing: {timing_rec}")
        print(f"🧠 Reasoning: {reasoning}")
        print(f"⚡ Current Efficiency: {current_efficiency:.1%}")
        
        if optimal_hour and optimal_hour != case["Hour"]:
            optimal_efficiency = get_irrigation_efficiency(optimal_hour, case["Atmospheric_Temp"], case["Humidity"])
            print(f"📅 Optimal Hour: {optimal_hour}:00 (Efficiency: {optimal_efficiency:.1%})")
    
    print(f"💡 Action: {action_note}")
    print("-" * 60)

=== ENHANCED IRRIGATION RECOMMENDATIONS ===

🌱 TEST CASE 1: Early morning, hot and dry conditions
📊 Conditions: 35.0°C, 14.0% humidity
🎯 Model Decision: ⏰ SCHEDULE IRRIGATION
📈 Irrigation Probability: 95.0%
💧 Recommended Amount: 13.3 L/m²
⏰ Timing: Critical - Early Morning Only
🧠 Reasoning: High evaporation risk (ET score: 5.8), irrigate immediately
⚡ Current Efficiency: 62.9%
💡 Action: Poor timing now (Efficiency: 62.9%)
------------------------------------------------------------
🌱 TEST CASE 2: Midday, hot and dry conditions
📊 Conditions: 35.0°C, 14.0% humidity
🎯 Model Decision: ⏰ SCHEDULE IRRIGATION
📈 Irrigation Probability: 95.0%
💧 Recommended Amount: 13.9 L/m²
⏰ Timing: Critical - Wait for Early Morning (5-6 AM)
🧠 Reasoning: Extreme evaporation risk (ET score: 5.8), avoid all daytime irrigation
⚡ Current Efficiency: 23.2%
📅 Optimal Hour: 5:00 (Efficiency: 62.9%)
💡 Action: Poor timing now (Efficiency: 23.2%)
------------------------------------------------------------
🌱 TEST CASE 3

In [16]:
print("\n=== IRRIGATION TIMING ANALYSIS ===")

hourly_irrigation = df.groupby('Hour').agg({
    'irrigation_needed': 'mean',
    'Atmospheric_Temp': 'mean',
    'Humidity': 'mean'
}).round(3)

print("\nHourly Irrigation Patterns:")
print("Hour | Irrigation Need | Avg Temp | Avg Humidity | Efficiency Score")
print("-" * 65)

for hour in range(24):
    if hour in hourly_irrigation.index:
        irrigation_need = hourly_irrigation.loc[hour, 'irrigation_needed']
        avg_temp = hourly_irrigation.loc[hour, 'Atmospheric_Temp']
        avg_humidity = hourly_irrigation.loc[hour, 'Humidity']
        efficiency = get_irrigation_efficiency(hour, avg_temp, avg_humidity)
        
        print(f"{hour:2d}   | {irrigation_need:11.3f} | {avg_temp:8.1f} | {avg_humidity:12.1f} | {efficiency:11.1%}")



=== IRRIGATION TIMING ANALYSIS ===

Hourly Irrigation Patterns:
Hour | Irrigation Need | Avg Temp | Avg Humidity | Efficiency Score
-----------------------------------------------------------------
 0   |       0.195 |     13.5 |         35.7 |       91.1%
 1   |       0.178 |     13.5 |         35.3 |       91.1%
 2   |       0.219 |     13.6 |         35.1 |       90.8%
 3   |       0.195 |     13.7 |         35.3 |       90.7%
 4   |       0.216 |     13.4 |         35.2 |       91.1%
 5   |       0.178 |     13.1 |         35.4 |      102.3%
 6   |       0.195 |     13.9 |         35.2 |      101.1%
 7   |       0.203 |     13.5 |         35.3 |       96.4%
 8   |       0.244 |     13.7 |         35.3 |       85.4%
 9   |       0.230 |     13.4 |         35.5 |       75.1%
10   |       0.192 |     13.8 |         35.2 |       64.0%
11   |       0.184 |     13.6 |         36.2 |       53.6%
12   |       0.200 |     13.7 |         34.5 |       42.6%
13   |       0.189 |     13.4 |   

In [17]:
print(f"\n=== OPTIMAL IRRIGATION WINDOWS ===")
print("Based on efficiency scores and typical conditions:")

optimal_hours = []
for hour in range(24):
    avg_temp = hourly_irrigation.loc[hour, 'Atmospheric_Temp'] if hour in hourly_irrigation.index else 30
    avg_humidity = hourly_irrigation.loc[hour, 'Humidity'] if hour in hourly_irrigation.index else 50
    efficiency = get_irrigation_efficiency(hour, avg_temp, avg_humidity)
    optimal_hours.append((hour, efficiency))

optimal_hours.sort(key=lambda x: x[1], reverse=True)

print("\nTop 5 Most Efficient Hours:")
for i, (hour, eff) in enumerate(optimal_hours[:5], 1):
    time_desc = f"{hour:02d}:00"
    if 5 <= hour <= 8:
        period = "Early Morning"
    elif 18 <= hour <= 21:
        period = "Evening"
    elif 22 <= hour or hour <= 4:
        period = "Night"
    else:
        period = "Midday"
    
    print(f"{i}. {time_desc} ({period}) - Efficiency: {eff:.1%}")



=== OPTIMAL IRRIGATION WINDOWS ===
Based on efficiency scores and typical conditions:

Top 5 Most Efficient Hours:
1. 05:00 (Early Morning) - Efficiency: 102.3%
2. 06:00 (Early Morning) - Efficiency: 101.1%
3. 07:00 (Early Morning) - Efficiency: 96.4%
4. 00:00 (Night) - Efficiency: 91.1%
5. 21:00 (Evening) - Efficiency: 91.1%


In [18]:
def save_enhanced_irrigation_system():
    """Save models and timing functions for deployment"""
    
    joblib.dump(clf, "irrigation_model.pkl")
    joblib.dump(reg, "irrigation_amount_model.pkl")
    
    irrigation_system = {
        'model_classification': clf,
        'model_regression': reg,
        'feature_columns': training_feature_names,
        'timing_function': determine_optimal_irrigation_time,
        'efficiency_function': get_irrigation_efficiency
    }
    
    joblib.dump(irrigation_system, "complete_irrigation_system.pkl")
    print("\n✅ Enhanced irrigation system saved to 'complete_irrigation_system.pkl'")


In [19]:
def get_irrigation_recommendation(soil_shallow, soil_deep, temp, humidity, rainfall, hour, month):
    """
    Complete irrigation recommendation system
    
    Returns: Dictionary with full recommendation details
    """
    
    # Prepare input for models
    model_input = pd.DataFrame([{
        "Soil_Moisture_Shallow": soil_shallow,
        "Soil_Moisture_Deep": soil_deep,
        "Atmospheric_Temp": temp,
        "Humidity": humidity,
        "Rainfall": rainfall,
        "Hour": hour,
        "Month": month
    }], columns=training_feature_names)
    
    # Get predictions
    irrigation_needed = clf.predict(model_input)[0]
    confidence = clf.predict_proba(model_input)[:, 1][0]
    amount = reg.predict(model_input)[0] if irrigation_needed else 0
    
    # Get timing recommendation
    timing_rec, optimal_hour, reasoning = determine_optimal_irrigation_time(
        temp, humidity, hour, month, rainfall
    )
    
    # Calculate current efficiency
    current_eff = get_irrigation_efficiency(hour, temp, humidity)
    
    # Make final decision
    if irrigation_needed and rainfall == 0:
        if current_eff > 0.7:
            action = "IRRIGATE_NOW"
        else:
            action = "SCHEDULE_IRRIGATION"
    elif rainfall == 1:
        action = "NO_IRRIGATION_RAIN"
    else:
        action = "NO_IRRIGATION_SUFFICIENT"
    
    return {
        'action': action,
        'irrigation_needed': bool(irrigation_needed),
        'confidence': round(confidence, 3),
        'amount_liters_per_m2': round(amount, 2) if amount > 0 else 0,
        'timing_recommendation': timing_rec,
        'optimal_hour': optimal_hour,
        'reasoning': reasoning,
        'current_efficiency': round(current_eff, 3),
        'evapotranspiration_risk': round((temp - 20) * 0.1 + (100 - humidity) * 0.05, 2)
    }

In [20]:
print("\n=== COMPREHENSIVE SYSTEM TEST ===")
for i, case in enumerate(test_cases, 1):
    recommendation = get_irrigation_recommendation(
        case["Soil_Moisture_Shallow"],
        case["Soil_Moisture_Deep"], 
        case["Atmospheric_Temp"],
        case["Humidity"],
        case["Rainfall"],
        case["Hour"],
        case["Month"]
    )
    
    print(f"\nTest Case {i}: {case['Description']}")
    print(f"Action: {recommendation['action']}")
    print(f"Timing: {recommendation['timing_recommendation']}")
    print(f"Reasoning: {recommendation['reasoning']}")
    if recommendation['amount_liters_per_m2'] > 0:
        print(f"Amount: {recommendation['amount_liters_per_m2']} L/m²")
    print(f"Efficiency: {recommendation['current_efficiency']:.1%}")

save_enhanced_irrigation_system()


=== COMPREHENSIVE SYSTEM TEST ===

Test Case 1: Early morning, hot and dry conditions
Action: SCHEDULE_IRRIGATION
Timing: Critical - Early Morning Only
Reasoning: High evaporation risk (ET score: 5.8), irrigate immediately
Amount: 13.34 L/m²
Efficiency: 62.9%

Test Case 2: Midday, hot and dry conditions
Action: SCHEDULE_IRRIGATION
Timing: Critical - Wait for Early Morning (5-6 AM)
Reasoning: Extreme evaporation risk (ET score: 5.8), avoid all daytime irrigation
Amount: 13.94 L/m²
Efficiency: 23.2%

Test Case 3: Early morning with rain expected
Action: NO_IRRIGATION_RAIN
Timing: Defer irrigation (Rain expected)
Reasoning: Natural precipitation available
Efficiency: 75.2%

Test Case 4: Evening, moderate conditions
Action: IRRIGATE_NOW
Timing: Acceptable time - Late Evening
Reasoning: Moderate evaporation risk (ET score: 2.3), evening irrigation acceptable
Amount: 14.27 L/m²
Efficiency: 70.7%

Test Case 5: Noon, extreme heat and very dry
Action: SCHEDULE_IRRIGATION
Timing: Critical - Wai