In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Paths are relative to the notebook's location.
PROJECT_DIR = ".."
DATA_DIR = PROJECT_DIR + "/data/raw"

# Decision-tree tuning parameters.
MAX_DEPTH = 5
# NOTE: despite the "MAX" prefix, this value is passed to the classifier's
# `min_samples_split` argument further down the notebook.
MAX_SAMPLE_SPLIT = 2
MIN_SAMPLE_LEAF = 1
# Split-quality measure: "gini" or "entropy".
CRITERION = "gini"

In [None]:
# Read the raw irrigation CSV from DATA_DIR and preview its first rows.
irrigation_csv_path = f"{DATA_DIR}/irrigation.csv"
data = pd.read_csv(irrigation_csv_path)
data.head()

In [None]:
# Quick look at the dataset: column dtypes / non-null counts, summary
# statistics, and how many rows fall into each "Pump Data" class.
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called bare -- wrapping it in print() would emit a stray "None" line.
data.info()
print(data.describe())
print(data['Pump Data'].value_counts())

In [None]:
# Three sensor readings are the model inputs; "Pump Data" is the label.
feature_columns = ["Soil Moisture", "Temperature", "Air Humidity"]
X = data[feature_columns]
y = data["Pump Data"]

# Hold out 20% of the rows for testing. The split is seeded (42) and
# stratified on y so both partitions keep the same class proportions.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

clf = DecisionTreeClassifier(
    criterion=CRITERION,   
     splitter="best",         
    max_depth=None,          
    min_samples_split=MAX_SAMPLE_SPLIT,     
    min_samples_leaf=MIN_SAMPLE_LEAF,      
    min_weight_fraction_leaf=0.0,
    max_features=None,       
    random_state=None,
    max_leaf_nodes=None,
    min_impurity_decrease=0.0,
    class_weight=None
)
clf.fit(X_train, y_train)

In [None]:
# Start timing
import time
training_start_time = time.time()
print("Starting model training...")

In [None]:
# Predict the held-out rows, then print accuracy, the per-class report and
# the confusion matrix for those predictions.
y_pred = clf.predict(X_test)

holdout_accuracy = accuracy_score(y_test, y_pred)
holdout_report = classification_report(y_test, y_pred)
holdout_confusion = confusion_matrix(y_test, y_pred)

print("Accuracy:", holdout_accuracy)
print("\nClassification Report:\n", holdout_report)
print("\nConfusion Matrix:\n", holdout_confusion)

In [None]:
# Stop the clock and report the time elapsed since `training_start_time`
# was recorded by the timing-start cell.
training_end_time = time.time()
elapsed_seconds = training_end_time - training_start_time
print(f"Model training and evaluation completed in {elapsed_seconds:.2f} seconds")

In [None]:
# Headline test-set metrics, kept in module-level variables because the
# logging cell at the end of the notebook reads them.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)

# All scorers run with scikit-learn's default arguments.
# NOTE(review): presumably the labels are 0 = pump OFF / 1 = pump ON, which is
# what the `target_names` order below assumes -- confirm against the dataset.
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

metric_rows = (
    ("Accuracy", accuracy),
    ("F1 Score", f1),
    ("Precision", precision),
    ("Recall", recall),
)
for metric_label, metric_value in metric_rows:
    print(f"{metric_label}: {metric_value:.4f}")

print("\nDetailed Classification Report:")
detailed_report = classification_report(
    y_test, y_pred, target_names=["Pump OFF", "Pump ON"]
)
print(detailed_report)

In [None]:
# Classify one hand-written sensor reading.
sample = [[700, 30, 65]]  # Soil moisture, Temp, Humidity
# The model was fitted on a DataFrame, so give the sample the same column
# names. This keeps the value-to-feature mapping explicit and avoids
# scikit-learn's "X does not have valid feature names" warning.
sample_frame = pd.DataFrame(sample, columns=X.columns)
pred = clf.predict(sample_frame)
print("Pump ON" if pred[0] == 1 else "Pump OFF")

In [None]:
from sklearn import tree
import matplotlib.pyplot as plt

# Draw the fitted tree on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(15, 8))
tree.plot_tree(
    clf,
    # Pass a plain list: some scikit-learn releases reject a pandas Index for
    # `feature_names` during parameter validation.
    feature_names=list(X.columns),
    class_names=["OFF", "ON"],
    filled=True,
    ax=ax,
)
ax.set_title("Irrigation pump decision tree")
plt.show()


In [None]:
# Irrigation Control Model Logger
# Gathers the fitted tree's settings, dataset statistics, timing and test
# metrics into one dictionary, writes it to a timestamped JSON file under
# PROJECT_DIR/logs/irrigation_control, and prints a short summary.
import json
import datetime
import os

# Create logs directory if it doesn't exist
log_dir = f"{PROJECT_DIR}/logs/irrigation_control"
os.makedirs(log_dir, exist_ok=True)

# Read the clock once so the timestamp inside the log and the one in the
# file name can never disagree.
logged_at = datetime.datetime.now()

# Calculate training time (the span between the notebook's two timing cells)
try:
    training_time_seconds = training_end_time - training_start_time
    training_time_measured = True
except NameError:
    # The timing cells were not run. Keep the original 1-second placeholder so
    # the log structure stays the same, but flag it so nobody mistakes it for
    # a real measurement.
    training_time_seconds = 1.0
    training_time_measured = False
    print("Warning: timing cells were not run; logging a placeholder training time of 1.0s")

training_time_minutes = training_time_seconds / 60
training_time_hours = training_time_minutes / 60

# Get dataset information - convert to regular Python integers so json can
# serialise the keys and counts
train_class_distribution = {int(k): int(v) for k, v in y_train.value_counts().to_dict().items()}
test_class_distribution = {int(k): int(v) for k, v in y_test.value_counts().to_dict().items()}

# Get feature importance, keyed by feature column name
feature_importance = dict(zip(X.columns, clf.feature_importances_))

# Prepare training log data
irrigation_log = {
    "timestamp": logged_at.isoformat(),
    "model_type": "DecisionTreeClassifier",
    # Read every hyperparameter back from the estimator itself so the log
    # records what the model was really built with, not what the config
    # constants happen to say.
    "hyperparameters": {
        "criterion": clf.criterion,
        "splitter": clf.splitter,
        "max_depth": clf.max_depth,
        "min_samples_split": clf.min_samples_split,
        "min_samples_leaf": clf.min_samples_leaf,
        "min_weight_fraction_leaf": clf.min_weight_fraction_leaf,
        "max_features": clf.max_features,
        "random_state": clf.random_state,
        "max_leaf_nodes": clf.max_leaf_nodes,
        "min_impurity_decrease": clf.min_impurity_decrease,
        "class_weight": clf.class_weight
    },
    "dataset_info": {
        "data_file": "irrigation.csv",
        "features": list(X.columns),
        "target": "Pump Data",
        "total_samples": int(len(data)),
        "train_samples": int(len(X_train)),
        "test_samples": int(len(X_test)),
        # These four values mirror the arguments given to train_test_split in
        # the split cell; keep them in sync if that cell changes.
        "train_split": 0.8,
        "test_split": 0.2,
        "stratified": True,
        "random_state": 42,
        "train_class_distribution": train_class_distribution,
        "test_class_distribution": test_class_distribution
    },
    "training_time": {
        "total_seconds": float(training_time_seconds),
        "total_minutes": float(training_time_minutes),
        "total_hours": float(training_time_hours),
        "formatted": f"{int(training_time_hours):02d}h {int(training_time_minutes % 60):02d}m {int(training_time_seconds % 60):02d}s",
        # False when the value above is the placeholder rather than a measurement.
        "measured": training_time_measured
    },
    "results": {
        "accuracy": float(accuracy),
        "f1_score": float(f1),
        "precision": float(precision),
        "recall": float(recall),
        "tree_depth": int(clf.get_depth()),
        "n_leaves": int(clf.get_n_leaves()),
        "n_features": int(clf.n_features_in_),
        "feature_importance": {str(k): float(v) for k, v in feature_importance.items()}
    },
    "sample_predictions": {
        # Taken from the sample-prediction cell's `sample` so the logged input
        # always matches the logged prediction.
        "sample_input": list(sample[0]),  # Soil moisture, Temp, Humidity
        "sample_prediction": int(pred[0]),
        "sample_result": "Pump ON" if pred[0] == 1 else "Pump OFF"
    },
    "model_details": {
        "n_classes": int(len(clf.classes_)),
        "classes": [int(c) for c in clf.classes_],
        "class_names": ["Pump OFF", "Pump ON"]
    }
}

# Save to JSON file with timestamp
timestamp_str = logged_at.strftime("%Y%m%d_%H%M%S")
log_filename = f"{log_dir}/irrigation_log_{timestamp_str}.json"

# Explicit encoding so the file is written the same way on every platform.
with open(log_filename, 'w', encoding="utf-8") as f:
    json.dump(irrigation_log, f, indent=2)

print(f"Irrigation control model log saved to: {log_filename}")

# Print summary
print("\n" + "="*60)
print("IRRIGATION CONTROL MODEL SUMMARY")
print("="*60)
print("Model: Decision Tree Classifier")
print(f"Training Time: {irrigation_log['training_time']['formatted']}")
print(f"Dataset: {irrigation_log['dataset_info']['total_samples']} samples")
print(f"Features: {', '.join(irrigation_log['dataset_info']['features'])}")
print(f"Test Accuracy: {irrigation_log['results']['accuracy']:.4f}")
print(f"F1 Score: {irrigation_log['results']['f1_score']:.4f}")
print(f"Precision: {irrigation_log['results']['precision']:.4f}")
print(f"Recall: {irrigation_log['results']['recall']:.4f}")
print(f"Tree Depth: {irrigation_log['results']['tree_depth']}")
print(f"Number of Leaves: {irrigation_log['results']['n_leaves']}")
print("\nFeature Importance:")
for feature, importance in feature_importance.items():
    print(f"  {feature}: {importance:.4f}")
# Report the settings the fitted estimator actually carries.
print(f"\nCriterion: {clf.criterion}")
print(f"Max Depth: {clf.max_depth}")
print(f"Min Samples Split: {clf.min_samples_split}")
print(f"Min Samples Leaf: {clf.min_samples_leaf}")
print("="*60)