In [4]:
import xgboost as xgb
import numpy as np
import pandas as pd
import rasterio
import glob
import os
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime

In [5]:
def read_tif_files(directory_path, pattern):
    """
    Read band 1 of every ``.tif`` file in a directory whose name contains
    ``pattern``.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory to search (top level only). The path is taken literally:
        glob metacharacters in it (``[``, ``]``, ``*``, ``?``) are escaped.
    pattern : str
        Text the file name must contain, e.g. 'NDMI' or 'NDVI'. It is placed
        into the glob expression as-is, so wildcards inside it stay active.

    Returns
    -------
    dict
        Maps each matching file's base name (e.g. ``'x_NDMI.tif'``) to the
        array returned by ``src.read(1)``. Entries are inserted in sorted
        path order. The dict is empty when nothing matches.
    """
    # Escape only the directory component; without this a folder such as
    # "tiles[2023]" is parsed as a character class and silently matches nothing.
    search_expr = os.path.join(
        glob.escape(os.fspath(directory_path)), f'*{pattern}*.tif'
    )
    data_dict = {}

    for file in sorted(glob.glob(search_expr)):
        with rasterio.open(file) as src:
            # Only the first band is read; any further bands are ignored.
            band = src.read(1)
        # Keys are plain file names, not parsed dates. Callers must look
        # rasters up with the same naming scheme.
        data_dict[os.path.basename(file)] = band

    return data_dict

In [6]:

def _lookup_raster(rasters, file_name, index_name):
    """
    Return the raster stored for ``file_name`` in ``rasters``, or None.

    The exact name is tried first. If that fails and the name carries the
    other index token ('NDVI' when looking for NDMI, or vice versa), the token
    is swapped and the lookup retried, so that a single name from the
    precipitation table can address both the NDMI and the NDVI raster.
    """
    if file_name in rasters:
        return rasters[file_name]
    if not isinstance(file_name, str):
        return None
    for other in ('NDMI', 'NDVI'):
        if other != index_name and other in file_name:
            sibling = file_name.replace(other, index_name)
            if sibling in rasters:
                return rasters[sibling]
    return None


def prepare_features(ndmi_dir, ndvi_dir, precip_df):
    """
    Build one feature row per pair of consecutive precipitation records.

    Each row is the concatenation of the flattened current NDMI, current NDVI,
    previous NDMI and previous NDVI rasters, followed by the current
    ``precipitation_sum``. The target is the current ``irrigation`` value.
    Records for which any of the four rasters cannot be found are skipped
    silently.

    Parameters
    ----------
    ndmi_dir, ndvi_dir : str
        Directories passed to ``read_tif_files`` with the patterns 'NDMI' and
        'NDVI' respectively.
    precip_df : pandas.DataFrame
        Must have columns ``tiff_file``, ``precipitation_sum`` and
        ``irrigation``. It is not modified; a sorted copy is used.

    Returns
    -------
    features : numpy.ndarray
        One row per usable record (empty when none is usable).
    targets : numpy.ndarray
        The ``irrigation`` value for each row.
    dates : list
        The ``tiff_file`` value of the current record for each row.

    Notes
    -----
    Raster dicts are keyed by file name, and every NDMI key contains 'NDMI'
    while every NDVI key contains 'NDVI'. A ``tiff_file`` value therefore
    normally exists in at most one of the two dicts. The lookup tries the
    exact name first and then the name with the index token swapped.
    NOTE(review): the token swap assumes the NDMI/NDVI files of one
    acquisition differ only by that token — confirm against the real naming.

    Records are ordered by sorting ``tiff_file`` as plain values; this is
    chronological only if the file names sort by date — TODO confirm.
    """
    ndmi_dict = read_tif_files(ndmi_dir, 'NDMI')
    ndvi_dict = read_tif_files(ndvi_dir, 'NDVI')

    # sort_values returns a new frame, so the caller's DataFrame is untouched.
    ordered = precip_df.sort_values('tiff_file')

    # Pull the columns out once instead of materialising a row Series
    # (`.iloc[i][col]`) several times per iteration.
    file_names = ordered['tiff_file'].tolist()
    precip_values = ordered['precipitation_sum'].to_numpy()
    irrigation_values = ordered['irrigation'].to_numpy()

    features = []
    targets = []
    dates = []

    # Start at 1: every row needs the record just before it in sorted order.
    for i in range(1, len(file_names)):
        current_file = file_names[i]
        previous_file = file_names[i - 1]

        # Order here defines the feature layout of each row.
        rasters = (
            _lookup_raster(ndmi_dict, current_file, 'NDMI'),
            _lookup_raster(ndvi_dict, current_file, 'NDVI'),
            _lookup_raster(ndmi_dict, previous_file, 'NDMI'),
            _lookup_raster(ndvi_dict, previous_file, 'NDVI'),
        )
        if any(raster is None for raster in rasters):
            continue

        feature_row = np.concatenate(
            [*(raster.flatten() for raster in rasters), [precip_values[i]]]
        )

        features.append(feature_row)
        targets.append(irrigation_values[i])
        dates.append(current_file)

    return np.array(features), np.array(targets), dates

In [7]:

def train_irrigation_model(features, targets, n_splits=5):
    """
    Train one XGBoost regressor per time-series fold and return the best one.

    Parameters
    ----------
    features : array-like
        One row per sample, in time order. Converted with ``np.asarray``.
    targets : array-like
        Target value for each row of ``features``.
    n_splits : int, default 5
        Number of ``TimeSeriesSplit`` folds. Lower it for short series, which
        cannot be split five ways.

    Returns
    -------
    model : xgb.XGBRegressor
        The fold model with the highest validation R². If R² is undefined
        (NaN) for every fold, the model with the lowest RMSE is returned.
    scores : list of dict
        One ``{'R2': ..., 'RMSE': ...}`` entry per fold, in fold order.

    Notes
    -----
    Each validation fold is used both for early stopping and for scoring, so
    the reported scores are optimistic. The returned model was fitted only on
    its own fold's training slice, not on the full data.
    """
    # Local import: only needed for the xgboost signature probe below.
    import inspect

    # Positional indexing with fold index arrays needs real ndarrays;
    # lists or pandas objects would fail or index by label.
    features = np.asarray(features)
    targets = np.asarray(targets)

    # Time series split for validation
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # XGBoost parameters
    params = {
        'objective': 'reg:squarederror',
        'learning_rate': 0.1,
        'max_depth': 6,
        'n_estimators': 100,
        'min_child_weight': 1,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'gamma': 0
    }

    # Older xgboost takes `early_stopping_rounds` in fit(); newer releases
    # accept it only in the constructor and raise TypeError otherwise.
    # Probe the installed version and route the setting accordingly.
    early_stopping_rounds = 10
    fit_kwargs = {'verbose': False}
    fit_parameters = inspect.signature(xgb.XGBRegressor.fit).parameters
    if 'early_stopping_rounds' in fit_parameters:
        fit_kwargs['early_stopping_rounds'] = early_stopping_rounds
    else:
        params['early_stopping_rounds'] = early_stopping_rounds

    models = []
    scores = []

    for train_idx, val_idx in tscv.split(features):
        X_train, X_val = features[train_idx], features[val_idx]
        y_train, y_val = targets[train_idx], targets[val_idx]

        model = xgb.XGBRegressor(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            **fit_kwargs
        )

        pred = model.predict(X_val)
        score = r2_score(y_val, pred)
        rmse = np.sqrt(mean_squared_error(y_val, pred))

        models.append(model)
        scores.append({'R2': score, 'RMSE': rmse})

    # np.argmax treats NaN as the maximum, so a fold with undefined R² would
    # win. Ignore NaNs, and fall back to RMSE if no fold has a usable R².
    r2_values = np.array([s['R2'] for s in scores], dtype=float)
    if np.all(np.isnan(r2_values)):
        best_model_idx = int(np.argmin([s['RMSE'] for s in scores]))
    else:
        best_model_idx = int(np.nanargmax(r2_values))
    return models[best_model_idx], scores

In [8]:
def optimize_irrigation(model, features, current_irrigation,
                        min_irrigation=0, max_irrigation=50):
    """
    Predict an irrigation amount for one sample and the saving it implies.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``; it is called with a single
        ``(1, n_features)`` row.
    features : array-like
        Feature values for one sample; reshaped to one row, so any shape is
        accepted as long as the element count matches what the model expects.
    current_irrigation : float
        Amount currently applied, compared against the clipped prediction.
    min_irrigation, max_irrigation : float, default 0 and 50
        Bounds the prediction is clipped to. Adjust to your requirements.

    Returns
    -------
    optimal_irrigation : numpy scalar
        Model prediction clipped to ``[min_irrigation, max_irrigation]``.
    savings : number
        ``current_irrigation - optimal_irrigation`` when the recommendation is
        lower than the current amount, otherwise 0.

    Raises
    ------
    ValueError
        If ``min_irrigation`` is greater than ``max_irrigation``.
    """
    if min_irrigation > max_irrigation:
        raise ValueError(
            "min_irrigation must not be greater than max_irrigation"
        )

    # np.asarray lets plain lists / tuples through; the model always
    # receives exactly one row.
    sample = np.asarray(features).reshape(1, -1)
    optimal_irrigation = model.predict(sample)[0]

    # Apply constraints
    optimal_irrigation = np.clip(optimal_irrigation, min_irrigation, max_irrigation)

    # A recommendation at or above the current amount is reported as zero
    # saving, never as a negative one.
    savings = current_irrigation - optimal_irrigation if optimal_irrigation < current_irrigation else 0

    return optimal_irrigation, savings