## Importing Required Libraries
This section imports all necessary libraries for data manipulation, visualization, machine learning model development, and evaluation.

In [None]:
import pandas as pd# Importing pandas for data manipulation
import matplotlib.pyplot as plt# Importing matplotlib for plotting
import seaborn as sns# Importing seaborn for enhanced visualization
from sklearn.model_selection import train_test_split# Importing train_test_split for splitting the dataset
from sklearn.ensemble import RandomForestClassifier# Importing RandomForestClassifier for classification tasks
from sklearn.multioutput import MultiOutputClassifier# Importing MultiOutputClassifier for multi-label classification
from sklearn.metrics import classification_report# Importing classification_report for evaluating the model

from sklearn.preprocessing import MinMaxScaler# Importing MinMaxScaler for feature scaling
import joblib# Importing joblib for saving and loading models

## Loading the Dataset
The dataset is loaded into a pandas DataFrame named `df` for further analysis and processing.

In [None]:
# Read the irrigation dataset from a CSV file (path is relative to the working directory)
df=pd.read_csv("irrigation_machine.csv")

In [None]:
df.head()  # Display the first 5 rows of the DataFrame

In [None]:
df.tail()  # Display the last 5 rows of the DataFrame

In [None]:
df.info()  # Print a summary of the DataFrame: column names, non-null counts and dtypes

In [None]:
df.columns  # Display the names of all columns


In [None]:
# Remove the 'Unnamed: 0' column (presumably a leftover row index from the CSV export)
df = df.drop(columns='Unnamed: 0')
df.head()  # Display the first 5 rows to confirm the column is gone

In [None]:
df.describe()  # Display summary statistics (count, mean, std, min, quartiles, max) of the numeric columns

## Defining Features and Labels
In this step, the independent variables (features) and dependent variables (labels) are separated for use in model training and evaluation.

In [None]:
# Features (independent variables): the first 20 columns, i.e. positions 0-19
# (the stop index of an iloc slice is exclusive).
x = df.iloc[:, :20]
# Labels (dependent variables): every column from position 20 onward.
y = df.iloc[:, 20:]

In [None]:
x.sample(10)  # Display 10 randomly selected rows of the feature DataFrame

In [None]:
y.sample(10)  # Display 10 randomly selected rows of the label DataFrame

In [None]:
x.info()  # Print column names, non-null counts and dtypes of the feature DataFrame

In [None]:
y.info()  # Print column names, non-null counts and dtypes of the label DataFrame

In [None]:
x  # Display the feature (independent variable) DataFrame

In [None]:
x.shape,y.shape  # Display the (rows, columns) shape of the features and of the labels

## Feature Scaling
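Feature scaling is performed to normalize all input features to a common scale; `MinMaxScaler` rescales each feature to the range 0 to 1. This ensures balanced feature contribution and improves numerical stability for scale-sensitive models. Tree-based models such as the Random Forest used below are largely insensitive to feature scale, so here the step mainly keeps the inputs consistent should a different model be substituted later.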

In [None]:
# Build a MinMaxScaler, which rescales each feature with (x - min) / (max - min).
scalar = MinMaxScaler()

# Step 1 - fit: learn the per-feature minimum and maximum from x.
# NOTE: x is the full dataset here, i.e. the fit happens before any train/test split.
scalar.fit(x)

# Step 2 - transform: apply the learned scaling to the same data.
x_scaled = scalar.transform(x)

# Show the result: a 2-D NumPy array in which every value lies between 0 and 1,
# so features with larger raw ranges no longer dominate the others.
x_scaled

## Splitting the Dataset: Training and Testing Sets
The dataset is divided into training and testing subsets to evaluate the model's generalization performance. Here, 80% of the data is used for training and the remaining 20% is held out for testing.

In [None]:
# Split the RAW features first so the test rows stay unseen during preprocessing.
# test_size=0.2 holds out 20% of the rows for testing; random_state=42 makes the
# split reproducible.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same scaling to both
# subsets. Fitting it on the full dataset would leak the test set's min/max values
# into preprocessing and make the evaluation optimistic.
# Test values may fall slightly outside [0, 1] when a test row exceeds the
# range seen during training; that is expected.
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train_raw)
x_test = scalar.transform(x_test_raw)


In [None]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape  # Display the shapes of the training and testing sets

## Model Training: Classifier Development
A Random Forest classifier is trained using the training data. MultiOutputClassifier is used to enable multi-label classification, allowing the model to predict irrigation needs for multiple parcels simultaneously.

In [None]:
# Multi-label setup: MultiOutputClassifier fits one copy of the base estimator per label column.
from sklearn.multioutput import MultiOutputClassifier  # Wrapper that fits one estimator per target
from sklearn.ensemble import RandomForestClassifier  # Base estimator

# Base Random Forest with hand-picked hyperparameters
rf = RandomForestClassifier(
    random_state=42,        # Fixed seed for reproducibility
    n_estimators=200,       # Number of trees in the forest
    max_depth=10,           # Maximum depth of each tree
    max_features='sqrt',    # Features considered at each split; alternatives: 'log2', None, int or float
    min_samples_split=4,    # Minimum samples required to split a node
    min_samples_leaf=2,     # Minimum samples required in a leaf
)

# Wrap the forest so every label column gets its own classifier, then train on the training split
model = MultiOutputClassifier(estimator=rf)
model.fit(x_train, y_train)

## Model Evaluation
The trained model is evaluated on the test set using classification metrics to assess its predictive performance and reliability.

In [None]:
# Predict every label column for the held-out test rows
y_pred = model.predict(x_test)

# Print the heading followed by the classification report for the test predictions
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

In [None]:
# Print the column-wise sum of the three parcel columns
# (equals the number of ON rows per parcel, assuming the columns hold 0/1 values)
print(df[['parcel_0', 'parcel_1', 'parcel_2']].sum())

In [None]:
import matplotlib.pyplot as plt  # Plotting library (also imported at the top of the notebook)

# Shorthand for the three parcel status columns
p0, p1, p2 = df['parcel_0'], df['parcel_1'], df['parcel_2']

# Irrigation scenarios to plot, keyed by the subplot title.
# `&` is an element-wise AND: the result is ON only where every operand is ON.
conditions = {
    # Individual parcels
    "Parcel 0 ON": p0,
    "Parcel 1 ON": p1,
    "Parcel 2 ON": p2,
    # Pairs of parcels active on the same row
    "Parcel 0 & 1 ON": p0 & p1,
    "Parcel 0 & 2 ON": p0 & p2,
    "Parcel 1 & 2 ON": p1 & p2,
    # All three parcels active on the same row
    "All Parcels ON": p0 & p1 & p2,
}

# One subplot row per scenario, stacked vertically and sharing the x-axis
fig, axs = plt.subplots(nrows=len(conditions), figsize=(10, 15), sharex=True)

for ax, title in zip(axs, conditions):
    condition = conditions[title]

    # Step plot suits ON/OFF data; where='post' holds each value until the next row.
    # The x-axis is the DataFrame index (presumably ordered in time - confirm against the data source).
    ax.step(df.index, condition.astype(int), where='post', linewidth=1, color='teal')

    # Title and a two-level OFF/ON y-axis for this subplot
    ax.set_title(f"Sprinkler - {title}")
    ax.set_ylabel("Status")
    ax.set_yticks([0, 1])
    ax.set_yticklabels(['OFF', 'ON'])

# Only the bottom subplot carries the shared x-axis label
axs[-1].set_xlabel("Time Index (Row Number)")

# Render the figure
plt.show()

In [None]:
# Rows on which at least one pump is running (union of the three parcels)
any_pump_on = (df['parcel_0'] == 1) | (df['parcel_1'] == 1) | (df['parcel_2'] == 1)

plt.figure(figsize=(15, 5))

# Shade the combined coverage first so the individual pump lines are drawn on top of it.
# Previously any_pump_on was computed but never plotted, although the title announces it.
plt.fill_between(df.index, 0, any_pump_on.astype(int), step='post',
                 color='gray', alpha=0.2, label='Any Pump ON')

# Plot individual pump statuses
plt.step(df.index, df['parcel_0'], where='post', linewidth=2, label='Parcel 0 Pump', color='blue')
plt.step(df.index, df['parcel_1'], where='post', linewidth=2, label='Parcel 1 Pump', color='orange')
plt.step(df.index, df['parcel_2'], where='post', linewidth=2, label='Parcel 2 Pump', color='green')

plt.title("Pump Activity and Combined Farm Coverage")
plt.xlabel("Time Index (Row Number)")
plt.ylabel("Status")
plt.yticks([0, 1], ['OFF', 'ON'])
plt.legend(loc='upper right')
plt.show()


## Machine Learning Model Workflow Steps
This notebook follows a structured approach to develop a machine learning model for smart irrigation. The key steps are:
1. **Data Collection**: Gather relevant data for the irrigation system.
2. **Data Loading**: Import the dataset into the working environment for analysis.
3. **Exploratory Data Analysis (EDA)**: Analyze and visualize the data to understand patterns, trends, and relationships.
4. **Model Building**: Develop and train the machine learning model using appropriate algorithms and techniques.
5. **Model Evaluation**: Assess the model's performance using suitable metrics and validation methods.
6. **Model Saving**: Save the trained model for future deployment and inference.

In [None]:
import joblib
from sklearn.pipeline import Pipeline

# Persist the fitted MinMaxScaler alongside the model: the classifier was trained on
# scaled features, so raw inputs must pass through this same scaler before
# model.predict() at inference time. It goes to its own file so the existing
# model artifact keeps its original contents.
joblib.dump(scalar, "Farm_Irrigation_Scaler.pkl")

# Persist the trained multi-output classifier
joblib.dump(model, "Farm_Irrigation_System.pkl")