In [1]:
from google.colab import files
uploaded = files.upload()

Saving irrigation.csv to irrigation.csv


**Importing necessary libraries**

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report

from sklearn.preprocessing import MinMaxScaler
import joblib

**Load the Dataset**

In [4]:
df = pd.read_csv("irrigation.csv")
print("✅ Dataset loaded successfully!\n")

✅ Dataset loaded successfully!



**Preview of the Dataset (First 5 Rows)**

In [5]:
# first 5 rows to be printed, df.tail()
df.head()

Unnamed: 0.1,Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
0,0,1.0,2.0,1.0,7.0,0.0,1.0,1.0,4.0,0.0,...,8.0,1.0,0.0,2.0,1.0,9.0,2.0,0,1,0
1,1,5.0,1.0,3.0,5.0,2.0,2.0,1.0,2.0,3.0,...,4.0,5.0,5.0,2.0,2.0,2.0,7.0,0,0,0
2,2,3.0,1.0,4.0,3.0,4.0,0.0,1.0,6.0,0.0,...,3.0,3.0,1.0,0.0,3.0,1.0,0.0,1,1,0
3,3,2.0,2.0,4.0,3.0,5.0,0.0,3.0,2.0,2.0,...,4.0,1.0,1.0,4.0,1.0,3.0,2.0,0,0,0
4,4,4.0,3.0,3.0,2.0,5.0,1.0,3.0,1.0,1.0,...,1.0,3.0,2.0,2.0,1.0,1.0,0.0,1,1,0


**Dataset Structure Overview**

In [6]:
print("📊 Dataset Information Summary:\n")
print("🗂️  Total Entries and Columns:")
print("="*40)
df_shape = df.shape
print(f"🔢 Rows: {df_shape[0]} | 🧬 Columns: {df_shape[1]}")
print("="*40 + "\n")

print("🧾 Detailed Column Info:")
print("-"*40)
df.info()
print("-"*40)

print("\n📝 Note: Non-null entries indicate no missing data in that column.\n")

📊 Dataset Information Summary:

🗂️  Total Entries and Columns:
🔢 Rows: 2000 | 🧬 Columns: 24

🧾 Detailed Column Info:
----------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 24 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  2000 non-null   int64  
 1   sensor_0    2000 non-null   float64
 2   sensor_1    2000 non-null   float64
 3   sensor_2    2000 non-null   float64
 4   sensor_3    2000 non-null   float64
 5   sensor_4    2000 non-null   float64
 6   sensor_5    2000 non-null   float64
 7   sensor_6    2000 non-null   float64
 8   sensor_7    2000 non-null   float64
 9   sensor_8    2000 non-null   float64
 10  sensor_9    2000 non-null   float64
 11  sensor_10   2000 non-null   float64
 12  sensor_11   2000 non-null   float64
 13  sensor_12   2000 non-null   float64
 14  sensor_13   2000 non-null   float64
 15  sensor_14   2000 non-null   float

**Feature & Target Column Overview**

In [7]:
print("🧾 All Columns in the Dataset:\n")

columns = df.columns.tolist()

# Assuming first 19 are sensor features and last 3 are parcel labels
sensor_cols = columns[:19]
parcel_cols = columns[19:]

print("🧪 Sensor Features (🌡️, 💧, 🔋, etc.):")
print("──────────────────────────────────────")
for i, col in enumerate(sensor_cols, 1):
    print(f"{i:2d}. 📍 {col}")

print("\n🚜 Parcel Labels (Target Columns):")
print("────────────────────────────────────")
for i, col in enumerate(parcel_cols, 1):
    print(f"{i:2d}. 🎯 {col}")

🧾 All Columns in the Dataset:

🧪 Sensor Features (🌡️, 💧, 🔋, etc.):
──────────────────────────────────────
 1. 📍 Unnamed: 0
 2. 📍 sensor_0
 3. 📍 sensor_1
 4. 📍 sensor_2
 5. 📍 sensor_3
 6. 📍 sensor_4
 7. 📍 sensor_5
 8. 📍 sensor_6
 9. 📍 sensor_7
10. 📍 sensor_8
11. 📍 sensor_9
12. 📍 sensor_10
13. 📍 sensor_11
14. 📍 sensor_12
15. 📍 sensor_13
16. 📍 sensor_14
17. 📍 sensor_15
18. 📍 sensor_16
19. 📍 sensor_17

🚜 Parcel Labels (Target Columns):
────────────────────────────────────
 1. 🎯 sensor_18
 2. 🎯 sensor_19
 3. 🎯 parcel_0
 4. 🎯 parcel_1
 5. 🎯 parcel_2


**Cleaned Dataset Preview **

In [8]:
df = df.drop('Unnamed: 0', axis=1)
df.head()

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
0,1.0,2.0,1.0,7.0,0.0,1.0,1.0,4.0,0.0,3.0,...,8.0,1.0,0.0,2.0,1.0,9.0,2.0,0,1,0
1,5.0,1.0,3.0,5.0,2.0,2.0,1.0,2.0,3.0,1.0,...,4.0,5.0,5.0,2.0,2.0,2.0,7.0,0,0,0
2,3.0,1.0,4.0,3.0,4.0,0.0,1.0,6.0,0.0,2.0,...,3.0,3.0,1.0,0.0,3.0,1.0,0.0,1,1,0
3,2.0,2.0,4.0,3.0,5.0,0.0,3.0,2.0,2.0,5.0,...,4.0,1.0,1.0,4.0,1.0,3.0,2.0,0,0,0
4,4.0,3.0,3.0,2.0,5.0,1.0,3.0,1.0,1.0,2.0,...,1.0,3.0,2.0,2.0,1.0,1.0,0.0,1,1,0


**Statistics of the dataset**

In [10]:
df.describe()

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,...,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,1.437,1.659,2.6545,2.6745,2.8875,1.411,3.3155,4.2015,1.214,1.901,...,2.7315,3.416,1.2065,2.325,1.7295,2.2745,1.8135,0.6355,0.7305,0.212
std,1.321327,1.338512,1.699286,1.855875,1.816451,1.339394,2.206444,2.280241,1.386782,1.518668,...,1.774537,1.960578,1.258034,1.715181,1.561265,1.67169,1.469285,0.48141,0.443811,0.408827
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,1.0,1.0,2.0,0.0,2.0,3.0,0.0,1.0,...,1.0,2.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,1.0,1.0,2.0,2.0,3.0,1.0,3.0,4.0,1.0,2.0,...,2.0,3.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,0.0
75%,2.0,2.0,4.0,4.0,4.0,2.0,5.0,6.0,2.0,3.0,...,4.0,5.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,0.0
max,8.0,9.0,10.0,11.0,12.0,7.0,13.0,12.0,8.0,9.0,...,11.0,11.0,6.0,10.0,11.0,10.0,7.0,1.0,1.0,1.0


**Splitting the Dataset into Features and Labels**

In [11]:
# -------------------------------
# STEP 2: DEFINE FEATURES AND LABELS
# -------------------------------

X = df.iloc[:, 0:20]   # This gives you columns 0 to 19 (sensor_0 to sensor_19)


y = df.iloc[:, 20:]

**Random Sample of Input Features (Sensor Data)**

In [12]:
X.sample(10)

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19
1807,0.0,2.0,2.0,3.0,8.0,1.0,1.0,1.0,1.0,3.0,1.0,3.0,9.0,2.0,3.0,2.0,3.0,1.0,1.0,2.0
146,4.0,4.0,2.0,2.0,0.0,3.0,2.0,1.0,1.0,3.0,3.0,0.0,6.0,3.0,4.0,3.0,1.0,1.0,4.0,6.0
1957,1.0,1.0,3.0,2.0,4.0,3.0,6.0,1.0,0.0,2.0,0.0,5.0,1.0,4.0,2.0,2.0,2.0,2.0,4.0,0.0
1392,2.0,0.0,4.0,4.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,1.0,2.0,2.0,2.0,4.0,1.0,4.0
499,2.0,2.0,2.0,1.0,1.0,6.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,0.0,6.0,2.0,4.0,0.0
1918,1.0,3.0,2.0,1.0,3.0,2.0,2.0,2.0,0.0,3.0,0.0,2.0,3.0,3.0,5.0,4.0,6.0,2.0,3.0,1.0
274,5.0,5.0,4.0,1.0,3.0,5.0,1.0,4.0,3.0,2.0,4.0,3.0,3.0,0.0,1.0,3.0,1.0,4.0,1.0,1.0
1045,0.0,3.0,1.0,1.0,5.0,1.0,4.0,5.0,1.0,1.0,1.0,3.0,4.0,6.0,4.0,0.0,3.0,1.0,2.0,0.0
254,0.0,1.0,3.0,3.0,0.0,0.0,5.0,7.0,0.0,3.0,0.0,5.0,7.0,2.0,8.0,1.0,1.0,2.0,2.0,0.0
1474,1.0,1.0,0.0,2.0,0.0,1.0,1.0,4.0,1.0,0.0,2.0,7.0,3.0,7.0,5.0,0.0,1.0,0.0,3.0,2.0


**Random Sample of Target Labels (Parcel Data)**

In [13]:
y.sample(10)

Unnamed: 0,parcel_0,parcel_1,parcel_2
29,0,1,0
1705,1,1,1
299,1,0,0
1432,1,1,0
1736,0,0,0
1029,1,0,0
1308,1,1,0
853,1,0,0
1336,1,1,1
880,0,0,0


**Sensor Feature Set Overview **

In [14]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 20 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   sensor_0   2000 non-null   float64
 1   sensor_1   2000 non-null   float64
 2   sensor_2   2000 non-null   float64
 3   sensor_3   2000 non-null   float64
 4   sensor_4   2000 non-null   float64
 5   sensor_5   2000 non-null   float64
 6   sensor_6   2000 non-null   float64
 7   sensor_7   2000 non-null   float64
 8   sensor_8   2000 non-null   float64
 9   sensor_9   2000 non-null   float64
 10  sensor_10  2000 non-null   float64
 11  sensor_11  2000 non-null   float64
 12  sensor_12  2000 non-null   float64
 13  sensor_13  2000 non-null   float64
 14  sensor_14  2000 non-null   float64
 15  sensor_15  2000 non-null   float64
 16  sensor_16  2000 non-null   float64
 17  sensor_17  2000 non-null   float64
 18  sensor_18  2000 non-null   float64
 19  sensor_19  2000 non-null   float64
dtypes: float

**Target Label Overview**

In [15]:
y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   parcel_0  2000 non-null   int64
 1   parcel_1  2000 non-null   int64
 2   parcel_2  2000 non-null   int64
dtypes: int64(3)
memory usage: 47.0 KB


**Complete Sensor Feature Dataset**

In [16]:
X

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19
0,1.0,2.0,1.0,7.0,0.0,1.0,1.0,4.0,0.0,3.0,1.0,3.0,6.0,8.0,1.0,0.0,2.0,1.0,9.0,2.0
1,5.0,1.0,3.0,5.0,2.0,2.0,1.0,2.0,3.0,1.0,3.0,2.0,2.0,4.0,5.0,5.0,2.0,2.0,2.0,7.0
2,3.0,1.0,4.0,3.0,4.0,0.0,1.0,6.0,0.0,2.0,3.0,2.0,4.0,3.0,3.0,1.0,0.0,3.0,1.0,0.0
3,2.0,2.0,4.0,3.0,5.0,0.0,3.0,2.0,2.0,5.0,3.0,1.0,2.0,4.0,1.0,1.0,4.0,1.0,3.0,2.0
4,4.0,3.0,3.0,2.0,5.0,1.0,3.0,1.0,1.0,2.0,4.0,5.0,3.0,1.0,3.0,2.0,2.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,4.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,3.0,3.0,1.0,2.0,3.0,2.0,1.0,1.0,0.0
1996,1.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,1.0,5.0,2.0,2.0,4.0,3.0,3.0,0.0,1.0,0.0,6.0,2.0
1997,1.0,3.0,3.0,1.0,1.0,4.0,8.0,1.0,0.0,0.0,3.0,2.0,4.0,2.0,3.0,4.0,4.0,4.0,1.0,0.0
1998,2.0,1.0,0.0,2.0,2.0,0.0,1.0,3.0,0.0,0.0,0.0,5.0,2.0,2.0,4.0,0.0,2.0,0.0,3.0,0.0


**Dataset Dimensions – Features vs. Labels**

In [17]:
X.shape, y.shape

((2000, 20), (2000, 3))