In [18]:
import pandas as pd

df = pd.read_csv("irrigation_data.csv")

print(df.head())

   Crop Type Soil Type  Temperature (°C)  Humidity (%)  Soil Moisture (%)  \
0      Wheat      Silt             31.83         68.20              42.28   
1      Wheat     Sandy             11.47         53.97              93.81   
2    Soybean      Silt             42.05         46.77              95.93   
3     Barley      Silt             13.15         52.30              10.12   
4  Sugarcane     Sandy             38.97         48.61              58.02   

   Rainfall (mm)  Water Requirement (L/sq.m)  
0           2.65                        3.42  
1         244.31                        0.00  
2         264.71                        0.00  
3         204.36                        0.00  
4         228.59                        0.00  


In [19]:
from sklearn.preprocessing import LabelEncoder

label_enc_crop = LabelEncoder()
label_enc_soil = LabelEncoder()

df["Crop Type"] = label_enc_crop.fit_transform(df["Crop Type"])
df["Soil Type"] = label_enc_soil.fit_transform(df["Soil Type"])

import joblib
joblib.dump(label_enc_crop, "crop_encoder.pkl")
joblib.dump(label_enc_soil, "soil_encoder.pkl")

# Display processed dataset
print(df.head())


   Crop Type  Soil Type  Temperature (°C)  Humidity (%)  Soil Moisture (%)  \
0          7          4             31.83         68.20              42.28   
1          7          3             11.47         53.97              93.81   
2          4          4             42.05         46.77              95.93   
3          0          4             13.15         52.30              10.12   
4          5          3             38.97         48.61              58.02   

   Rainfall (mm)  Water Requirement (L/sq.m)  
0           2.65                        3.42  
1         244.31                        0.00  
2         264.71                        0.00  
3         204.36                        0.00  
4         228.59                        0.00  


In [20]:
from sklearn.model_selection import train_test_split

# Features (X) and target variable (y)
X = df.drop(columns=["Water Requirement (L/sq.m)"])
y = df["Water Requirement (L/sq.m)"]

# Split into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [21]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators=100, random_state=42)

model.fit(X_train, y_train)

joblib.dump(model, "irrigation_model.pkl")
print("Model trained and saved successfully!")


Model trained and saved successfully!


In [22]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate performance
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

print(f"MAE: {mae}, RMSE: {rmse}")


MAE: 0.06880225000000004, RMSE: 0.22997757561336288


In [23]:
model = joblib.load("irrigation_model.pkl")
crop_encoder = joblib.load("crop_encoder.pkl")
soil_encoder = joblib.load("soil_encoder.pkl")

new_data = {
    "Crop Type": "Wheat",
    "Soil Type": "Loamy",
    "Temperature (°C)": 30,
    "Humidity (%)": 50,
    "Soil Moisture (%)": 40,
    "Rainfall (mm)": 10
}

new_data["Crop Type"] = crop_encoder.transform([new_data["Crop Type"]])[0]
new_data["Soil Type"] = soil_encoder.transform([new_data["Soil Type"]])[0]


input_df = pd.DataFrame([new_data])

predicted_water = model.predict(input_df)[0]

print(f"Predicted Water Requirement: {predicted_water} L/sq.m")


Predicted Water Requirement: 4.161999999999998 L/sq.m


In [24]:
labels = [
    {v: i for i, v in enumerate(df["Soil Type"].unique())},
    {v: i for i, v in enumerate(df["Crop Type"].unique())},
]


In [13]:
labels

[{np.int64(4): 0,
  np.int64(3): 1,
  np.int64(2): 2,
  np.int64(1): 3,
  np.int64(0): 4},
 {np.int64(7): 0,
  np.int64(4): 1,
  np.int64(0): 2,
  np.int64(5): 3,
  np.int64(3): 4,
  np.int64(2): 5,
  np.int64(6): 6,
  np.int64(1): 7}]

In [25]:
df

Unnamed: 0,Crop Type,Soil Type,Temperature (°C),Humidity (%),Soil Moisture (%),Rainfall (mm),Water Requirement (L/sq.m)
0,7,4,31.83,68.20,42.28,2.65,3.42
1,7,3,11.47,53.97,93.81,244.31,0.00
2,4,4,42.05,46.77,95.93,264.71,0.00
3,0,4,13.15,52.30,10.12,204.36,0.00
4,5,3,38.97,48.61,58.02,228.59,0.00
...,...,...,...,...,...,...,...
9995,4,3,42.71,11.75,81.45,117.58,0.77
9996,1,3,27.37,63.32,18.00,282.62,0.00
9997,6,2,33.27,37.77,45.65,280.79,0.00
9998,2,4,29.89,59.56,0.31,275.22,0.00


In [28]:
import pandas as pd
import json

# Load the dataset (assuming it's in a CSV file)
df = pd.read_csv("irrigation_data.csv")  # Update the filename accordingly

# Creating label mappings
labels = {
    "Soil Type": {v: i for i, v in enumerate(df["Soil Type"].unique())},
    "Crop Type": {v: i for i, v in enumerate(df["Crop Type"].unique())}
}

# Applying label encoding to the DataFrame
df["Soil Type"] = df["Soil Type"].map(labels["Soil Type"])
df["Crop Type"] = df["Crop Type"].map(labels["Crop Type"])

# Save the encoded dataset
df.to_csv("encoded_water_prediction_dataset.csv", index=False)

# Save labels as JSON
with open("labels.json", "w") as f:
    json.dump(labels, f, indent=4)

print("Label encoding completed and saved!")


Label encoding completed and saved!
