In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)


# ─────────────────────────────────────────────
# PART 1: Crop Recommendation Model
# ─────────────────────────────────────────────

In [2]:
# Read the crop-recommendation dataset from the working directory and
# print a summary of its columns, dtypes and non-null counts.
crop_csv_path = "Crop_recommendation.csv"
crop_df = pd.read_csv(crop_csv_path)

crop_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [3]:
# Target: the "label" column. Features: every remaining column.
y_crop = crop_df["label"]
X_crop = crop_df.drop(columns=["label"])

# Encode labels
# The fitted encoder is kept in `crop_encoder` so the integer ids can be
# translated back to the original label strings.
crop_encoder = LabelEncoder()
y_crop_encoded = crop_encoder.fit_transform(y_crop)


In [4]:
# Hold out 20% of the samples for evaluation, with a fixed seed so the split
# is repeatable. Stratifying on the encoded label keeps each crop class in
# the same proportion in the train and test partitions, so per-class metrics
# on the test split are not skewed by an unlucky draw.
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    X_crop,
    y_crop_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_crop_encoded,
)


In [5]:
# Crop classifier: a 100-tree random forest with a fixed seed, trained on
# the training partition of the crop data.
clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
clf.fit(X=Xc_train, y=yc_train)

In [6]:
# Predict the encoded crop class for every held-out sample.
yc_pred = clf.predict(Xc_test)

# Build the per-class metrics as a dict, keyed by the real crop names instead
# of the integer ids produced by `crop_encoder`. `labels` is passed explicitly
# so each name stays aligned with its id even if some class does not occur in
# the test split.
report = classification_report(
    yc_test,
    yc_pred,
    labels=np.arange(len(crop_encoder.classes_)),
    target_names=list(crop_encoder.classes_),
    output_dict=True,
)


In [7]:
# Turn the report dict into a frame, drop its last row (presumably the
# "support" counts — confirm against the report layout) and transpose so
# each report entry becomes one heatmap row.
report_frame = pd.DataFrame(report)
metrics_by_entry = report_frame.iloc[:-1, :].T

# Draw the annotated heatmap on an explicit figure/axes pair, write it to
# disk, then close the figure so it is not kept open.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(metrics_by_entry, annot=True, cmap="Blues", ax=ax)
ax.set_title("Crop Recommendation Classification Report")
fig.tight_layout()
fig.savefig("crop_classification_report.png")
plt.close(fig)


In [8]:
# Persist the trained crop classifier together with the label encoder that
# was fitted on the crop names.
joblib.dump(value=clf, filename="crop_recommendation_model.pkl")
joblib.dump(value=crop_encoder, filename="crop_label_encoder.pkl")


['crop_label_encoder.pkl']

# ─────────────────────────────────────────────
# PART 2: Crop Yield Prediction Model
# ─────────────────────────────────────────────


In [9]:
# Read the crop-yield dataset and print a summary of its columns, dtypes
# and non-null counts.
yield_csv_path = "crop_yield.csv"  # Replace with your actual file
yield_df = pd.read_csv(yield_csv_path)
yield_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 10 columns):
 #   Column                  Non-Null Count    Dtype  
---  ------                  --------------    -----  
 0   Region                  1000000 non-null  object 
 1   Soil_Type               1000000 non-null  object 
 2   Crop                    1000000 non-null  object 
 3   Rainfall_mm             1000000 non-null  float64
 4   Temperature_Celsius     1000000 non-null  float64
 5   Fertilizer_Used         1000000 non-null  bool   
 6   Irrigation_Used         1000000 non-null  bool   
 7   Weather_Condition       1000000 non-null  object 
 8   Days_to_Harvest         1000000 non-null  int64  
 9   Yield_tons_per_hectare  1000000 non-null  float64
dtypes: bool(2), float64(3), int64(1), object(4)
memory usage: 62.9+ MB


In [10]:
# Encode categorical columns
# One separate, unfitted LabelEncoder per encoder name.
le_region, le_soil, le_crop, le_weather = (LabelEncoder() for _ in range(4))


In [11]:
# Fit each encoder on its source column and store the resulting integer
# codes in a new "*_encoded" column alongside the original one.
for target_col, col_encoder, source_col in (
    ("Region_encoded", le_region, "Region"),
    ("Soil_encoded", le_soil, "Soil_Type"),
    ("Crop_encoded", le_crop, "Crop"),
    ("Weather_encoded", le_weather, "Weather_Condition"),
):
    yield_df[target_col] = col_encoder.fit_transform(yield_df[source_col])


In [12]:
# Feature Set
# Column names used as inputs to the yield model, in column order.
features = [
    "Region_encoded",
    "Soil_encoded",
    "Crop_encoded",
    "Rainfall_mm",
    "Temperature_Celsius",
    "Fertilizer_Used",
    "Irrigation_Used",
    "Weather_encoded",
    "Days_to_Harvest",
]


In [13]:
# Regression target and the feature columns listed in `features`.
y_yield = yield_df["Yield_tons_per_hectare"]
X_yield = yield_df.loc[:, features]

# 80/20 train/test partition with a fixed seed for repeatability.
Xy_train, Xy_test, yy_train, yy_test = train_test_split(
    X_yield,
    y_yield,
    test_size=0.2,
    random_state=42,
)


In [25]:
# Yield regressor: a 10-tree random forest, depth capped at 5, at least
# 4 samples per leaf, fixed seed. Trained on the yield training partition.
reg = RandomForestRegressor(
    n_estimators=10,
    max_depth=5,
    min_samples_leaf=4,
    random_state=42,
)
reg.fit(X=Xy_train, y=yy_train)

In [26]:
# The metric functions used here are imported once in the notebook's import
# cell, not re-imported mid-notebook.

# Make predictions
yy_pred = reg.predict(Xy_test)

# Calculate evaluation metrics
mse = mean_squared_error(yy_test, yy_pred)
rmse = np.sqrt(mse)  # Root Mean Squared Error
mae = mean_absolute_error(yy_test, yy_pred)  # Mean Absolute Error
r2 = r2_score(yy_test, yy_pred)  # R² Score

# Print results (the header has no placeholders, so it is a plain string)
print("📊 Model Evaluation Metrics:")
print(f"➡️ Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"➡️ Mean Absolute Error (MAE): {mae:.2f}")
print(f"➡️ R² Score: {r2:.2f}")


📊 Model Evaluation Metrics:
➡️ Root Mean Squared Error (RMSE): 0.54
➡️ Mean Absolute Error (MAE): 0.43
➡️ R² Score: 0.90


In [27]:
# Scatter the predicted yield against the actual yield on an explicit
# figure/axes pair, save it to disk, then close the figure.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=yy_test, y=yy_pred, ax=ax)
ax.set_xlabel("Actual Yield")
ax.set_ylabel("Predicted Yield")
ax.set_title(f"Yield Prediction\nRMSE: {rmse:.2f}, R²: {r2:.2f}")
fig.tight_layout()
fig.savefig("yield_prediction_plot.png")
plt.close(fig)


In [29]:
# Save everything
# Output file name -> object to persist: the yield regressor plus the four
# encoders fitted on the yield data's categorical columns.
yield_artifacts = {
    "crop_yield_model.pkl": reg,
    "region_encoder.pkl": le_region,
    "soil_encoder.pkl": le_soil,
    "crop_encoder.pkl": le_crop,
    "weather_encoder.pkl": le_weather,
}
for artifact_path, artifact in yield_artifacts.items():
    joblib.dump(artifact, artifact_path)

print("✅ All models trained, evaluated, and exported successfully!")

✅ All models trained, evaluated, and exported successfully!
