In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib  # For saving the model

# Step 1: Load Datasets
statewise_data = pd.read_csv("newindia.csv", encoding="utf-8")
plant_data = pd.read_csv("newplantss.csv", encoding="utf-8")

# Step 2: Standardize Column Names
statewise_data.columns = statewise_data.columns.str.strip().str.lower()
plant_data.columns = plant_data.columns.str.strip().str.lower()

# Step 3: Print actual column names for debugging
print("Statewise Data Columns:", statewise_data.columns.tolist())
print("Plant Data Columns:", plant_data.columns.tolist())

# Step 4: Rename mismatched columns
rename_map = {
    "Temperature": "temperature",
    "Temperature": "temperature"
}

statewise_data.rename(columns=rename_map, inplace=True)
plant_data.rename(columns=rename_map, inplace=True)

# Step 5: Ensure Soil Type is consistently formatted
statewise_data["soil type"] = statewise_data["soil type"].str.strip().str.lower()
plant_data["soil type"] = plant_data["soil type"].str.strip().str.lower()

# Step 6: Encode Plant Names
encoder = LabelEncoder()
plant_data["plant label"] = encoder.fit_transform(plant_data["plant name"])

# Step 7: Merge datasets based on Soil Type, pH, Temperature, and Rainfall
merged_data = statewise_data.merge(
    plant_data, on=["soil type", "soil ph", "temperature", "rainfall (mm)"], how="inner"
)

# Step 8: Select Features & Target Variable
X = merged_data[['soil ph', 'temperature', 'rainfall (mm)']]
y = merged_data["plant label"]

# Step 9: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 10: Train the Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 11: Save Model & Encoder
joblib.dump(model, "plant_recommendation_model.pkl")
joblib.dump(encoder, "label_encoder.pkl")

print("✅ Model training complete. Saved as plant_recommendation_model.pkl")


Statewise Data Columns: ['state', 'district', 'soil type', 'soil ph', 'temperature', 'rainfall (mm)']
Plant Data Columns: ['plant name', 'soil type', 'soil ph', 'temperature', 'rainfall (mm)', 'required fertilizer', 'common pests & recommended pesticides']
✅ Model training complete. Saved as plant_recommendation_model.pkl


In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib  # For saving the model

# Step 1: Load Datasets
statewise_data = pd.read_csv("newindia.csv", encoding="utf-8")
plant_data = pd.read_csv("newplantss.csv", encoding="utf-8")

# Step 2: Standardize Column Names
statewise_data.columns = statewise_data.columns.str.strip().str.lower()
plant_data.columns = plant_data.columns.str.strip().str.lower()

# Step 3: Print actual column names for debugging
print("Statewise Data Columns:", statewise_data.columns.tolist())
print("Plant Data Columns:", plant_data.columns.tolist())

# Step 4: Rename mismatched columns
rename_map = {
    "Temperature": "temperature",
    "Temperature": "temperature"
}

statewise_data.rename(columns=rename_map, inplace=True)
plant_data.rename(columns=rename_map, inplace=True)

# Step 5: Ensure Soil Type is consistently formatted
statewise_data["soil type"] = statewise_data["soil type"].str.strip().str.lower()
plant_data["soil type"] = plant_data["soil type"].str.strip().str.lower()

# Step 6: Encode Plant Names
encoder = LabelEncoder()
plant_data["plant label"] = encoder.fit_transform(plant_data["plant name"])

# Step 7: Merge datasets based on Soil Type, pH, Temperature, and Rainfall
merged_data = statewise_data.merge(
    plant_data, on=["soil type", "soil ph", "temperature", "rainfall (mm)"], how="inner"
)

# Step 8: Select Features & Target Variable
X = merged_data[['soil ph', 'temperature', 'rainfall (mm)']]
y = merged_data["plant label"]

# Step 9: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 10: Train the Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 11: Save Model & Encoder
joblib.dump(model, "plant_recommendation_model.pkl")
joblib.dump(encoder, "label_encoder.pkl")

print("✅ Model training complete. Saved as plant_recommendation_model.pkl")


Statewise Data Columns: ['state', 'district', 'soil type', 'soil ph', 'temperature', 'rainfall (mm)']
Plant Data Columns: ['plant name', 'soil type', 'soil ph', 'temperature', 'rainfall (mm)', 'required fertilizer', 'common pests & recommended pesticides']
✅ Model training complete. Saved as plant_recommendation_model.pkl
