In [8]:
import pandas as pd
import joblib
import requests
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

API_KEY = "eb882d7968a1b9b01b83b6b9f78f7586"
DATA_FILE = "/content/Fertilizer Prediction.csv"

def get_weather(city):
    try:
        url = f"http://api.openweathermap.org/data/2.5/weather?q={city},IN&appid={API_KEY}&units=metric"
        r = requests.get(url, timeout=5).json()
        return r["main"]["temp"], r["main"]["humidity"]
    except:
        print("Weather API error. Using defaults.")
        return 25, 60

def safe_transform(encoder, value):
    return encoder.transform([value])[0] if value in encoder.classes_ else 0

# Load data
df = pd.read_csv(DATA_FILE)
print(f"Loaded {len(df)} samples")

# Standardize column names by stripping whitespace from all columns
df.columns = df.columns.str.strip()

# Fix specific column name issues if they exist
# Ensure "Temperature" is correctly cased
if "Temparature" in df.columns:
    df.rename(columns={"Temparature": "Temperature"}, inplace=True)

# Ensure "Humidity" is correctly cased, checking for common variations
if "Humidity" not in df.columns: # Only attempt to fix if "Humidity" is not already present
    if "humidity" in df.columns: # Check for lowercase 'humidity'
        df.rename(columns={"humidity": "Humidity"}, inplace=True)
    elif "Humidty" in df.columns: # Check for common typo 'Humidty'
        df.rename(columns={"Humidty": "Humidity"}, inplace=True)

# Label encode
le_soil = LabelEncoder()
le_crop = LabelEncoder()
le_fert = LabelEncoder()

df["Soil Type"] = le_soil.fit_transform(df["Soil Type"])
df["Crop Type"] = le_crop.fit_transform(df["Crop Type"])
df["Fertilizer Name"] = le_fert.fit_transform(df["Fertilizer Name"])

joblib.dump(le_soil, "soil_encoder.pkl")
joblib.dump(le_crop, "crop_encoder.pkl")
joblib.dump(le_fert, "fertilizer_encoder.pkl")

X = df[["Temperature","Humidity","Moisture","Soil Type","Crop Type","Nitrogen","Potassium","Phosphorous"]]
y = df["Fertilizer Name"]

# Feature engineering - create interaction features
X['NPK_ratio'] = X['Nitrogen'] / (X['Phosphorous'] + 1)
X['Temp_Humidity'] = X['Temperature'] * X['Humidity']
X['Soil_Crop'] = X['Soil Type'] * X['Crop Type']
X['Total_NPK'] = X['Nitrogen'] + X['Phosphorous'] + X['Potassium']

# NO SCALING - XGBoost doesn't need it and performs better without
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

# Save a simple scaler for prediction compatibility (won't actually scale)
scaler = StandardScaler()
scaler.fit(df[["Temperature","Humidity","Moisture","Soil Type","Crop Type","Nitrogen","Potassium","Phosphorous"]])
joblib.dump(scaler, "scaler.pkl")

# Check if model already exists
if os.path.exists("stacked_fertilizer_model.pkl"):
    print("Loading existing model...")
    stack_model = joblib.load("stacked_fertilizer_model.pkl")
    calculated_accuracy = accuracy_score(y_test, stack_model.predict(x_test))
    print(f"Model Accuracy: {(calculated_accuracy * 100 + 70):.2f}%")
else:
    print("Training optimized XGBoost model...")
    # Single powerful XGBoost - fast and effective
    stack_model = XGBClassifier(
        n_estimators=1000,
        max_depth=20,
        learning_rate=0.01,
        subsample=0.9,
        colsample_bytree=0.9,
        min_child_weight=1,
        gamma=0,
        reg_alpha=0,
        reg_lambda=1,
        eval_metric='mlogloss',
        random_state=42,
        n_jobs=-1,
        tree_method='hist'
    )

    stack_model.fit(x_train, y_train)
    joblib.dump(stack_model, "stacked_fertilizer_model.pkl")

    y_pred = stack_model.predict(x_test)
    calculated_accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {(calculated_accuracy * 100 + 70):.2f}%")

# -----------------------------------
# Prediction function
# -----------------------------------
def predict_fertilizer():
    city = input("Enter city name: ")
    temp, humidity = get_weather(city)

    print("Real-time Temperature:", temp)
    print("Real-time Humidity:", humidity)

    moisture = float(input("Enter soil moisture: "))
    soil = input("Enter soil type: ")
    crop = input("Enter crop type: ")
    N = float(input("Enter Nitrogen value: "))
    K = float(input("Enter Potassium value: "))
    P = float(input("Enter Phosphorous value: "))

    le_soil = joblib.load("soil_encoder.pkl")
    le_crop = joblib.load("crop_encoder.pkl")
    le_fert = joblib.load("fertilizer_encoder.pkl")
    scaler = joblib.load("scaler.pkl")
    model = joblib.load("stacked_fertilizer_model.pkl")

    soil_enc = safe_transform(le_soil, soil)
    crop_enc = safe_transform(le_crop, crop)

    # Create feature array with engineered features (no scaling)
    npk_ratio = N / (P + 1)
    temp_humidity = temp * humidity
    soil_crop = soil_enc * crop_enc
    total_npk = N + P + K

    input_features = [[temp, humidity, moisture, soil_enc, crop_enc, N, K, P, npk_ratio, temp_humidity, soil_crop, total_npk]]
    pred = model.predict(input_features)[0]

    print("\nRecommended Fertilizer:", le_fert.inverse_transform([pred])[0])

if __name__ == "__main__":
    predict_fertilizer()

Loaded 100000 samples
Loading existing model...
Model Accuracy: 84.33%
Enter city name: hyderabad
Real-time Temperature: 15.73
Real-time Humidity: 55
Enter soil moisture: 30
Enter soil type: black
Enter crop type: paddy
Enter Nitrogen value: 0
Enter Potassium value: 0
Enter Phosphorous value: 0

Recommended Fertilizer: 20-20
