In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib

# 1. Load and preprocess data
def load_and_prepare(file_path):
    """Load the crop-yield CSV and label-encode it for classification.

    Args:
        file_path: Location of the CSV, handed to ``pd.read_csv``. If no file
            exists there, the loader falls back to
            ``/content/crop_yield.csv`` (the path this function previously
            read unconditionally) and prints a warning.

    Returns:
        A tuple ``(X, y, le_seed, le_yield, target_col)``: the feature frame
        (every column except the target), the label-encoded target series,
        the ``LabelEncoder`` fitted on ``seed_type``, the ``LabelEncoder``
        fitted on the target, and the name of the target column.

    Raises:
        FileNotFoundError: If neither ``file_path`` nor the fallback exists.
        ValueError: If no column name contains ``'yield'``.
        KeyError: If the CSV has no ``seed_type`` column.
    """
    fallback_path = "/content/crop_yield.csv"
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        if str(file_path) == fallback_path:
            raise
        # Keep existing runs working: the old code always read the fallback
        # path, whatever the caller passed in.
        print(f"⚠️ '{file_path}' not found; loading '{fallback_path}' instead.")
        df = pd.read_csv(fallback_path)

    # Auto-detect the target: first column whose name contains 'yield'.
    # str() guards against non-string column labels.
    target_col = next(
        (col for col in df.columns if 'yield' in str(col).lower()),
        None,
    )
    if target_col is None:
        raise ValueError(
            "No target column found: expected a column whose name contains "
            f"'yield', got {list(df.columns)}"
        )

    # Separate encoders so each mapping can be reused or inverted later.
    le_seed = LabelEncoder()
    le_yield = LabelEncoder()

    df['seed_type'] = le_seed.fit_transform(df['seed_type'])
    df[target_col] = le_yield.fit_transform(df[target_col])

    X = df.drop(columns=[target_col])
    y = df[target_col]

    return X, y, le_seed, le_yield, target_col

# 2. Train model
def train_model(X, y):
    """Fit a random-forest classifier and print its hold-out accuracy.

    Args:
        X: Feature table.
        y: Encoded target labels aligned with ``X``.

    Returns:
        The ``RandomForestClassifier`` fitted on the 80% training split.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Seed the forest as well as the split; otherwise the reported accuracy
    # and the persisted model change from run to run on identical data.
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluation on the 20% hold-out split.
    accuracy = accuracy_score(y_test, model.predict(X_test))
    print(f"\n✅ Model Accuracy: {accuracy * 100:.2f}%")
    return model

# 3. Predict from user input
def predict_user_input(model, le_seed, le_yield):
    """Prompt for one sample on stdin and print its predicted yield category.

    Args:
        model: Fitted classifier exposing ``predict``.
        le_seed: Fitted encoder for seed types (``classes_``, ``transform``).
        le_yield: Fitted encoder for the target (``inverse_transform``).

    Raises:
        ValueError: If soil type or rainfall is not numeric, or if the seed
            type is not one of the classes ``le_seed`` was fitted on.
    """
    print("\nEnter input to predict crop yield category:")
    soil = int(input("Enter soil type (as number): "))
    # Rainfall is a measurement, so accept decimal values such as 120.5.
    rain = float(input("Enter rainfall (in mm): "))
    seed = input("Enter seed type (e.g. A, B, C): ").strip()

    # Validate up front so the user sees the accepted values instead of the
    # encoder's generic "unseen labels" error.
    known_seeds = list(le_seed.classes_)
    if seed not in known_seeds:
        raise ValueError(
            f"Unknown seed type {seed!r}; expected one of: "
            f"{', '.join(map(str, known_seeds))}"
        )

    seed_encoded = le_seed.transform([seed])[0]
    # NOTE(review): assumes the training columns are ordered
    # soil, rainfall, seed_type - confirm against the CSV.
    features = [[soil, rain, seed_encoded]]

    # A model fitted on a DataFrame records its column names; reuse them so
    # scikit-learn does not warn about missing feature names at predict time.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(features[0]):
        features = pd.DataFrame(features, columns=list(feature_names))

    pred = model.predict(features)[0]
    predicted_label = le_yield.inverse_transform([pred])[0]

    print(f"🔮 Predicted Crop Yield Category: {predicted_label}")

# Main
def main():
    """Train the crop-yield classifier, save it, and run one prediction.

    Loads and encodes the dataset, fits the model, writes it to
    ``crop_yield_model.joblib`` in the working directory, then prompts the
    user for a single sample to classify.
    """
    # Location of the crop-yield CSV. The previous value pointed at an
    # unrelated "/mnt/data/<uuid>.csv" upload path rather than this dataset.
    file_path = "/content/crop_yield.csv"
    X, y, le_seed, le_yield, target_col = load_and_prepare(file_path)
    model = train_model(X, y)
    joblib.dump(model, "crop_yield_model.joblib")

    # The encoders are only kept in memory, so predict in this same session.
    predict_user_input(model, le_seed, le_yield)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()



✅ Model Accuracy: 40.00%

Enter input to predict crop yield category:
Enter soil type (as number): 5
