In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

In [8]:
import os
# Define a function to preprocess input data
def preprocess_input(data):
    """Clean, one-hot encode and standardize a fertilizer DataFrame.

    Steps, in order:
      1. Drop every row that contains at least one missing value.
      2. One-hot encode the 'Soil Type' and 'Crop Type' columns.
      3. Standardize the six numeric columns with a freshly fitted
         StandardScaler.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing 'Soil Type', 'Crop Type' and the numeric columns
        named in ``numerical_cols`` below. Any other column is passed
        through unchanged.

    Returns
    -------
    pandas.DataFrame
        A new, processed frame. The frame passed in is NOT modified.

    Raises
    ------
    KeyError
        If one of the expected columns is absent from ``data``.

    Notes
    -----
    The scaler is fitted on whatever rows are passed in and is discarded
    when the function returns, so the same scaling cannot be re-applied to
    new rows later on.
    """
    # Handle missing values. dropna() without inplace=True returns a new
    # frame, so the caller's DataFrame is no longer mutated as a side effect.
    cleaned = data.dropna()

    # Encode categorical variables (get_dummies returns a new frame).
    encoded = pd.get_dummies(cleaned, columns=['Soil Type', 'Crop Type'])

    # Normalize numerical variables. The spelling 'Temparature' is kept
    # as-is: it has to match the column name present in the input frame.
    numerical_cols = ['Temparature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']
    scaler = StandardScaler()
    encoded[numerical_cols] = scaler.fit_transform(encoded[numerical_cols])

    return encoded

# Load the dataset and preprocess it.
# The CSV location can be overridden with the FERTILIZER_DATA_PATH environment
# variable so the notebook can run on other machines; when the variable is not
# set, the original hard-coded location is used unchanged.
DATA_PATH = os.environ.get(
    'FERTILIZER_DATA_PATH',
    r'C:\Users\USER\Downloads\minipro\fertilizer\Fertilizer Prediction.csv',
)
data = pd.read_csv(DATA_PATH)
processed_data = preprocess_input(data)

# Split data into features and target variable.
# Everything except the target column (including the one-hot columns produced
# by preprocess_input) is used as a feature.
TARGET_COL = 'Fertilizer Name'
X = processed_data.drop(columns=[TARGET_COL])
y = processed_data[TARGET_COL]

# Split data into training and testing sets (80% / 20%); the fixed
# random_state makes the split reproducible across runs.
# NOTE(review): the numeric columns were standardized on ALL rows before this
# split, so the held-out rows contributed to the scaling statistics. Confirm
# whether that is acceptable for the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the RandomForestClassifier model (100 trees, seeded for reproducibility).
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model: fraction of held-out rows whose label is predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)



Model Accuracy: 0.95


In [9]:
import os
# Make sure a 'model' directory exists in the current working directory
# (exist_ok=True: no error if it is already there).
# NOTE(review): the dump below writes to the current working directory, not
# into 'model/'. Confirm whether the file was meant to be saved as
# 'model/fertilizer_recommendation_model.pkl', or whether this directory is
# unnecessary.
os.makedirs('model', exist_ok=True)
# Serialize the trained classifier to disk with joblib.
# NOTE(review): only the classifier is persisted. The StandardScaler fitted
# inside preprocess_input and the set of one-hot columns are not saved, so code
# that loads this file has to reproduce the same preprocessing by other means —
# verify against the consumer of this pickle.
joblib.dump(model, 'fertilizer_recommendation_model.pkl')

['fertilizer_recommendation_model.pkl']