In [None]:

import pandas as pd
import numpy as np

# Location of the fertilizer dataset (Colab working directory).
file_path = '/content/fertilizer_data.csv'

# Read the CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(file_path)
print(data.head(5))

   Temparature  Humidity   Moisture Soil Type  Crop Type  Nitrogen  Potassium  \
0           26         52        38     Sandy      Maize        37          0   
1           29         52        45     Loamy  Sugarcane        12          0   
2           34         65        62     Black     Cotton         7          9   
3           32         62        34       Red    Tobacco        22          0   
4           28         54        46    Clayey      Paddy        35          0   

   Phosphorous Fertilizer Name  
0            0            Urea  
1           36             DAP  
2           30        14-35-14  
3           20           28-28  
4            0            Urea  


In [None]:
# Per-column count of null entries.
print("Missing Values:\n", data.isna().sum())

# describe() summarises the numeric columns only.
print("\nSummary Statistics:\n", data.describe())

# List the distinct labels present in each text-valued column.
categorical_columns = ['Soil Type', 'Crop Type', 'Fertilizer Name']
for col in categorical_columns:
    distinct_values = data[col].unique()
    print(f"\nUnique values in {col}:\n", distinct_values)

Missing Values:
 Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

Summary Statistics:
        Temparature  Humidity    Moisture   Nitrogen  Potassium  Phosphorous
count    99.000000  99.000000  99.000000  99.000000  99.000000    99.000000
mean     30.282828  59.151515  43.181818  18.909091   3.383838    18.606061
std       3.502304   5.840331  11.271568  11.599693   5.814667    13.476978
min      25.000000  50.000000  25.000000   4.000000   0.000000     0.000000
25%      28.000000  54.000000  34.000000  10.000000   0.000000     9.000000
50%      30.000000  60.000000  41.000000  13.000000   0.000000    19.000000
75%      33.000000  64.000000  50.500000  24.000000   7.500000    30.000000
max      38.000000  72.000000  65.000000  42.000000  19.000000    42.000000

Unique values in Soil Type:
 ['Sandy' 'Loamy' 'Black' 'Red' 'Clayey']

Uni

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# The CSV header carries a trailing space on 'Humidity '; normalise it.
# Rebinding (instead of inplace=True) keeps this cell safe to re-run.
data = data.rename(columns={'Humidity ': 'Humidity'})

# Encode on a copy so `data` keeps its original string labels. Encoding `data`
# in place would make a second run of this cell fit the encoders on integers,
# losing the string classes needed later to validate and decode predictions.
encoded_data = data.copy()

label_encoders = {}
categorical_columns = ['Soil Type', 'Crop Type', 'Fertilizer Name']

for col in categorical_columns:
    le = LabelEncoder()
    encoded_data[col] = le.fit_transform(encoded_data[col])
    label_encoders[col] = le  # kept so inputs can be encoded / predictions decoded later

# Features are every column except the target; the target is the encoded fertilizer label.
X = encoded_data.drop(columns=['Fertilizer Name'])
y = encoded_data['Fertilizer Name']
print("Columns in X:", X.columns)

scaler = StandardScaler()
numerical_columns = ['Temparature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']

# Fail early with a clear message if the expected numeric columns are absent.
missing_columns = [col for col in numerical_columns if col not in X.columns]
if missing_columns:
    raise ValueError(f"The following numerical columns are missing: {missing_columns}")

# Split BEFORE scaling so the scaler never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Work on explicit copies so the column assignments below do not write into views of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same transform to the test rows.
# X itself is left unscaled.
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

Columns in X: Index(['Temparature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous'],
      dtype='object')
X_train shape: (79, 8)
X_test shape: (20, 8)
y_train shape: (79,)
y_test shape: (20,)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Cell-local imports: joblib persists the artifacts, accuracy_score grades the hold-out split.
import joblib
from sklearn.metrics import accuracy_score

# Baseline random forest with default hyper-parameters and a fixed seed.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Score the fitted model on the held-out rows so the predictions are actually used
# and the persisted model has a recorded test accuracy.
y_pred = rf_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Persist everything the inference cell needs: the model, the per-column label
# encoders, and the fitted scaler.
joblib.dump(rf_model, 'random_forest_model.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')
joblib.dump(scaler, 'scaler.pkl')

print("\nModel, label encoders, and scaler saved successfully!")


Model, label encoders, and scaler saved successfully!


In [None]:
from sklearn.model_selection import cross_val_score


# 5-fold cross-validated accuracy of the random forest over the full feature
# matrix X and target y.
cv_scores = cross_val_score(
    rf_model,
    X,
    y,
    scoring='accuracy',
    cv=5,
)

print("Cross-Validation Accuracy Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

Cross-Validation Accuracy Scores: [0.9  0.95 1.   0.95 1.  ]
Mean CV Accuracy: 0.96


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Hyper-parameter grid: 3 x 4 x 3 = 36 candidate configurations.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Exhaustive search scored by 5-fold cross-validated accuracy; n_jobs=-1 uses all cores.
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),
                           param_grid=param_grid,
                           cv=5,
                           scoring='accuracy',
                           n_jobs=-1)

# Search on the training split only so the test split stays untouched for evaluation.
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Evaluate the winning configuration on the held-out split. best_estimator_ is
# available because GridSearchCV's refit option is left at its default.
best_rf_model = grid_search.best_estimator_
print("Tuned Model Test Accuracy:", accuracy_score(y_test, best_rf_model.predict(X_test)))


Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Best Cross-Validation Accuracy: 0.975


In [None]:
import joblib
import pandas as pd

# Restore the persisted model, label encoders and scaler, in that order.
# NOTE: joblib.load unpickles its input, so only load files you produced or trust.
model, label_encoders, scaler = (
    joblib.load(artifact_path)
    for artifact_path in ('random_forest_model.pkl', 'label_encoders.pkl', 'scaler.pkl')
)

def predict_fertilizer(input_data):
    """Predict the fertilizer name for a single observation.

    Relies on the module-level ``model``, ``label_encoders`` and ``scaler``
    loaded from disk.

    Args:
        input_data: Mapping with the keys 'Temparature', 'Humidity',
            'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium'
            and 'Phosphorous'. 'Soil Type' and 'Crop Type' must be category
            *names* known to the fitted label encoders (not integer codes).

    Returns:
        The predicted fertilizer name on success. On failure a human-readable
        error string is returned instead of raising: one starting with
        "Error:" for missing fields or unknown categories, or
        "An error occurred: ..." for any other exception.
    """
    categorical_features = ['Soil Type', 'Crop Type']
    numerical_columns = ['Temparature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']

    try:
        # Report absent fields explicitly rather than surfacing a bare KeyError message.
        missing_fields = [
            field for field in categorical_features + numerical_columns
            if field not in input_data
        ]
        if missing_fields:
            return f"Error: Missing required input fields: {missing_fields}"

        input_df = pd.DataFrame([input_data])
        for col in categorical_features:
            # Reject labels the encoder never saw; transform() would raise on them.
            if input_data[col] not in label_encoders[col].classes_:
                allowed_values = list(label_encoders[col].classes_)
                return f"Error: Unknown value '{input_data[col]}' for column '{col}'. Allowed values: {allowed_values}"

            input_df[col] = label_encoders[col].transform(input_df[col])

        # Apply the scaler that was fitted during training to the numeric features.
        input_df[numerical_columns] = scaler.transform(input_df[numerical_columns])

        # Align the columns with the order the model was fitted on, so the caller's
        # dict key order (or extra keys) cannot cause a feature-name mismatch.
        expected_columns = getattr(model, 'feature_names_in_', None)
        if expected_columns is not None:
            input_df = input_df[list(expected_columns)]

        predicted_fertilizer = model.predict(input_df)
        # Map the predicted integer class back to the fertilizer name.
        predicted_fertilizer_name = label_encoders['Fertilizer Name'].inverse_transform(predicted_fertilizer)

        return predicted_fertilizer_name[0]

    except Exception as e:
        # Deliberate best-effort: callers receive a message rather than a traceback.
        return f"An error occurred: {str(e)}"

if __name__ == "__main__":

    # Example request. 'Soil Type' and 'Crop Type' must be category names as they
    # appear in the dataset: predict_fertilizer validates them against the label
    # encoders' string classes, so integer codes such as 3 are rejected.
    input_data = {
        'Temparature': 30,
        'Humidity': 60,
        'Moisture': 45,
        'Soil Type': 'Red',
        'Crop Type': 'Tobacco',
        'Nitrogen': 20,
        'Potassium': 10,
        'Phosphorous': 15
    }

    predicted_fertilizer = predict_fertilizer(input_data)
    print("Predicted Fertilizer:", predicted_fertilizer)

Predicted Fertilizer: Error: Unknown value '3' for column 'Soil Type'. Allowed values: ['Black', 'Clayey', 'Loamy', 'Red', 'Sandy']
