In [None]:
import pandas as pd

In [None]:
# Read the dataset from a CSV file in the current working directory.
csv_path = "Crop_recommendation.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview only the first rows instead of rendering the whole frame; this keeps
# the saved notebook output small while still showing the column layout.
df.head()

In [None]:
# Show the column labels of df (the names used for feature/target selection).
df.columns

In [None]:
# Columns fed to the models as inputs; 'P' is the column the regressors are
# trained to predict.
feature_columns = ['N', 'temperature', 'humidity', 'ph', 'rainfall']
X = df[feature_columns]
Y = df['P']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation. A fixed random_state makes the split
# identical on every run, so the metrics computed from X_test / Y_test are
# reproducible after a kernel restart.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.20,random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test rows never influence the scaling.
# NOTE: X_train / X_test are overwritten with the scaled arrays here, so this
# cell must not be re-run without first re-running the train/test split --
# otherwise the scaler would be fitted on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,mean_absolute_error

# Candidate regressors, keyed by the label used in the printed report.
# Everything is left at library defaults except random_state, which is pinned
# on the estimators that use randomness while fitting so that the comparison
# below is repeatable from run to run.
models = {
    "Linear Regression" : LinearRegression(),
    "Lasso" : Lasso(),
    "Ridge" : Ridge(),
    "ElasticNet" : ElasticNet(),
    "SVR" : SVR(),
    "K Nearest" : KNeighborsRegressor(),
    "Decision Tree" : DecisionTreeRegressor(random_state=42),
    "Ada Boost" : AdaBoostRegressor(random_state=42),
    "Random Forest" : RandomForestRegressor(random_state=42),
    "Gradient" : GradientBoostingRegressor(random_state=42),
    "XGB" : XGBRegressor(random_state=42)
}

# Fit every candidate on the training split and report its hold-out metrics.
# Iterating over .items() avoids rebuilding the key/value lists on each pass.
for name, model in models.items():
    model.fit(X_train,Y_train)
    Y_pred = model.predict(X_test)

    r2 = r2_score(Y_test,Y_pred)  # coefficient of determination, not an accuracy
    mae = mean_absolute_error(Y_test,Y_pred)
    print(f"R2 value for {name} : ", r2*100)  # printed as a percentage
    print(f"MAE value for {name} : ", mae)
    print("-"*50) # Just to make a boundary

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Hyperparameter search spaces, one dict per estimator tuned below.
K_params = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2]  # 1 = Manhattan, 2 = Euclidean
}

random_params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

XGB_params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0],
    'gamma': [0, 1, 5]
}

# (report label, estimator to tune, search space) for each tuned model.
# The stochastic estimators are seeded so their cross-validation scores are
# comparable between runs.
processing = [
        ("K",KNeighborsRegressor(),K_params),
        ("Random",RandomForestRegressor(random_state=42),random_params),
        ("XGB",XGBRegressor(random_state=42),XGB_params)
    ]

for name,model,param in processing:
    # random_state fixes which parameter combinations are sampled, so the
    # reported best parameters are reproducible. The search object is named
    # `search` rather than `random` to avoid shadowing the stdlib module name.
    search = RandomizedSearchCV(estimator=model,
                                param_distributions=param,
                                n_jobs=-1,
                                verbose=3,
                                cv=5,
                                random_state=42)
    search.fit(X_train,Y_train)

    print(f"Best Paramater for {name} : " , search.best_params_)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_absolute_error

# Final model: a random forest with hand-entered hyperparameters
# (presumably copied from the randomized-search output -- verify they still
# match after re-running the search). random_state is pinned so the fitted
# forest, its metrics and the pickled artifact are reproducible.
models = {
    "Random Forest" : RandomForestRegressor(n_estimators= 300, min_samples_split= 2, min_samples_leaf= 2, max_depth= 10, bootstrap= True, random_state= 42),
}

# After this loop `model` stays bound to the fitted estimator; the prediction
# and pickling cells further down rely on that name.
for name, model in models.items():
    model.fit(X_train,Y_train)
    Y_pred = model.predict(X_test)

    r2 = r2_score(Y_test,Y_pred)  # coefficient of determination, not an accuracy
    mae = mean_absolute_error(Y_test,Y_pred)
    print(f"R2 value for {name} : ", r2*100)  # printed as a percentage
    print(f"MAE value for {name} : ", mae)
    print("-"*50) # Just to make a boundary

In [None]:
# Predict the target for one hand-entered sample. The scaler was fitted on a
# DataFrame, so the sample is wrapped in a DataFrame carrying the same column
# labels as X: this ties each value to its feature by name and avoids
# scikit-learn's "X does not have valid feature names" warning.
sample = pd.DataFrame([[66,31.87,70.78,7.7,12.07]], columns=X.columns)
Y_pred = model.predict(scaler.transform(sample))

In [None]:
# Display the current value of Y_pred.
Y_pred

In [None]:
import pickle

In [None]:
# Persist the fitted estimator and the fitted scaler as separate pickle files;
# both are needed to reproduce predictions outside this notebook.
for artifact_path, artifact in (
    ("P_pred_model.pkl", model),
    ("P_pred_scaler.pkl", scaler),
):
    with open(artifact_path, "wb") as handle:
        pickle.dump(artifact, handle)