In [1]:

import pandas as pd

In [3]:
df = pd.read_csv("Crop_recommendation.csv")

In [4]:
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [5]:
X = df[['N','temperature', 'humidity', 'ph', 'rainfall']]
Y = df['P']

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.20)


In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:

from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,mean_absolute_error

In [9]:
models = {
    "Linear Regression" : LinearRegression(),
    "Lasso" : Lasso(),
    "Ridge" : Ridge(),
    "ElasticNet" : ElasticNet(),
    "SVR" : SVR(),
    "K Nearest" : KNeighborsRegressor(),
    "Decision Tree" : DecisionTreeRegressor(),
    "Ada Boost" : AdaBoostRegressor(),
    "Random Forest" : RandomForestRegressor(),
    "Gradient" : GradientBoostingRegressor(),
    "XGB" : XGBRegressor()
}


In [10]:
for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train,Y_train)
    Y_pred = model.predict(X_test)

    acc = r2_score(Y_test,Y_pred)
    mae = mean_absolute_error(Y_test,Y_pred)
    print(f"R2 value for {list(models.keys())[i]} : ", acc*100)
    print(f"MAE value for {list(models.keys())[i]} : ", mae)
    print("-"*50) # Just to make a boundary

R2 value for Linear Regression :  10.21276490637879
MAE value for Linear Regression :  23.74971485529926
--------------------------------------------------
R2 value for Lasso :  9.625966470864833
MAE value for Lasso :  23.57360440761468
--------------------------------------------------
R2 value for Ridge :  10.211747234268366
MAE value for Ridge :  23.74912125130187
--------------------------------------------------
R2 value for ElasticNet :  8.280632269331845
MAE value for ElasticNet :  23.6902239539894
--------------------------------------------------
R2 value for SVR :  15.908724326229173
MAE value for SVR :  19.58439217174938
--------------------------------------------------
R2 value for K Nearest :  76.04402001225097
MAE value for K Nearest :  9.77
--------------------------------------------------
R2 value for Decision Tree :  68.68535236552714
MAE value for Decision Tree :  10.815909090909091
--------------------------------------------------
R2 value for Ada Boost :  52.2189

In [11]:
from sklearn.model_selection import RandomizedSearchCV

K_params = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2]  # 1 = Manhattan, 2 = Euclidean
}

random_params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

XGB_params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0],
    'gamma': [0, 1, 5]
}

processing = [
        ("K",KNeighborsRegressor(),K_params),
        ("Random",RandomForestRegressor(),random_params),
        ("XGB",XGBRegressor(),XGB_params)
    ]

for name,model,param in processing:
    random = RandomizedSearchCV(estimator=model,
                                param_distributions=param,
                                n_jobs=-1,
                                verbose=3,
                                cv=5)
    random.fit(X_train,Y_train)
    Y_pred = random.predict(X_test)

    print(f"Best Paramater for {name} : " , random.best_params_)


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Paramater for K :  {'weights': 'distance', 'p': 1, 'n_neighbors': 9, 'algorithm': 'brute'}
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Paramater for Random :  {'n_estimators': 300, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 30, 'bootstrap': True}
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Paramater for XGB :  {'subsample': 0.7, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.2, 'gamma': 5, 'colsample_bytree': 1.0}


In [12]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_absolute_error

models = {
    "Random Forest" : RandomForestRegressor(n_estimators= 300, min_samples_split= 2, min_samples_leaf= 2, max_depth= 10, bootstrap= True),
}

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train,Y_train)
    Y_pred = model.predict(X_test)

    acc = r2_score(Y_test,Y_pred)
    mae = mean_absolute_error(Y_test,Y_pred)
    print(f"R2 value for {list(models.keys())[i]} : ", acc*100)
    print(f"MAE value for {list(models.keys())[i]} : ", mae)
    print("-"*50) # Just to make a boundary

R2 value for Random Forest :  77.27443900844257
MAE value for Random Forest :  9.117878511229645
--------------------------------------------------


In [13]:
Y_pred = model.predict(scaler.transform([[66,31.87,70.78,7.7,12.07]]))



In [14]:

Y_pred

array([53.2584014])

In [16]:
import pickle
pickle.dump(model,open("p_pred_model.pkl","wb"))

In [17]:
pickle.dump(scaler,open("p_pred_scaler.pkl","wb"))