In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

import warnings
warnings.filterwarnings("ignore")

In [52]:
import pickle

In [53]:
df = pd.read_csv("Crop_recommendation.csv")

In [54]:
c=df.label.astype('category')
targets = dict(enumerate(c.cat.categories))
df['target']=c.cat.codes

y=df.label #Target.
X=df[['N','P','K','temperature','humidity','ph','rainfall']] #Fields used in ML algo.

# Cross validation

In [55]:
from sklearn.model_selection import train_test_split

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=1)

## Pre processing

In [57]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_train_scaled = min_max_scaler.fit_transform(X_train)
X_test_scaled = min_max_scaler.transform(X_test)

# Sample data

In [58]:
# Select random values in the dataset to test predictions below
data = df.iloc[2100]
print(data[7])
print('\n')

# Slice the features for prediction
input_test = data[:-2]
print(input_test)
print('\n')

# Format the data to use in models
prediction_data = input_test.to_numpy().reshape(1, -1)
scaled_prediction_data = min_max_scaler.transform(prediction_data)
print(prediction_data)
print(scaled_prediction_data)

coffee


N                      91
P                      21
K                      26
temperature      26.33378
humidity          57.3647
ph               7.261314
rainfall       191.654941
Name: 2100, dtype: object


[[91 21 26 26.33377983 57.36469955 7.261313694 191.6549412]]
[[0.65       0.11428571 0.105      0.50238727 0.502855   0.58382762
  0.61572561]]


In [59]:
df['label'].nunique()

22

## KNN

In [60]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train_scaled, y_train)

In [61]:
knn = KNeighborsClassifier(n_neighbors = 6)
knn.fit(X_train_scaled, y_train)

In [62]:
from sklearn.metrics import classification_report
knnPredictor = knn.predict(X_test_scaled)

In [63]:
knn.predict(scaled_prediction_data)

array(['coffee'], dtype=object)

# SVC

In [64]:
from sklearn.svm import SVC

svc_linear = SVC(kernel='linear').fit(X_train_scaled, y_train)
svc_rbf = SVC(kernel='rbf').fit(X_train_scaled, y_train)
svc_poly = SVC(kernel='poly').fit(X_train_scaled, y_train)

In [65]:
svc_linear.predict(scaled_prediction_data)

array(['coffee'], dtype=object)

# Decision tree

In [66]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

In [67]:
dt.predict(prediction_data)

array(['coffee'], dtype=object)

# Random forest

In [68]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=5, n_estimators=100, random_state=42).fit(X_train, y_train)


In [69]:
clf.predict(prediction_data)

array(['coffee'], dtype=object)

# XGBoost

In [70]:
from sklearn.ensemble import GradientBoostingClassifier
gboost = GradientBoostingClassifier().fit(X_train, y_train)

In [71]:
gboost.predict(prediction_data)

array(['coffee'], dtype=object)

# Ensembling

In [72]:
from sklearn.ensemble import VotingClassifier

In [73]:
ensemble_classifier_h = VotingClassifier(estimators= [
    ('knn', knn), 
    ('svc_linear', svc_linear), 
    ('svc_poly', svc_poly), 
    ('svc_rbf', svc_rbf), 
    ('dt', dt), 
    ('rf', clf), 
    ('xgboost', gboost)
    ],
    voting='hard')

ensemble_classifier_h = ensemble_classifier_h.fit(X,y)

In [74]:
ensemble_classifier_h.score(X, y)

0.9945454545454545

In [75]:
ensemble_classifier_h.predict(prediction_data)

array(['coffee'], dtype=object)

In [76]:
def crop_prediction(x):
    return ensemble_classifier_h.predict(x)

In [77]:
crop_prediction(prediction_data)

array(['coffee'], dtype=object)

In [78]:
# pickle.dump(ensemble_classifier_h, open("model.pkl", "wb"))

# TEST INDIVIDUALLY

In [79]:
start = "\033[1m"
end = "\033[0;0m"

In [80]:
def modelExeTime(model, data):
    st = time.time()
    time.sleep(1)
    if model == knn:
        scaled_pred_data = min_max_scaler.transform(data)
        print(knn.predict(scaled_pred_data))
        
    elif model == svc_linear:
        scaled_pred_data = min_max_scaler.transform(data)
        print(svc_linear.predict(scaled_pred_data))
        
    elif model == svc_rbf:
        scaled_pred_data = min_max_scaler.transform(data)
        print(svc_rbf.predict(scaled_pred_data))
        
    elif model == svc_poly:
        scaled_pred_data = min_max_scaler.transform(data)
        print(svc_poly.predict(scaled_pred_data))
        
    elif model == dt or clf or gboost:
        print(model.predict(data))
    else:
        print("Error occured")
    et = time.time()
    elapsed_time = et - st
    print('Execution time:', elapsed_time, 'seconds')

In [81]:
grouped_models = [knn,svc_linear, svc_rbf, svc_poly, dt, clf, gboost]

In [82]:
for x in grouped_models:
    print(f"{start} {x} {end}")
    modelExeTime(x, prediction_data)
    print('\n')

[1m KNeighborsClassifier(n_neighbors=6) [0;0m
['coffee']
Execution time: 1.0027921199798584 seconds


[1m SVC(kernel='linear') [0;0m
['coffee']
Execution time: 1.0007312297821045 seconds


[1m SVC() [0;0m
['coffee']
Execution time: 1.0022213459014893 seconds


[1m SVC(kernel='poly') [0;0m
['coffee']
Execution time: 1.0005674362182617 seconds


[1m DecisionTreeClassifier(random_state=42) [0;0m
['coffee']
Execution time: 1.0003535747528076 seconds


[1m RandomForestClassifier(max_depth=5, random_state=42) [0;0m
['coffee']
Execution time: 1.010871171951294 seconds


[1m GradientBoostingClassifier() [0;0m
['coffee']
Execution time: 1.0032143592834473 seconds




In [83]:
scaled_pred_data = min_max_scaler.transform(prediction_data)
knn.predict(scaled_pred_data)

array(['coffee'], dtype=object)