<a href="https://colab.research.google.com/github/sarabhan/Cropify/blob/main/Crop_Recommendation_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# In this code block, we:-
*   Import libraries
*   Import dataset
*   Implement line encoding
*   Map the numbers to the crop name

In [2]:
#import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

#reading dataset
crop = pd.read_csv("Crop_recommendation.csv")

#here we perform line encoding
crop_dict = {
    'rice': 1, 'maize': 2, 'jute': 3, 'cotton': 4, 'coconut': 5, 'papaya': 6, 'orange': 7,
    'apple': 8, 'muskmelon': 9, 'watermelon': 10, 'grapes': 11, 'mango': 12, 'banana': 13,
    'pomegranate': 14, 'lentil': 15, 'blackgram': 16, 'mungbean': 17, 'mothbeans': 18,
    'pigeonpeas': 19, 'kidneybeans': 20, 'chickpea': 21, 'coffee': 22
}
#perform mapping
crop['crop_num'] = crop['label'].map(crop_dict)

# Next, we:-
*   Drop the output class from the dataset (create feature and target variable)
*   Split into train & test set
*   Normalize


In [3]:
#train test split
x = crop.drop(['crop_num', 'label'], axis=1)
y = crop['crop_num']
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

# Normalize
scaler = StandardScaler() #mean = 0, sd = 1
xtrain_scaled = scaler.fit_transform(xtrain)
xtest_scaled = scaler.transform(xtest)

# Next step:
We train & predict using 2 models, i.e. Logistic Regression and Random Forest Classifier

In [4]:
#train by rfc & logistic
rfc = RandomForestClassifier(n_estimators=130, max_depth=5, random_state=40) #100 trees
rfc.fit(xtrain_scaled, ytrain)

lr = LogisticRegression(C=0.1)  # C is the regularization parameter
lr.fit(xtrain_scaled, ytrain)

#merge
rfc_pred = rfc.predict(xtest_scaled)
lr_pred = lr.predict(xtest_scaled)

#Next step:
We combine predictions of both models into one matrix

In [5]:
#merged prediction dataset
xtest_ensemble = np.column_stack((rfc_pred, lr_pred))

#train model with new merged dataset
meta_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=6000)
meta_model.fit(xtest_ensemble, ytest)

#Next step:
Create a function that returns the final prediction

In [6]:
def recommendation_ensemble(N, P, k, temperature, humidity, ph, rainfall):
    features = np.array([[N, P, k, temperature, humidity, ph, rainfall]])
    transformed_features = scaler.transform(features)
    rfc_prediction = rfc.predict(transformed_features)
    lr_prediction = lr.predict(transformed_features)
    ensemble_features = np.column_stack((rfc_prediction, lr_prediction)) #converting 1D array into 2D
    ensemble_prediction = meta_model.predict(ensemble_features).reshape(1, -1) #It simply means that it is an unknown dimension and we want numpy to figure it out.
    #And numpy will figure this by looking at the 'length of the array and remaining dimensions' and making sure it satisfies the above mentioned criteria

    final_prediction = np.argmax(np.bincount(ensemble_prediction[0])) #returns indices of max element of array
    return final_prediction

#Input & Output

In [7]:
N = 55
P = 13
k = 33
temperature = 40.0
humidity = 17
ph = 10
rainfall = 20

ensemble_predict = recommendation_ensemble(N, P, k, temperature, humidity, ph, rainfall)
if ensemble_predict in crop_dict.values():
    recommended_crop = [key for key, value in crop_dict.items() if value == ensemble_predict][0]
    print("{} is a recommended crop based on the ensemble model.".format(recommended_crop))
else:
    print("Sorry, we are not able to recommend a proper crop for this environment.")

banana is a recommended crop based on the ensemble model.




In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
accuracy = accuracy_score(ytest, meta_model.predict(xtest_ensemble))
precision = precision_score(ytest, meta_model.predict(xtest_ensemble), average='weighted', zero_division=1)
recall = recall_score(ytest, meta_model.predict(xtest_ensemble), average='weighted', zero_division=1)


print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

Accuracy: 0.7613636363636364
Precision: 0.8403555835656883
Recall: 0.7613636363636364
