# Crop Prediction 

### Importing Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
#importing Decision Tree from sklearn
from sklearn.tree import DecisionTreeClassifier
#importing Logistic regression 
from sklearn.linear_model import LogisticRegression
#importing Support vector machine
from sklearn.svm import SVC
#importing Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
#importing random forest
from sklearn.ensemble import RandomForestClassifier
#importing K-Nearest classifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the crop recommendation CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="../input/crop-recommendation-dataset/Crop_recommendation.csv"
)

In [None]:
# Quick look at the first five rows of the dataset.
df.head(n=5)

In [None]:
# Count missing (NaN) entries per column.
df.isna().sum()

In [None]:
# Summary statistics of the dataset as computed by DataFrame.describe().
df.describe()

In [None]:
# Data type of every column.
df.dtypes

In [None]:
# Distinct values of the label column and how many there are.
# The unique values are computed once and reused for both outputs.
unique_labels = df.label.unique()
print(unique_labels)
print("Total unique values in Label columns are: ", len(unique_labels))

In [None]:
# Number of rows for each distinct value of the label column.
df["label"].value_counts()

### EDA from DATAPREP

In [None]:
pip install dataprep

In [None]:
#importing create report
from dataprep.eda import create_report

In [None]:
# Generate dataprep's automatic EDA report for the whole DataFrame.
create_report(df)

### Preprocessing and Splitting Data

In [None]:
# Feature matrix: every column except the target.
X = df.drop(columns=["label"])
# Target as a 1-D Series. scikit-learn classifiers expect y with shape
# (n_samples,); a single-column DataFrame makes fit() emit a
# DataConversionWarning and forces an implicit ravel.
y = df["label"]
print(X.shape)
print(y.shape)

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

for col in X.columns:
            x = np.array(X[col]).reshape(-1,1)
            X[col]=scaler.fit_transform(x)

In [None]:
#Splitting training data and testing data using train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=42, test_size=0.2)

In [None]:
# Report the shape of each train/test partition.
for split_name, split in (
    ("X_train: ", X_train),
    ("y_train: ", y_train),
    ("X_test: ", X_test),
    ("y_test: ", y_test),
):
    print(split_name, split.shape)

## Training and Evaluating Models

### Decision Tree

In [None]:
# Fit a decision tree on the training split.
# random_state is pinned (same seed as the train/test split) so the tree, and
# therefore the reported accuracy, is reproducible between runs.
dt = DecisionTreeClassifier(random_state=42)
model_dt = dt.fit(X_train, y_train)

In [None]:
# Accuracy of the decision tree on the held-out test split.
score_dt = model_dt.score(X=X_test, y=y_test)
print("Accuracy for Decision Tree: ", score_dt)

In [None]:
# Predicted labels of the decision tree for the test features.
y_pred_dt = model_dt.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 for the decision tree.
# classification_report is imported here and reused by the later report cells.
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=y_pred_dt))

### Logistic Regression

In [None]:
# Fit a logistic-regression classifier on the training split.
# (The stray `try` token that made this line a SyntaxError has been removed.)
reg = LogisticRegression()
model_reg = reg.fit(X_train, y_train)

In [None]:
# Accuracy of logistic regression on the held-out test split.
score_reg = model_reg.score(X_test, y_test)
print("Accuracy for Logistic regression: ", score_reg)

In [None]:
# Predicted labels of logistic regression for the test features.
y_pred_reg = model_reg.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 for logistic regression.
print(classification_report(y_true=y_test, y_pred=y_pred_reg))

### SVM

In [None]:
# Train a support vector classifier with a linear kernel on the training split.
svm = SVC(kernel="linear")
svm.fit(X_train, y_train)
# fit() returns the estimator itself, so the fitted model is the same object.
model_svm = svm

In [None]:
# Accuracy of the SVM on the held-out test split.
# The printed label previously said "Lofistic regression" (copy-paste slip).
score_svm = model_svm.score(X_test, y_test)
print("Accuracy for SVM: ", score_svm)

In [None]:
# Predicted labels of the SVM for the test features.
y_pred_svm = model_svm.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 for the SVM.
print(classification_report(y_true=y_test, y_pred=y_pred_svm))

### Gaussian NB

In [None]:
# Train a Gaussian Naive Bayes classifier on the training split.
nb = GaussianNB()
nb.fit(X_train, y_train)
# fit() returns the estimator itself, so the fitted model is the same object.
model_nb = nb

In [None]:
# Accuracy of Gaussian Naive Bayes on the held-out test split.
# The printed label previously said "Lofistic regression" (copy-paste slip).
score_nb = model_nb.score(X_test, y_test)
print("Accuracy for Gaussian NB: ", score_nb)

In [None]:
# Predicted labels of Gaussian Naive Bayes for the test features.
y_pred_nb = model_nb.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 for Gaussian Naive Bayes.
print(classification_report(y_true=y_test, y_pred=y_pred_nb))

### KNeighbors

In [None]:
# Train a k-nearest-neighbours classifier on the training split:
# 10 neighbours, Minkowski metric with p=2.
kn = KNeighborsClassifier(
    n_neighbors=10,
    metric="minkowski",
    p=2,
)
kn.fit(X_train, y_train)
# fit() returns the estimator itself, so the fitted model is the same object.
model_kn = kn

In [None]:
# Accuracy of k-nearest neighbours on the held-out test split.
# The printed label previously said "Lofistic regression" (copy-paste slip).
score_kn = model_kn.score(X_test, y_test)
print("Accuracy for KNeighbors: ", score_kn)

In [None]:
# Predicted labels of k-nearest neighbours for the test features.
y_pred_kn = model_kn.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 for k-nearest neighbours.
print(classification_report(y_true=y_test, y_pred=y_pred_kn))

### RandomForest

In [None]:
# Fit a random forest (100 trees, entropy split criterion) on the training split.
# random_state is pinned (same seed as the train/test split) so the forest, and
# therefore the reported accuracy, is reproducible between runs.
rf = RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=42)
model_rf = rf.fit(X_train, y_train)

In [None]:
# Accuracy of the random forest on the held-out test split.
# The printed label previously said "Lofistic regression" (copy-paste slip).
score_rf = model_rf.score(X_test, y_test)
print("Accuracy for Random Forest: ", score_rf)

In [None]:
# Predicted labels of the random forest for the test features.
y_pred_rf = model_rf.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 for the random forest.
print(classification_report(y_true=y_test, y_pred=y_pred_rf))

In [None]:
# Map each model's short name to its test-set accuracy.
total_score = dict(
    zip(
        ["DT", "Log_reg", "SVM", "GaussianNB", "K-Niegh", "Rand_forest"],
        [score_dt, score_reg, score_svm, score_nb, score_kn, score_rf],
    )
)
total_score

In [None]:
# Parallel lists for plotting: model names and their scores, in dict order.
model = [*total_score]
score = [*total_score.values()]

In [None]:
# Bar chart comparing the test accuracy of every model.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(model, score, color='red', width=0.8)
ax.set_xlabel("Model Name")
ax.set_ylabel("Score")
ax.set_title("Scores for different model")
plt.show()

#### Conclusion: Random Forest and Gaussian Naive Bayes give better accuracy than the other models.