In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the crop recommendation CSV from the Kaggle input directory and display it.
csv_path = "/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv"
df = pd.read_csv(csv_path)
df

In [3]:
# Print the column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

In [4]:
# Count the missing values in each column.
df.isna().sum()

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) from DataFrame.describe().
df.describe()

In [6]:
# List the column names of the frame.
df.columns

In [7]:
# Distinct values of the `label` column, which is used as the prediction target later on.
df['label'].unique()

In [8]:
# (number of rows, number of columns) of the frame.
df.shape

In [9]:
# Pairwise plots of the columns of df, coloured by the `label` column.
pair_grid = sns.pairplot(df, hue='label')
plt.show()

In [10]:
# Annotated correlation heatmap of every column except the `label` target.
numeric_df = df.drop(columns='label')
figure, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(numeric_df.corr(), annot=True, ax=ax)

In [11]:
# Encode the values in `label` as integer class ids.
# The guard makes the cell safe to re-run: fitting a fresh encoder on the
# already-encoded integers would replace `le.classes_` (the original label
# values) with the integers themselves and break the id -> name lookup that
# later cells rely on.
if not pd.api.types.is_numeric_dtype(df['label']):
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])
df

In [12]:
# Original label values in encoded order: class_labels[i] is the value that
# the encoder mapped to integer i.
class_labels = le.classes_
class_labels

In [13]:
# Map each entry of class_labels to its position in that array.
label_dict = {label: index for index, label in enumerate(class_labels)}

print(label_dict)

In [14]:
# Parallel lists filled in by the evaluation cells:
# accuracy scores and the names of the models they belong to.
acc, model = [], []

In [15]:
# Separate the features from the `label` target and hold out 10% of the rows for testing.
x = df.drop(columns='label')
y = df['label']
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    shuffle=True,
    random_state=42,
)

In [16]:
# Standardise the features using statistics learned from the training split
# only, then apply that same fitted scaler to the test split.
# The scaled frames are kept under their own names so that the unscaled
# `x_train` / `x_test`, which the models in the following cells are fitted and
# scored on, are not overwritten. The original row index is preserved so the
# scaled frames stay aligned with `y_train` / `y_test`.
scaler = StandardScaler()
x_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train),
    columns=x.columns,
    index=x_train.index,
)
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test),
    columns=x.columns,
    index=x_test.index,
)

In [17]:
# Fit a random forest on the training split and predict the test split.
rf_model = RandomForestClassifier(random_state=42).fit(x_train, y_train)
y_pred = rf_model.predict(x_test)

# Cell output: (score on the training split, score on the test split).
rf_train_score = rf_model.score(x_train, y_train)
rf_test_score = rf_model.score(x_test, y_test)
rf_train_score, rf_test_score

In [18]:
# Score the random forest predictions and record the result for the comparison chart.
accs = accuracy_score(y_test, y_pred)
acc.append(accs)
model.append('rf_model')

rf_confusion = confusion_matrix(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)
print("Confusion Matrix:\n", rf_confusion)
print("Classification Report:\n", rf_report)

# Accuracy as a percentage with three decimals, prefixed by the ANSI bold escape.
rf_accuracy_text = '\033[1m {:.3f}%'.format(accs * 100)
print('.:. Random Forest Classifier:' + rf_accuracy_text + ' .:.')

In [19]:
# Fit a decision tree on the training split and predict the test split.
dt_model = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
y_pred1 = dt_model.predict(x_test)

# Cell output: (score on the training split, score on the test split).
dt_train_score = dt_model.score(x_train, y_train)
dt_test_score = dt_model.score(x_test, y_test)
dt_train_score, dt_test_score

In [20]:
# Score the decision tree predictions and record the result for the comparison chart.
accs1 = accuracy_score(y_test, y_pred1)
acc.append(accs1)
model.append('dt_model')
print("Confusion Matrix:\n",confusion_matrix(y_test,y_pred1))
print("Classification Report:\n",classification_report(y_test,y_pred1))
# Report accs1, the decision tree's own accuracy (accs holds the random forest's).
print('.:. Decision Tree Classifier:'+'\033[1m {:.3f}%'.format(accs1*100)+' .:.')

In [21]:
# Fit a k-nearest-neighbours classifier (default settings) and predict the test split.
knn_model = KNeighborsClassifier().fit(x_train, y_train)
y_pred2 = knn_model.predict(x_test)

# Cell output: (score on the training split, score on the test split).
knn_train_score = knn_model.score(x_train, y_train)
knn_test_score = knn_model.score(x_test, y_test)
knn_train_score, knn_test_score

In [22]:
# Score the k-nearest-neighbours predictions and record the result for the comparison chart.
accs2 = accuracy_score(y_test, y_pred2)
acc.append(accs2)
model.append('knn_model')

knn_confusion = confusion_matrix(y_test, y_pred2)
knn_report = classification_report(y_test, y_pred2)
print("Confusion Matrix:\n", knn_confusion)
print("Classification Report:\n", knn_report)

# Accuracy as a percentage with three decimals, prefixed by the ANSI bold escape.
knn_accuracy_text = '\033[1m {:.3f}%'.format(accs2 * 100)
print('.:. K Neighbors Classifier:' + knn_accuracy_text + ' .:.')

In [23]:
# Bar chart of the accuracy recorded for each model (accuracy on x, model name on y).
fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
ax.set_title('Accuracy Comparison')
ax.set_xlabel('Accuracy')
ax.set_ylabel('Algorithm')
sns.barplot(x=acc, y=model, palette='dark', ax=ax)

In [24]:
# Predict the label for one hand-entered sample with the random forest.
# The values must be listed in the same order as the columns of `x`.
features = np.array([[104, 18, 30, 23.603016, 60.3, 6.7, 140.91]])

# The model was fitted on a DataFrame, so attach the training column names to
# the sample; this avoids scikit-learn's "X does not have valid feature names"
# warning and makes the expected column order explicit.
features_df = pd.DataFrame(features, columns=x.columns)
Recommended_Crop = rf_model.predict(features_df)

# predict() returns an array with one encoded id; look up that single id so the
# label value itself is printed instead of an array repr.
print("Recommended Crop:", class_labels[Recommended_Crop[0]])

In [25]:
# Persist the fitted random forest to rf_model.pkl in the current working directory.
joblib.dump(rf_model, 'rf_model.pkl')

