In [43]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report


In [44]:
df = pd.read_csv("../data/crop_data_cleaned.csv")

In [45]:
feature_cols = [
    'Nitrogen', 'phosphorus', 'potassium',
    'temperature', 'humidity', 'ph',
    'rainfall'
]

X = df[feature_cols]
y = df['label']


In [46]:
#print(X)

print(y)


0         rice
1         rice
2         rice
3         rice
4         rice
         ...  
2195    coffee
2196    coffee
2197    coffee
2198    coffee
2199    coffee
Name: label, Length: 2200, dtype: object


In [47]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)


In [48]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)


In [49]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [50]:
knn = KNeighborsClassifier(n_neighbors=5)


In [51]:
knn.fit(X_train_scaled, y_train)

In [52]:
y_pred = knn.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9568181818181818
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       0.95      0.95      0.95        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       0.94      1.00      0.97        17
           6       0.89      1.00      0.94        17
           7       1.00      1.00      1.00        14
           8       0.81      0.96      0.88        23
           9       0.91      1.00      0.95        20
          10       0.69      1.00      0.81        11
          11       1.00      0.90      0.95        21
          12       0.90      1.00      0.95        19
          13       1.00      0.83      0.91        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        1

In [53]:
sample_input = pd.DataFrame([{
    'Nitrogen': 90,
    'phosphorus': 42,
    'potassium': 43,
    'temperature': 20.8,
    'humidity': 82.0,
    'ph': 6.5,
    'rainfall': 202.9
}])

sample_scaled = scaler.transform(sample_input)
predicted_label = knn.predict(sample_scaled)
predicted_crop = le.inverse_transform(predicted_label)

predicted_crop


array(['rice'], dtype=object)