In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [None]:
df = pd.read_csv('./fruit_classification_dataset.csv')
print(df.head)
print(df.columns)
print(df.describe())

In [None]:
# check unique values in each String attribute
print(pd.unique(df['shape']))
print(pd.unique(df['color']))
print(pd.unique(df['taste']))

In [None]:
# mapping = {
#     'shape': {'round': 0, 'oval': 1, 'long': 2},
#     'color': {'green': 0, 'orange': 1, 'yellow': 2, 'red': 3, 'purple': 4, 'blue': 5, 'brown': 6, 'pink': 7},
#     'taste': {'sweet': 0, 'tangy': 1, 'sour': 2}
# }
le = LabelEncoder()
# Encode each column separately
df['shape_encoded'] = le.fit_transform(df['shape'])
df['color_encoded'] = le.fit_transform(df['color'])
df['taste_encoded'] = le.fit_transform(df['taste'])
print(df[['shape', 'shape_encoded', 'color', 'color_encoded', 'taste', 'taste_encoded']].head())
df[['shape_encoded', 'color_encoded', 'taste_encoded']] = df[['shape_encoded','color_encoded','taste_encoded']].astype(float)

print(df[['shape_encoded', 'color_encoded', 'taste_encoded']])

In [None]:
# df['shape_transformed'] = df['shape'].map(mapping['shape'])
# df['color_transformed'] = df['color'].map(mapping['color'])
# df['taste_transformed'] = df['taste'].map(mapping['taste'])
# print(df.head())


In [None]:
# Independent Variable
x = df[['size (cm)', 'shape_encoded', 'weight (g)', 'avg_price (₹)', 'color_encoded', 'taste_encoded']]
# Dependent Variable
Y = df['fruit_name']

# Remove duplicate rows before splitting
df = df.drop_duplicates()

# Then shuffle (just to be safe)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, Y, test_size=0.3, random_state=42, stratify=Y)

In [None]:
# Scale only numeric columns
scaler = StandardScaler()
num_cols = ['size (cm)', 'weight (g)', 'avg_price (₹)']
Xtrain[num_cols] = scaler.fit_transform(Xtrain[num_cols])
Xtest[num_cols] = scaler.transform(Xtest[num_cols])
# Xtrain_scaler = scaler.fit_transform(Xtrain)
# Xtest_scaler = scaler.transform(Xtest)

In [None]:
# Naive Bayes Classifier
nb = GaussianNB()
nb.fit(Xtrain, Ytrain)
y_predict_nb = nb.predict(Xtest)

print("=== Naive Bayes Metrics ===")
accuracy = accuracy_score(Ytest, y_predict_nb)
precision = precision_score(Ytest, y_predict_nb, average='macro')
recall = recall_score(Ytest, y_predict_nb, average='macro')
f1 = f1_score(Ytest, y_predict_nb, average='macro')
classification_rep = classification_report(Ytest, y_predict_nb)
print('Accuracy: ', accuracy)
print('Precision: ', precision)
print('Recall: ',recall)
print('F1 Score: ', f1)
print('classification report: ', classification_rep)

In [None]:
# K-Nearest Neighbors Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(Xtrain, Ytrain)
y_predict_knn = knn.predict(Xtest)

print("=== KNN Metrics ===")
accuracy = accuracy_score(Ytest, y_predict_knn)
precision = precision_score(Ytest, y_predict_knn, average='macro')
recall = recall_score(Ytest, y_predict_knn, average='macro')
f1 = f1_score(Ytest, y_predict_knn, average='macro')
classification_rep = classification_report(Ytest, y_predict_knn)
print('Accuracy: ', accuracy)
print('Precision: ', precision)
print('Recall: ',recall)
print('F1 Score: ', f1)
print('classification report: ' ,classification_rep)

In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load dataset
df = pd.read_csv('./fruit_classification_dataset.csv')

shape_le = LabelEncoder()
color_le = LabelEncoder()
taste_le = LabelEncoder()
fruit_le = LabelEncoder()

df['shape_encoded'] = shape_le.fit_transform(df['shape'])
df['color_encoded'] = color_le.fit_transform(df['color'])
df['taste_encoded'] = taste_le.fit_transform(df['taste'])
df['fruit_name_encoded'] = fruit_le.fit_transform(df['fruit_name'])


# print(df[['shape', 'shape_encoded', 'color', 'color_encoded', 'taste', 'taste_encoded', 'fruit_name', 'fruit_name_encoded']].head())

# Define features and target
X = df[['size (cm)', 'shape_encoded', 'weight (g)', 'avg_price (₹)',
        'color_encoded', 'taste_encoded']]
Y = df['fruit_name_encoded']

# Split dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1/3, random_state=42)

# ===============================
# Naive Bayes Classifier
# ===============================
# nb = GaussianNB()
# nb.fit(X_train, Y_train)
# y_pred_nb = nb.predict(X_test)

# print("\n=== Naive Bayes Metrics ===")
# print("Accuracy:", accuracy_score(Y_test, y_pred_nb))
# print("Precision:", precision_score(Y_test, y_pred_nb, average='macro'))
# print("Recall:", recall_score(Y_test, y_pred_nb, average='macro'))
# print("F1 Score:", f1_score(Y_test, y_pred_nb, average='macro'))
# print("Classification Report:\n", classification_report(Y_test, y_pred_nb))

# ===============================
# K-Nearest Neighbors Classifier
# ===============================
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, Y_train)
y_pred_knn = knn.predict(X_test_scaled)

print("\n=== KNN Metrics ===")
print("Accuracy:", accuracy_score(Y_test, y_pred_knn))
print("Precision:", precision_score(Y_test, y_pred_knn, average='macro'))
print("Recall:", recall_score(Y_test, y_pred_knn, average='macro'))
print("F1 Score:", f1_score(Y_test, y_pred_knn, average='macro'))
print("Classification Report:\n", classification_report(Y_test, y_pred_knn))



=== KNN Metrics ===
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       171
           1       1.00      1.00      1.00       173
           2       1.00      1.00      1.00       168
           3       1.00      1.00      1.00       147
           4       1.00      1.00      1.00       180
           5       1.00      1.00      1.00       175
           6       1.00      1.00      1.00       157
           7       1.00      1.00      1.00       184
           8       1.00      1.00      1.00       143
           9       1.00      1.00      1.00       162
          10       1.00      1.00      1.00       161
          11       1.00      1.00      1.00       168
          12       1.00      1.00      1.00       150
          13       1.00      1.00      1.00       168
          14       1.00      1.00      1.00       180
          15       1.00      1.00  