In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Step 2: Load the dataset
# The CSV is read from the absolute path '/content/fruit.csv', not from the
# notebook's own directory.
# NOTE(review): '/content' looks like a hosted-notebook upload location --
# adjust the path if the file lives elsewhere.
data = pd.read_csv('/content/fruit.csv')

In [None]:
# Preview the first rows to confirm the file loaded with the expected columns.
data.head()

Unnamed: 0,fruit_label,fruit_name,fruit_subtype,mass,width,height,color_score
0,1,apple,granny_smith,192,8.4,7.3,0.55
1,1,apple,granny_smith,180,8.0,6.8,0.59
2,1,apple,granny_smith,176,7.4,7.2,0.6
3,2,mandarin,mandarin,86,6.2,4.7,0.8
4,2,mandarin,mandarin,84,6.0,4.6,0.79


In [None]:
# Step 3: Data Preparation
# Features: the four numeric measurements. Target: the integer fruit label.
X = data.loc[:, ['mass', 'width', 'height', 'color_score']]
y = data.loc[:, 'fruit_label']

In [None]:
# Preview the selected feature columns.
X.head()

Unnamed: 0,mass,width,height,color_score
0,192,8.4,7.3,0.55
1,180,8.0,6.8,0.59
2,176,7.4,7.2,0.6
3,86,6.2,4.7,0.8
4,84,6.0,4.6,0.79


In [None]:
# Preview the target labels.
y.head()

Unnamed: 0,fruit_label
0,1
1,1
2,1
3,2
4,2


In [None]:
# Check column dtypes and non-null counts to spot missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fruit_label    59 non-null     int64  
 1   fruit_name     59 non-null     object 
 2   fruit_subtype  59 non-null     object 
 3   mass           59 non-null     int64  
 4   width          59 non-null     float64
 5   height         59 non-null     float64
 6   color_score    59 non-null     float64
dtypes: float64(3), int64(2), object(2)
memory usage: 3.4+ KB


In [None]:
# Summary statistics (range, quartiles, spread) for the numeric columns.
data.describe()

Unnamed: 0,fruit_label,mass,width,height,color_score
count,59.0,59.0,59.0,59.0,59.0
mean,2.542373,163.118644,7.105085,7.69322,0.762881
std,1.208048,55.018832,0.816938,1.361017,0.076857
min,1.0,76.0,5.8,4.0,0.55
25%,1.0,140.0,6.6,7.2,0.72
50%,3.0,158.0,7.2,7.6,0.75
75%,4.0,177.0,7.5,8.2,0.81
max,4.0,362.0,9.6,10.5,0.93


In [None]:
# Step 4: Split the dataset into training and testing sets
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified; with only 59 rows, passing
# stratify=y may give a more representative test set -- confirm before changing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 5: Initialize and train the KNN classifier
# Each prediction is a vote among the 3 nearest training samples.
# NOTE(review): the features are on very different scales (per data.describe(),
# mass spans 76-362 while color_score spans 0.55-0.93), so distances computed on
# the raw values are driven almost entirely by mass. Consider standardizing the
# features before fitting.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

In [None]:
# Step 6: Make predictions on the test set
# y_pred holds one predicted fruit_label per row of X_test, in the same order.
y_pred = knn.predict(X_test)

In [None]:

# Step 7: Evaluate the model
# Compare the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
# zero_division=0 reports 0 (rather than warning) for a metric whose denominator
# is zero, e.g. precision for a class the model never predicts.
classification_rep = classification_report(y_test, y_pred,zero_division=0)
# Rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)

In [None]:
# Display results
# Print the overall accuracy, then the per-class report and the confusion matrix.
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_rep)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.4444444444444444
Classification Report:
               precision    recall  f1-score   support

           1       0.75      0.60      0.67         5
           2       0.00      0.00      0.00         4
           3       0.50      0.75      0.60         4
           4       0.25      0.40      0.31         5

    accuracy                           0.44        18
   macro avg       0.38      0.44      0.39        18
weighted avg       0.39      0.44      0.40        18

Confusion Matrix:
 [[3 0 1 1]
 [0 0 0 4]
 [0 0 3 1]
 [1 0 2 2]]


In [None]:
# Step 8: Function to classify a new fruit based on its features
def classify_fruit(mass, width, height, color_score):
    """Predict the fruit label for a single fruit using the fitted ``knn`` model.

    Args:
        mass: Mass of the fruit, on the same scale as the ``mass`` training column.
        width: Width of the fruit, on the same scale as the ``width`` column.
        height: Height of the fruit, on the same scale as the ``height`` column.
        color_score: Color score, on the same scale as the ``color_score`` column.

    Returns:
        The label that ``knn`` predicts for this one sample.
    """
    # The classifier is fitted on a DataFrame, so scikit-learn records the column
    # names. Predicting on a bare NumPy array makes it warn that the input "does
    # not have valid feature names". A one-row DataFrame with the same columns,
    # in the same order as the training features, keeps fit and predict consistent.
    new_data = pd.DataFrame(
        [[mass, width, height, color_score]],
        columns=['mass', 'width', 'height', 'color_score'],
    )
    prediction = knn.predict(new_data)
    return prediction[0]

In [None]:
# Example prediction
# Classify one hand-picked sample; keyword arguments make each measurement explicit.
predicted_label = classify_fruit(mass=160, width=7.5, height=7.5, color_score=0.80)
print(f"The predicted label for a fruit with mass=160, width=7.5, height=7.5, color_score=0.80 is: {predicted_label}")

The predicted label for a fruit with mass=160, width=7.5, height=7.5, color_score=0.80 is: 1




In [None]:
# Dump the full feature table and label series as plain text.
# NOTE(review): printing whole frames floods the output; X.head() / y.head()
# or X.shape is usually enough for a sanity check.
print(X)
print(y)

    mass  width  height  color_score
0    192    8.4     7.3         0.55
1    180    8.0     6.8         0.59
2    176    7.4     7.2         0.60
3     86    6.2     4.7         0.80
4     84    6.0     4.6         0.79
5     80    5.8     4.3         0.77
6     80    5.9     4.3         0.81
7     76    5.8     4.0         0.81
8    178    7.1     7.8         0.92
9    172    7.4     7.0         0.89
10   166    6.9     7.3         0.93
11   172    7.1     7.6         0.92
12   154    7.0     7.1         0.88
13   164    7.3     7.7         0.70
14   152    7.6     7.3         0.69
15   156    7.7     7.1         0.69
16   156    7.6     7.5         0.67
17   168    7.5     7.6         0.73
18   162    7.5     7.1         0.83
19   162    7.4     7.2         0.85
20   160    7.5     7.5         0.86
21   156    7.4     7.4         0.84
22   140    7.3     7.1         0.87
23   170    7.6     7.9         0.88
24   342    9.0     9.4         0.75
25   356    9.2     9.2         0.75
2