In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split as tts
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import numpy as np


### Load and Explore the Dataset

In this step, we'll load the training dataset and have a look at its structure.


In [2]:
# Load the dataset
df = pd.read_csv( "/kaggle/input/classify-fruits-fall-2024-25-b-1/fruits_train.csv" )
df.head()



Unnamed: 0,Id,mass,width,height,label
0,1,160,7.1,7.6,2
1,2,194,7.2,10.3,3
2,3,154,7.2,7.2,2
3,4,154,7.0,7.1,1
4,5,162,7.4,7.2,1


### Data Preprocessing

Here, we'll separate the features and the labels, and apply standard scaling to the feature set.


In [3]:
# Separate features and labels
X = df.drop(columns=['Id', 'label'])
Y = df['label']

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)


### Train the K Nearest Neighbors (KNN) Model

We'll train the KNN model using the training data. For this example, we'll start with 1 neighbor.


In [4]:
# Initialize and train the KNN model
Model = knn(n_neighbors=1)
Model.fit(X, Y)


### Evaluate the Model

We evaluate the model's performance on the training data to check the accuracy.


In [5]:
# Predict on the training data
y_Predicted = Model.predict(X)

# Calculate accuracy
accuracy = accuracy_score(Y, y_Predicted)
print(f'Training Accuracy: {accuracy:.2f}')

# Display classification report
print(classification_report(Y, y_Predicted))


Training Accuracy: 1.00
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00        13
           3       1.00      1.00      1.00        12

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40



### Test the Model with the Test Dataset

Now, we'll load the test dataset, apply the same transformations, and predict the labels.


In [6]:
# Load the test dataset
test = pd.read_csv('/kaggle/input/classify-fruits-fall-2024-25-b-1/fruits_test.csv')

# Preprocess the test data
xtest = test.drop(columns=['Id'])
xtest = scaler.transform(xtest)

# Predict on the test data
ytest = Model.predict(xtest)


### Save the Predictions

Finally, we'll save the predictions in the required format.


In [7]:
# Create output DataFrame and save to CSV
output = pd.DataFrame({'ID': test.Id, 'Category': ytest})
output.to_csv('submission.csv', index=False)
