In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df = pd.read_csv("TShirt_size.csv")

print("Dataset Shape:", df.shape)
print(df.head())

X = df[['Height (in cms)', 'Weight (in kgs)']]
y = df['T Shirt Size']

le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded,
    test_size=0.4,
    random_state=42,
    stratify=y_encoded
)

print("\nTraining samples:", len(X_train))
print("Test samples:", len(X_test))

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

max_k = len(X_train)
print("\nTesting K from 1 to", max_k)

best_k = 1
best_score = 0

for k in range(1, max_k + 1):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    score = knn.score(X_test, y_test)
    print(f"K={k} → Accuracy={score:.3f}")

    if score > best_score:
        best_score = score
        best_k = k

print("\nBest K selected:", best_k)

knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

X_scaled = scaler.fit_transform(X)
cv_model = KNeighborsClassifier(n_neighbors=best_k)

cv_scores = cross_val_score(cv_model, X_scaled, y_encoded, cv=3)

print("\nCross Validation Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

new_person = [[170, 65]]
new_scaled = scaler.transform(new_person)

pred = knn.predict(new_scaled)
print("\nPredicted T-shirt size:", le.inverse_transform(pred)[0])


Dataset Shape: (18, 3)
   Height (in cms)  Weight (in kgs) T Shirt Size
0              158               58            M
1              158               59            M
2              158               63            M
3              160               59            M
4              160               60            M

Training samples: 10
Test samples: 8

Testing K from 1 to 10
K=1 → Accuracy=0.875
K=2 → Accuracy=0.875
K=3 → Accuracy=0.875
K=4 → Accuracy=0.875
K=5 → Accuracy=1.000
K=6 → Accuracy=1.000
K=7 → Accuracy=1.000
K=8 → Accuracy=0.625
K=9 → Accuracy=0.625
K=10 → Accuracy=0.625

Best K selected: 5

Accuracy: 1.0

Confusion Matrix:
[[5 0]
 [0 3]]

Classification Report:
              precision    recall  f1-score   support

           L       1.00      1.00      1.00         5
           M       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00  

