In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Baca data dari file CSV
df = pd.read_csv('fastfood_calories.csv')

# Tampilkan data sampel
print(df.head())

# Drop kolom yang tidak diperlukan
df.drop('Item', axis=1, inplace=True)

# Handle missing values
df.fillna(df.median(), inplace=True)

# Pisahkan atribut dari label
X = df.drop('Category', axis=1)
y = df['Category']

# Bagi data menjadi data pelatihan dan pengujian
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Penskalaan data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Latih model K-Nearest Neighbors dengan k=3
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Prediksi kelas
y_pred = knn.predict(X_test)

# Evaluasi model
accuracy = accuracy_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
roc_auc = roc_auc_score(y_test, knn.predict_proba(X_test), multi_class='ovr')
cross_val_scores = cross_val_score(knn, X_train, y_train, cv=10, scoring='accuracy')
classification_rep = classification_report(y_test, y_pred)

# Tampilkan hasil evaluasi
print(f'Model accuracy score: {accuracy:.4f}')
print(f'Confusion matrix:\n{confusion}')
print(f'ROC AUC: {roc_auc:.4f}')
print(f'Cross-validated accuracy: {cross_val_scores.mean():.4f}')
print(f'Classification report:\n{classification_rep}')

# Plot ROC Curve
fpr, tpr, thresholds = roc_curve(y_test, knn.predict_proba(X_test), pos_label=knn.classes_[1])
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()


   Unnamed: 0 restaurant                                       item  calories  \
0           1  Mcdonalds           Artisan Grilled Chicken Sandwich       380   
1           2  Mcdonalds             Single Bacon Smokehouse Burger       840   
2           3  Mcdonalds             Double Bacon Smokehouse Burger      1130   
3           4  Mcdonalds  Grilled Bacon Smokehouse Chicken Sandwich       750   
4           5  Mcdonalds   Crispy Bacon Smokehouse Chicken Sandwich       920   

   cal_fat  total_fat  sat_fat  trans_fat  cholesterol  sodium  total_carb  \
0       60          7      2.0        0.0           95    1110          44   
1      410         45     17.0        1.5          130    1580          62   
2      600         67     27.0        3.0          220    1920          63   
3      280         31     10.0        0.5          155    1940          62   
4      410         45     12.0        0.5          120    1980          81   

   fiber  sugar  protein  vit_a  vit_c  calc

KeyError: ignored