In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import warnings



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
file_path = "/content/drive/MyDrive/Colab Notebooks/csv/diabetes.csv"
df = pd.read_csv(file_path)
display(df)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [6]:
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [7]:
target_column = "Outcome"
X = df.drop(columns=[target_column])
y = df[target_column]


In [8]:

# 데이터 분할 및 전처리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [9]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
comparison_df = pd.DataFrame({"Actual": y_test, "Predicted": dt_pred})

print("\nDecision Tree Model")
print(f"Accuracy: {accuracy_score(y_test, dt_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, dt_pred))
print("\nActual VS Predicted")
display(comparison_df)


Decision Tree Model
Accuracy: 0.7468
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.76      0.79        99
           1       0.62      0.73      0.67        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154


Actual VS Predicted


Unnamed: 0,Actual,Predicted
668,0,1
324,0,0
624,0,0
690,0,0
473,0,0
...,...,...
355,1,1
534,0,0
344,0,1
296,1,0


In [10]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
comparison_df = pd.DataFrame({"Actual": y_test.values, "Predicted": dt_pred})
print("\nRandom Forest Model")
print(f"Accuracy: {accuracy_score(y_test, rf_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, rf_pred))
print("\nActual VS Predicted")
display(comparison_df)


Random Forest Model
Accuracy: 0.7208
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.78      0.78        99
           1       0.61      0.62      0.61        55

    accuracy                           0.72       154
   macro avg       0.70      0.70      0.70       154
weighted avg       0.72      0.72      0.72       154


Actual VS Predicted


Unnamed: 0,Actual,Predicted
0,0,1
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
149,1,1
150,0,0
151,0,1
152,1,0


In [12]:
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
comparison_df = pd.DataFrame({"Actual": y_test.values, "Predicted": dt_pred})
print("\nLogistic Regression Model")
print(f"Accuracy: {accuracy_score(y_test, lr_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, lr_pred))
print("\nActual VS Predicted")
display(comparison_df)


Logistic Regression Model
Accuracy: 0.7532
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154


Actual VS Predicted


Unnamed: 0,Actual,Predicted
0,0,1
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
149,1,1
150,0,0
151,0,1
152,1,0


In [11]:
from sklearn.svm import SVC

svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
comparison_df = pd.DataFrame({"Actual": y_test.values, "Predicted": dt_pred})
print("\nSupport Vector Machine Model")
print(f"Accuracy: {accuracy_score(y_test, svm_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, svm_pred))
print("\nActual VS Predicted")
display(comparison_df)


Support Vector Machine Model
Accuracy: 0.7338
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.83      0.80        99
           1       0.65      0.56      0.60        55

    accuracy                           0.73       154
   macro avg       0.71      0.70      0.70       154
weighted avg       0.73      0.73      0.73       154


Actual VS Predicted


Unnamed: 0,Actual,Predicted
0,0,1
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
149,1,1
150,0,0
151,0,1
152,1,0


In [13]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)
comparison_df = pd.DataFrame({"Actual": y_test.values, "Predicted": dt_pred})
print("\nK-Nearest Neighbors Model")
print(f"Accuracy: {accuracy_score(y_test, knn_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, knn_pred))
print("\nActual VS Predicted")
display(comparison_df)
warnings.simplefilter(action='ignore', category=FutureWarning)


K-Nearest Neighbors Model
Accuracy: 0.6948
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.80      0.77        99
           1       0.58      0.51      0.54        55

    accuracy                           0.69       154
   macro avg       0.66      0.65      0.66       154
weighted avg       0.69      0.69      0.69       154


Actual VS Predicted


Unnamed: 0,Actual,Predicted
0,0,1
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
149,1,1
150,0,0
151,0,1
152,1,0
