In [2]:
import pandas as pd
from google.colab import drive

drive.mount('/content/drive')

file_path = "/content/drive/MyDrive/diabetes.xlsx"
df = pd.read_excel(file_path)
df.head()

Mounted at /content/drive


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
#Decision Tree with random state
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np

file_path = "/content/drive/MyDrive/diabetes.xlsx"

results = []

base_random_state = 12345

for i in range(1, 6):
    train_sheet = f"train{i}"
    test_sheet = f"test{i}"

    train = pd.read_excel(file_path, sheet_name=train_sheet)
    test = pd.read_excel(file_path, sheet_name=test_sheet)

    data = pd.concat([train, test], ignore_index=True)

    X = data.drop(columns=["Outcome"])
    y = data["Outcome"]

    current_random_state = base_random_state + (i * 10000)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=current_random_state)

    clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=current_random_state)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    results.append({
        "Sheet": f"train{i} & test{i}",
        "Random State": current_random_state,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1,
        "Accuracy": accuracy
    })

for result in results:
    print(f"Evaluasi untuk {result['Sheet']} (Random State: {result['Random State']}):")
    print(f"  Precision: {result['Precision']:.4f}")
    print(f"  Recall: {result['Recall']:.4f}")
    print(f"  F1-Score: {result['F1-Score']:.4f}")
    print(f"  Accuracy: {result['Accuracy']:.4f}")
    print()

avg_precision = np.mean([result["Precision"] for result in results])
avg_recall = np.mean([result["Recall"] for result in results])
avg_f1 = np.mean([result["F1-Score"] for result in results])
avg_accuracy = np.mean([result["Accuracy"] for result in results])

print("Rata-rata dari semua evaluasi:")
print(f"  Rata-rata Precision: {avg_precision:.4f}")
print(f"  Rata-rata Recall: {avg_recall:.4f}")
print(f"  Rata-rata F1-Score: {avg_f1:.4f}")
print(f"  Rata-rata Accuracy: {avg_accuracy:.4f}")

Evaluasi untuk train1 & test1 (Random State: 22345):
  Precision: 0.6705
  Recall: 0.5364
  F1-Score: 0.5960
  Accuracy: 0.7403

Evaluasi untuk train2 & test2 (Random State: 32345):
  Precision: 0.7374
  Recall: 0.6518
  F1-Score: 0.6919
  Accuracy: 0.7890

Evaluasi untuk train3 & test3 (Random State: 42345):
  Precision: 0.6667
  Recall: 0.5500
  F1-Score: 0.6027
  Accuracy: 0.7175

Evaluasi untuk train4 & test4 (Random State: 52345):
  Precision: 0.7253
  Recall: 0.5946
  F1-Score: 0.6535
  Accuracy: 0.7727

Evaluasi untuk train5 & test5 (Random State: 62345):
  Precision: 0.7529
  Recall: 0.5926
  F1-Score: 0.6632
  Accuracy: 0.7890

Rata-rata dari semua evaluasi:
  Rata-rata Precision: 0.7105
  Rata-rata Recall: 0.5851
  Rata-rata F1-Score: 0.6415
  Rata-rata Accuracy: 0.7617


In [4]:
#Artificial Neural Network with random state
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

file_path = "/content/drive/MyDrive/diabetes.xlsx"

results = []

base_random_state = 12345

for i in range(1, 6):
    train_sheet = f"train{i}"
    test_sheet = f"test{i}"

    train = pd.read_excel(file_path, sheet_name=train_sheet)
    test = pd.read_excel(file_path, sheet_name=test_sheet)

    data = pd.concat([train, test], ignore_index=True)

    X = data.drop(columns=["Outcome"])
    y = data["Outcome"]

    current_random_state = base_random_state + (i * 10000)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=current_random_state)

    model = Sequential()
    model.add(Dense(12, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob > 0.5).astype(int)

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    results.append({
        "Sheet": f"train{i} & test{i}",
        "Random State": current_random_state,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1,
        "Accuracy": accuracy
    })

for result in results:
    print(f"Evaluasi untuk {result['Sheet']} (Random State: {result['Random State']}):")
    print(f"  Precision: {result['Precision']:.4f}")
    print(f"  Recall: {result['Recall']:.4f}")
    print(f"  F1-Score: {result['F1-Score']:.4f}")
    print(f"  Accuracy: {result['Accuracy']:.4f}")
    print()

avg_precision = np.mean([result["Precision"] for result in results])
avg_recall = np.mean([result["Recall"] for result in results])
avg_f1 = np.mean([result["F1-Score"] for result in results])
avg_accuracy = np.mean([result["Accuracy"] for result in results])

print("Rata-rata dari semua evaluasi:")
print(f"  Rata-rata Precision: {avg_precision:.4f}")
print(f"  Rata-rata Recall: {avg_recall:.4f}")
print(f"  Rata-rata F1-Score: {avg_f1:.4f}")
print(f"  Rata-rata Accuracy: {avg_accuracy:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
Evaluasi untuk train1 & test1 (Random State: 22345):
  Precision: 0.6016
  Recall: 0.7000
  F1-Score: 0.6471
  Accuracy: 0.7273

Evaluasi untuk train2 & test2 (Random State: 32345):
  Precision: 0.6855
  Recall: 0.7589
  F1-Score: 0.7203
  Accuracy: 0.7857

Evaluasi untuk train3 & test3 (Random State: 42345):
  Precision: 0.7356
  Recall: 0.5333
  F1-Score: 0.6184
  Accuracy: 0.7435

Evaluasi untuk train4 & test4 (Random State: 52345):
  Precision: 0.6116
  Recall: 0.6667
  F1-Score: 0.6379
  Accuracy: 0.7273

Evaluasi untuk train5 & test5 (Random State: 62345):
  Precision: 0.7143
  Recall: 0.5093
  F1-Score: 0.5946
  Accuracy: 0.7565

Rata-rata dari semua evaluasi:
  Rata-rata Precision: 0.6697
  Rata-rata Recall: 0.6336
  Rata-rata F1-Score: 0.6437
  Rata-rata Accuracy: 0.7481


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

file_path = "/content/drive/MyDrive/diabetes.xlsx"

results = []

base_random_state = 12345

for i in range(1, 6):
    train_sheet = f"train{i}"
    test_sheet = f"test{i}"

    train = pd.read_excel(file_path, sheet_name=train_sheet)
    test = pd.read_excel(file_path, sheet_name=test_sheet)

    data = pd.concat([train, test], ignore_index=True)

    X = data.drop(columns=["Outcome"])
    y = data["Outcome"]

    current_random_state = base_random_state + (i * 10000)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=current_random_state)

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)

    y_pred = knn.predict(X_test)

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    results.append({
        "Sheet": f"train{i} & test{i}",
        "Random State": current_random_state,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1,
        "Accuracy": accuracy
    })

for result in results:
    print(f"Evaluasi untuk {result['Sheet']} (Random State: {result['Random State']}):")
    print(f"  Precision: {result['Precision']:.4f}")
    print(f"  Recall: {result['Recall']:.4f}")
    print(f"  F1-Score: {result['F1-Score']:.4f}")
    print(f"  Accuracy: {result['Accuracy']:.4f}")
    print()

avg_precision = np.mean([result["Precision"] for result in results])
avg_recall = np.mean([result["Recall"] for result in results])
avg_f1 = np.mean([result["F1-Score"] for result in results])
avg_accuracy = np.mean([result["Accuracy"] for result in results])

print("Rata-rata dari semua evaluasi:")
print(f"  Rata-rata Precision: {avg_precision:.4f}")
print(f"  Rata-rata Recall: {avg_recall:.4f}")
print(f"  Rata-rata F1-Score: {avg_f1:.4f}")
print(f"  Rata-rata Accuracy: {avg_accuracy:.4f}")

Evaluasi untuk train1 & test1 (Random State: 22345):
  Precision: 0.6989
  Recall: 0.5909
  F1-Score: 0.6404
  Accuracy: 0.7630

Evaluasi untuk train2 & test2 (Random State: 32345):
  Precision: 0.7363
  Recall: 0.5982
  F1-Score: 0.6601
  Accuracy: 0.7760

Evaluasi untuk train3 & test3 (Random State: 42345):
  Precision: 0.7184
  Recall: 0.6167
  F1-Score: 0.6637
  Accuracy: 0.7565

Evaluasi untuk train4 & test4 (Random State: 52345):
  Precision: 0.7300
  Recall: 0.6577
  F1-Score: 0.6919
  Accuracy: 0.7890

Evaluasi untuk train5 & test5 (Random State: 62345):
  Precision: 0.6907
  Recall: 0.6204
  F1-Score: 0.6537
  Accuracy: 0.7695

Rata-rata dari semua evaluasi:
  Rata-rata Precision: 0.7149
  Rata-rata Recall: 0.6168
  Rata-rata F1-Score: 0.6620
  Rata-rata Accuracy: 0.7708


In [6]:
#Decision Tree
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold, cross_validate
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

file_path = "/content/drive/MyDrive/diabetes.xlsx"

train = pd.read_excel(file_path, sheet_name="train1")
test = pd.read_excel(file_path, sheet_name="test1")

data = pd.concat([train, test], ignore_index=True)

X = data.drop(columns=["Outcome"])
y = data["Outcome"]

clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=42)

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

precision_scores = []
recall_scores = []
f1_scores = []
accuracy_scores = []

for train_index, test_index in kfold.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred))
    accuracy_scores.append(accuracy_score(y_test, y_pred))

print("Metrik untuk setiap fold:")
for i in range(5):
    print(f"Fold {i+1}:")
    print(f"  Precision: {precision_scores[i]:.4f}")
    print(f"  Recall: {recall_scores[i]:.4f}")
    print(f"  F1-Score: {f1_scores[i]:.4f}")
    print(f"  Akurasi: {accuracy_scores[i]:.4f}")
    print()

avg_precision = sum(precision_scores) / 5
avg_recall = sum(recall_scores) / 5
avg_f1 = sum(f1_scores) / 5
avg_accuracy = sum(accuracy_scores) / 5

print("Rata-rata metrik:")
print(f"  Precision: {avg_precision:.4f}")
print(f"  Recall: {avg_recall:.4f}")
print(f"  F1-Score: {avg_f1:.4f}")
print(f"  Akurasi: {avg_accuracy:.4f}")

Metrik untuk setiap fold:
Fold 1:
  Precision: 0.7188
  Recall: 0.6106
  F1-Score: 0.6603
  Akurasi: 0.7695

Fold 2:
  Precision: 0.7442
  Recall: 0.2857
  F1-Score: 0.4129
  Akurasi: 0.7036

Fold 3:
  Precision: 0.7097
  Recall: 0.4583
  F1-Score: 0.5570
  Akurasi: 0.7720

Fold 4:
  Precision: 0.6364
  Recall: 0.6238
  F1-Score: 0.6300
  Akurasi: 0.7590

Fold 5:
  Precision: 0.6700
  Recall: 0.5877
  F1-Score: 0.6262
  Akurasi: 0.7394

Rata-rata metrik:
  Precision: 0.6958
  Recall: 0.5132
  F1-Score: 0.5773
  Akurasi: 0.7487


In [7]:
#Artificial Neural Network
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

file_path = "/content/drive/MyDrive/diabetes.xlsx"

train = pd.read_excel(file_path, sheet_name="train1")
test = pd.read_excel(file_path, sheet_name="test1")

data = pd.concat([train, test], ignore_index=True)

X = data.drop(columns=["Outcome"]).values
y = data["Outcome"].values

X = (X - np.min(X, axis=0)) / (np.max(X, axis=0) - np.min(X, axis=0))

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

precision_scores = []
recall_scores = []
f1_scores = []
accuracy_scores = []

for train_index, test_index in kfold.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = Sequential()
    model.add(Dense(12, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype(int)

    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred))
    accuracy_scores.append(accuracy_score(y_test, y_pred))

print("Metrik untuk setiap fold:")
for i in range(5):
    print(f"Fold {i+1}:")
    print(f"  Precision: {precision_scores[i]:.4f}")
    print(f"  Recall: {recall_scores[i]:.4f}")
    print(f"  F1-Score: {f1_scores[i]:.4f}")
    print(f"  Akurasi: {accuracy_scores[i]:.4f}")
    print()

avg_precision = sum(precision_scores) / 5
avg_recall = sum(recall_scores) / 5
avg_f1 = sum(f1_scores) / 5
avg_accuracy = sum(accuracy_scores) / 5

print("Rata-rata metrik:")
print(f"  Precision: {avg_precision:.4f}")
print(f"  Recall: {avg_recall:.4f}")
print(f"  F1-Score: {avg_f1:.4f}")
print(f"  Akurasi: {avg_accuracy:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Metrik untuk setiap fold:
Fold 1:
  Precision: 0.7750
  Recall: 0.5487
  F1-Score: 0.6425
  Akurasi: 0.7760

Fold 2:
  Precision: 0.7204
  Recall: 0.5982
  F1-Score: 0.6537
  Akurasi: 0.7687

Fold 3:
  Precision: 0.7333
  Recall: 0.5729
  F1-Score: 0.6433
  Akurasi: 0.8013

Fold 4:
  Precision: 0.7108
  Recall: 0.5842
  F1-Score: 0.6413
  Akurasi: 0.7850

Fold 5:
  Precision: 0.8000
  Recall: 0.4912
  F1-Score: 0.6087
  Akurasi: 0.7655

Rata-rata metrik:
  Precision: 0.7479
  Recall: 0.5590
  F1-Score: 0.6379
  Akurasi: 0.7793


In [8]:
#K-Nearest Neighbor
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

file_path = "/content/drive/MyDrive/diabetes.xlsx"

train = pd.read_excel(file_path, sheet_name="train1")
test = pd.read_excel(file_path, sheet_name="test1")

data = pd.concat([train, test], ignore_index=True)

X = data.drop(columns=["Outcome"]).values
y = data["Outcome"].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

precision_scores = []
recall_scores = []
f1_scores = []
accuracy_scores = []

for train_index, test_index in kfold.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    knn = KNeighborsClassifier(n_neighbors=5)

    knn.fit(X_train, y_train)

    y_pred = knn.predict(X_test)

    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred))
    accuracy_scores.append(accuracy_score(y_test, y_pred))

print("Metrik untuk setiap fold:")
for i in range(5):
    print(f"Fold {i+1}:")
    print(f"  Precision: {precision_scores[i]:.4f}")
    print(f"  Recall: {recall_scores[i]:.4f}")
    print(f"  F1-Score: {f1_scores[i]:.4f}")
    print(f"  Akurasi: {accuracy_scores[i]:.4f}")
    print()

avg_precision = sum(precision_scores) / 5
avg_recall = sum(recall_scores) / 5
avg_f1 = sum(f1_scores) / 5
avg_accuracy = sum(accuracy_scores) / 5

print("Rata-rata metrik:")
print(f"  Precision: {avg_precision:.4f}")
print(f"  Recall: {avg_recall:.4f}")
print(f"  F1-Score: {avg_f1:.4f}")
print(f"  Akurasi: {avg_accuracy:.4f}")

Metrik untuk setiap fold:
Fold 1:
  Precision: 0.7879
  Recall: 0.6903
  F1-Score: 0.7358
  Akurasi: 0.8182

Fold 2:
  Precision: 0.7500
  Recall: 0.6696
  F1-Score: 0.7075
  Akurasi: 0.7980

Fold 3:
  Precision: 0.7021
  Recall: 0.6875
  F1-Score: 0.6947
  Akurasi: 0.8111

Fold 4:
  Precision: 0.7087
  Recall: 0.7228
  F1-Score: 0.7157
  Akurasi: 0.8111

Fold 5:
  Precision: 0.7700
  Recall: 0.6754
  F1-Score: 0.7196
  Akurasi: 0.8046

Rata-rata metrik:
  Precision: 0.7437
  Recall: 0.6891
  F1-Score: 0.7147
  Akurasi: 0.8086


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(1029, 8)
(1029,)
(507, 8)
(507,)


In [10]:
clf_en = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
clf_en.fit(X_train, y_train)

In [11]:
y_pred_en = clf_en.predict(X_test)

print('Model accuracy score with criterion entropy: {0:0.4f}'.format(accuracy_score(y_test, y_pred_en)))

Model accuracy score with criterion entropy: 0.7594


In [12]:
print('Training set score: {:.4f}'.format(clf_en.score(X_train, y_train)))
print('Test set score: {:.4f}'.format(clf_en.score(X_test, y_test)))

Training set score: 0.7784
Test set score: 0.7594
