In [4]:
# Import library yang diperlukan
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Load the dataset from the CSV file in the working directory.
file_path = 'heart.csv'
data = pd.read_csv(file_path)

# Quick exploration: first rows, per-column info, and descriptive statistics.
preview = data.head()
print("Head of dataset:\n", preview)

print("\nInfo dataset:\n")
data.info()  # prints its own report instead of returning a value

summary_stats = data.describe()
print("\nStatistik deskriptif:\n", summary_stats)

# Count missing values per column.
missing_per_column = data.isna().sum()
print("\nMissing values:\n", missing_per_column)

# Separate the features (X) from the label column (y).
X = data.drop(columns=['target'])  # 'target' is the column to predict
y = data['target']

# Split into training and test sets (80:20) BEFORE scaling, so the scaler's
# mean/std are learned from the training rows only and no test-set statistics
# leak into preprocessing. The row split itself is unchanged (same random_state).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: fit on the training rows, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix transformed with the train-fitted scaler; kept so any
# code that still refers to X_scaled keeps working.
X_scaled = scaler.transform(X)

# Choose the model (Random Forest classifier) and train it.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = model.predict(X_test)

# Evaluate the model.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

# Hand-written samples for a quick prediction check. Only the first 3 rows are
# active; each row lists the feature values in the same order as the columns of X.
# NOTE(review): the 7 disabled rows are kept for reference. Some of them use
# thal=7, which may not match the encoding used in this CSV -- verify before
# re-enabling them.
new_data = np.array([
    [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1],
    [37, 0, 2, 130, 250, 0, 1, 187, 0, 3.5, 1, 0, 2],
    [41, 1, 1, 130, 204, 0, 0, 172, 0, 1.4, 1, 0, 3],
    # [56, 1, 1, 120, 236, 0, 1, 178, 0, 0.8, 1, 0, 3],
    # [57, 0, 0, 140, 241, 0, 1, 123, 1, 0.2, 1, 0, 3],
    # [57, 1, 0, 120, 354, 0, 1, 163, 1, 0.6, 1, 0, 3],
    # [60, 1, 2, 140, 294, 0, 1, 153, 0, 2.0, 1, 2, 7],
    # [62, 0, 0, 160, 164, 0, 1, 145, 0, 6.2, 1, 3, 3],
    # [64, 1, 0, 120, 221, 0, 1, 106, 0, 5.6, 1, 0, 7],
    # [59, 1, 0, 140, 177, 0, 1, 162, 1, 0.0, 2, 1, 7]
])

# Label the samples with the training feature names before scaling. The scaler
# was fitted on a named DataFrame, so this avoids scikit-learn's
# "X does not have valid feature names" warning and fails loudly if a row has
# the wrong number of values.
new_data_df = pd.DataFrame(new_data, columns=X.columns)

# Scale the samples with the scaler that was fitted earlier.
new_data_scaled = scaler.transform(new_data_df)

# Predict and report each sample.
new_predictions = model.predict(new_data_scaled)
for i, prediction in enumerate(new_predictions):
    status = "memiliki penyakit jantung" if prediction == 1 else "tidak memiliki penyakit jantung"
    print(f"Data tes {i+1} bernilai {prediction} atau {status}.")

# Usage notes:
# 1. Make sure new data is preprocessed into the same format as the training data.
# 2. Use the same fitted scaler to normalize new data.
# 3. Use the already-trained model to predict on new data.
# 4. Reading the prediction: 1 = has heart disease, 0 = does not have heart disease.


Head of dataset:
    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

Info dataset:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   cho

