In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [3]:

# Load the processed Cleveland heart-disease dataset from the UCI repository.
# No header row is read from the file; the column names are supplied explicitly.
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']
# Missing entries in this file are written as '?'. Declaring '?' as an NA marker
# makes pandas parse those cells as NaN, so every column is read with a numeric
# dtype instead of falling back to strings wherever a '?' occurs.
data = pd.read_csv(url, names=columns, na_values='?')
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,2
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,3
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,1


In [8]:

# Treat the '?' placeholder as missing and drop every row that has a missing value.
# Columns that contained '?' hold strings, and removing the placeholder rows does
# not change their dtype, so convert every column to a numeric dtype explicitly
# (pd.to_numeric leaves columns that are already numeric unchanged).
# Written as one chained expression (no inplace mutation) so the cell can be
# re-run safely.
data = data.replace('?', np.nan).dropna().apply(pd.to_numeric)

In [9]:
# Collapse the target into a binary label (is disease present or not?):
# 1 when the original value is greater than zero, 0 otherwise.
data['target'] = data['target'].gt(0).astype('int64')
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,57.0,0.0,4.0,140.0,241.0,0.0,0.0,123.0,1.0,0.2,2.0,0.0,7.0,1
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,1
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,1


In [11]:
# Separate the label column from the predictor columns.
y = data['target']
X = data.drop(columns='target')

# Hold out 33% of the rows as the test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

# Standardize the features: the scaler is fitted on the training split only
# and then applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
from sklearn.ensemble import GradientBoostingClassifier

# Hyperparameters for the gradient boosting classifier, kept in one place
# so they are easy to read and tweak.
gbm_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}
gbm = GradientBoostingClassifier(**gbm_params)

# Fit the model on the scaled training features and their labels.
gbm.fit(X_train_scaled, y_train)


GradientBoostingClassifier(random_state=42)

In [13]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the scaled test features.
predictions = gbm.predict(X_test_scaled)

# Compare the predictions with the true test labels: a per-class
# classification report and the overall accuracy.
report = classification_report(y_test, predictions)
accuracy = accuracy_score(y_test, predictions)

# Print the accuracy line first, then the report on the following lines.
print(f"Model Accuracy: {accuracy}", report, sep="\n")


Model Accuracy: 0.8181818181818182
              precision    recall  f1-score   support

           0       0.80      0.89      0.84        53
           1       0.85      0.74      0.79        46

    accuracy                           0.82        99
   macro avg       0.82      0.81      0.81        99
weighted avg       0.82      0.82      0.82        99



In [14]:
# Build a single new patient record (these values are entirely fictional).
# The values are listed in the same order as the feature columns of X.
new_patient = np.array([[57, 1, 3, 140, 240, 0, 1, 140, 0, 1.0, 1, 0, 3]])

# The scaler was fitted on a DataFrame, so it remembers the feature names.
# Wrapping the record in a DataFrame with the same columns lets scikit-learn
# validate the names and avoids the "X does not have valid feature names"
# warning that a bare ndarray triggers. It also fails loudly if the number of
# values does not match the number of feature columns.
new_patient_frame = pd.DataFrame(new_patient, columns=X.columns)

# Scale the new record with the scaler that was fitted on the training data.
new_patient_scaled = scaler.transform(new_patient_frame)

# Predict the class and the class probabilities for the new patient.
prediction = gbm.predict(new_patient_scaled)
prediction_proba = gbm.predict_proba(new_patient_scaled)

print("Tahmin edilen sınıf:", prediction[0])
print("Tahmin olasılıkları (Hastalık Yok, Hastalık Var):", prediction_proba[0])


Tahmin edilen sınıf: 0
Tahmin olasılıkları (Hastalık Yok, Hastalık Var): [0.87019963 0.12980037]


