In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the Cleveland heart-disease data from the UCI repository URL below.
# The file has no header row, so the column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

# Build the cleaned frame in one chain:
#   1. read the CSV, treating '?' as a missing value
#   2. drop every row that has any missing value (kept simple on purpose)
#   3. binarize 'target': values > 0 mean heart disease (1), 0 means no disease (0)
data = (
    pd.read_csv(url, header=None, names=column_names, na_values='?')
    .dropna()
    .assign(target=lambda frame: (frame['target'] > 0).astype(int))
)

# Preview the first 5 rows of the cleaned data
print("Data ki pehli 5 lines:")
print(data.head())

Data ki pehli 5 lines:
    age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3   
1  67.0  1.0  4.0     160.0  286.0  0.0      2.0    108.0    1.0      1.5   
2  67.0  1.0  4.0     120.0  229.0  0.0      2.0    129.0    1.0      2.6   
3  37.0  1.0  3.0     130.0  250.0  0.0      0.0    187.0    0.0      3.5   
4  41.0  0.0  2.0     130.0  204.0  0.0      2.0    172.0    0.0      1.4   

   slope   ca  thal  target  
0    3.0  0.0   6.0       0  
1    2.0  3.0   3.0       1  
2    2.0  2.0   7.0       1  
3    3.0  0.0   3.0       0  
4    1.0  0.0   3.0       0  


In [2]:
# 'target' is the label (y); every other column is a feature (X).
y = data['target']
X = data.drop(columns='target')

# Hold out 20% of the rows for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features so they all share a common scale.
# The scaler is fitted on the training rows only and then applied to both
# splits, so the test rows do not influence the fitted statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


print("\nTraining ke liye data points:", len(X_train))
print("Testing ke liye data points:", len(X_test))


Training ke liye data points: 237
Testing ke liye data points: 60


In [3]:
# Create a logistic-regression classifier with default settings and fit it
# on the training data; fit() returns the fitted estimator itself.
print("\nModel training shuru...")
model = LogisticRegression().fit(X_train, y_train)
print("Model training poori hui!")


Model training shuru...
Model training poori hui!


In [4]:
# Inspect the weight (coefficient) the model learned for each feature.
feature_names = X.columns
learned_weights = model.coef_[0]  # row 0 of the fitted coefficient matrix

print("\nModel ne yeh weights seekhe hain:")
weight_by_feature = zip(feature_names, learned_weights)
for feature, weight in weight_by_feature:
    print(f"{feature}: {weight:.4f}")

# Reading the signs: a positive weight pushes the prediction towards heart
# disease (1); a negative weight pushes it towards no disease (0).
# The features were standardized before fitting, so each weight refers to a
# change of one standard deviation in that feature.


Model ne yeh weights seekhe hain:
age: -0.0667
sex: 0.6371
cp: 0.3628
trestbps: 0.4681
chol: 0.3225
fbs: -0.4408
restecg: 0.2049
thalach: -0.4731
exang: 0.4317
oldpeak: 0.3357
slope: 0.2409
ca: 1.1399
thal: 0.5331


In [5]:
# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Accuracy: the fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel ki Accuracy: {accuracy * 100:.2f}%")


Model ki Accuracy: 86.67%


In [6]:
# New patient's data: one value per feature (13 features), listed in the same
# order as the training columns in X.
# Let's say: age=52, sex=1, cp=2, trestbps=172, chol=199, etc.
new_patient_data = [[52, 1, 2, 172, 199, 1, 1, 162, 0, 0.5, 2, 0, 3]]

# Label the values with the training column names before scaling.
# The scaler was fitted on a DataFrame, so passing a bare list makes recent
# scikit-learn versions warn that X has no valid feature names. Building the
# frame also fails loudly if the number of values does not match the number
# of feature columns.
new_patient_frame = pd.DataFrame(new_patient_data, columns=X.columns)

# Important: new data must go through the same fitted scaler as the training data.
new_patient_scaled = scaler.transform(new_patient_frame)

# Predict the class and the per-class probabilities for the new patient.
prediction = model.predict(new_patient_scaled)
prediction_probability = model.predict_proba(new_patient_scaled)

print("\n--- Naye Patient ki Report ---")
if prediction[0] == 1:
    print("Model ka Faisla: Is patient ko heart disease hone ka khatra hai.")
else:
    print("Model ka Faisla: Is patient ko heart disease hone ka khatra nahi hai.")

# Column 0 of predict_proba is class 0 (no disease), column 1 is class 1 (disease).
print("Confidence (Probability):")
print(f"  - No Heart Disease (0): {prediction_probability[0][0]*100:.2f}%")
print(f"  - Heart Disease (1): {prediction_probability[0][1]*100:.2f}%")


--- Naye Patient ki Report ---
Model ka Faisla: Is patient ko heart disease hone ka khatra nahi hai.
Confidence (Probability):
  - No Heart Disease (0): 89.66%
  - Heart Disease (1): 10.34%


