In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
import pickle

In [2]:
data = pd.read_csv('heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,thalach,exang,thal,target
0,52,1,0,125,212,0,168,0,3,0
1,53,1,0,140,203,1,155,1,3,0
2,70,1,0,145,174,0,125,1,3,0
3,61,1,0,148,203,0,161,0,3,0
4,62,0,0,138,294,1,106,0,2,0


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   age       1025 non-null   int64
 1   sex       1025 non-null   int64
 2   cp        1025 non-null   int64
 3   trestbps  1025 non-null   int64
 4   chol      1025 non-null   int64
 5   fbs       1025 non-null   int64
 6   thalach   1025 non-null   int64
 7   exang     1025 non-null   int64
 8   thal      1025 non-null   int64
 9   target    1025 non-null   int64
dtypes: int64(10)
memory usage: 80.2 KB


In [4]:
data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,thalach,exang,thal,target
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,54.434146,0.69561,0.942439,131.611707,246.0,0.149268,149.114146,0.336585,2.323902,0.513171
std,9.07229,0.460373,1.029641,17.516718,51.59251,0.356527,23.005724,0.472772,0.62066,0.50007
min,29.0,0.0,0.0,94.0,126.0,0.0,71.0,0.0,0.0,0.0
25%,48.0,0.0,0.0,120.0,211.0,0.0,132.0,0.0,2.0,0.0
50%,56.0,1.0,1.0,130.0,240.0,0.0,152.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,275.0,0.0,166.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,202.0,1.0,3.0,1.0


In [5]:
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
thalach     0
exang       0
thal        0
target      0
dtype: int64

In [6]:
X = data.drop('target', axis=1)
y = data['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)
print(X_train.shape, X_test.shape)
print(y)

(820, 9) (205, 9)
0       0
1       0
2       0
3       0
4       0
       ..
1020    1
1021    0
1022    0
1023    1
1024    0
Name: target, Length: 1025, dtype: int64


In [7]:
model_1 = LogisticRegression()
model_1.fit(X_train, y_train)
y_pred_1 = model_1.predict(X_test)
print(y_pred_1)
accuracy = accuracy_score(y_test, y_pred_1)
report = classification_report(y_test, y_pred_1)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(report)

[1 1 0 1 1 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 1 0 1
 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1 1 0 1 0 1 1 0 0
 1 1 1 0 1 1 1 0 1 0 0 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1 1 1 1 1
 1 1 0 1 1 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 1
 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 1 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1
 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 0 0 1 1]
Accuracy: 0.77
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.77      0.75        94
           1       0.79      0.77      0.78       111

    accuracy                           0.77       205
   macro avg       0.76      0.77      0.76       205
weighted avg       0.77      0.77      0.77       205



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
model_test = RandomForestClassifier(random_state=45)
param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [5, 10, 15, 20],
    'min_samples_split': [5, 10, 15, 20],
}
grid_search = GridSearchCV(model_test, param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
print(best_model)

RandomForestClassifier(max_depth=10, min_samples_split=5, n_estimators=200,
                       random_state=45)


In [9]:
model = RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_split=5, random_state=45)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(y_pred)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(report)

[1 0 0 1 1 0 1 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 1 0 1
 1 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0
 1 0 0 1 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0 1 0 1 1
 1 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 1 1 1 1 1 1 0 1 0 0 0 0 1 0 1 1 0 1 1
 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1
 1 1 1 0 1 1 1 0 1 0 0 1 1 1 0 0 0 0 1 1]
Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        94
           1       1.00      1.00      1.00       111

    accuracy                           1.00       205
   macro avg       1.00      1.00      1.00       205
weighted avg       1.00      1.00      1.00       205



In [10]:
pickle.dump(model,open('model.pkl','wb'))

In [11]:
model=pickle.load(open('model.pkl','rb'))