In [183]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingRegressor

from sklearn.metrics import r2_score, mean_squared_error, accuracy_score

from sklearn.ensemble import RandomForestClassifier


from sklearn.model_selection import GridSearchCV

In [160]:
data = pd.read_csv("heart.csv")
data[:10]

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
5,39,M,NAP,120,339,0,Normal,170,N,0.0,Up,0
6,45,F,ATA,130,237,0,Normal,170,N,0.0,Up,0
7,54,M,ATA,110,208,0,Normal,142,N,0.0,Up,0
8,37,M,ASY,140,207,0,Normal,130,Y,1.5,Flat,1
9,48,F,ATA,120,284,0,Normal,120,N,0.0,Up,0


In [161]:
data.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [162]:
data.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [163]:
data["Sex"].value_counts(), data["ChestPainType"].value_counts()

(Sex
 M    725
 F    193
 Name: count, dtype: int64,
 ChestPainType
 ASY    496
 NAP    203
 ATA    173
 TA      46
 Name: count, dtype: int64)

In [164]:
data['Sex'] = data["Sex"].replace({'M': 0, 'F': 1})
data_ = pd.get_dummies(data['ChestPainType']).astype("int8")
data_ = data_.drop("TA", axis=1)

In [165]:
data = pd.concat([data,data_], axis="columns")

In [166]:
data.drop("ChestPainType", axis=1, inplace=True)

In [167]:
data["RestingECG"].value_counts()
data_ = pd.get_dummies(data['RestingECG']).astype("int8")
x = data_.drop("ST", axis=1)
data = pd.concat([data,x], axis="columns")
data.drop("RestingECG", inplace=True, axis=1)

In [168]:
data

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,ASY,ATA,NAP,LVH,Normal
0,40,0,140,289,0,172,N,0.0,Up,0,0,1,0,0,1
1,49,1,160,180,0,156,N,1.0,Flat,1,0,0,1,0,1
2,37,0,130,283,0,98,N,0.0,Up,0,0,1,0,0,0
3,48,1,138,214,0,108,Y,1.5,Flat,1,1,0,0,0,1
4,54,0,150,195,0,122,N,0.0,Up,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,0,110,264,0,132,N,1.2,Flat,1,0,0,0,0,1
914,68,0,144,193,1,141,N,3.4,Flat,1,1,0,0,0,1
915,57,0,130,131,0,115,Y,1.2,Flat,1,1,0,0,0,1
916,57,1,130,236,0,174,N,0.0,Flat,1,0,1,0,1,0


In [169]:
data["ExerciseAngina"].value_counts()
data['ExerciseAngina'] = data["ExerciseAngina"].replace({'N': 0, 'Y': 1})
data

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,ASY,ATA,NAP,LVH,Normal
0,40,0,140,289,0,172,0,0.0,Up,0,0,1,0,0,1
1,49,1,160,180,0,156,0,1.0,Flat,1,0,0,1,0,1
2,37,0,130,283,0,98,0,0.0,Up,0,0,1,0,0,0
3,48,1,138,214,0,108,1,1.5,Flat,1,1,0,0,0,1
4,54,0,150,195,0,122,0,0.0,Up,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,0,110,264,0,132,0,1.2,Flat,1,0,0,0,0,1
914,68,0,144,193,1,141,0,3.4,Flat,1,1,0,0,0,1
915,57,0,130,131,0,115,1,1.2,Flat,1,1,0,0,0,1
916,57,1,130,236,0,174,0,0.0,Flat,1,0,1,0,1,0


In [170]:
data["ST_Slope"].value_counts()
data_ = pd.get_dummies(data['ST_Slope']).astype("int8")
x = data_.drop("Down", axis=1)
data = pd.concat([data,x], axis="columns")
data.drop("ST_Slope", inplace=True, axis=1)
data

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,HeartDisease,ASY,ATA,NAP,LVH,Normal,Flat,Up
0,40,0,140,289,0,172,0,0.0,0,0,1,0,0,1,0,1
1,49,1,160,180,0,156,0,1.0,1,0,0,1,0,1,1,0
2,37,0,130,283,0,98,0,0.0,0,0,1,0,0,0,0,1
3,48,1,138,214,0,108,1,1.5,1,1,0,0,0,1,1,0
4,54,0,150,195,0,122,0,0.0,0,0,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,0,110,264,0,132,0,1.2,1,0,0,0,0,1,1,0
914,68,0,144,193,1,141,0,3.4,1,1,0,0,0,1,1,0
915,57,0,130,131,0,115,1,1.2,1,1,0,0,0,1,1,0
916,57,1,130,236,0,174,0,0.0,1,0,1,0,1,0,1,0


In [171]:
data["HeartDisease"].value_counts()

HeartDisease
1    508
0    410
Name: count, dtype: int64

In [172]:
y = data["HeartDisease"]
X = data.drop("HeartDisease", axis=1)
X

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ASY,ATA,NAP,LVH,Normal,Flat,Up
0,40,0,140,289,0,172,0,0.0,0,1,0,0,1,0,1
1,49,1,160,180,0,156,0,1.0,0,0,1,0,1,1,0
2,37,0,130,283,0,98,0,0.0,0,1,0,0,0,0,1
3,48,1,138,214,0,108,1,1.5,1,0,0,0,1,1,0
4,54,0,150,195,0,122,0,0.0,0,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,0,110,264,0,132,0,1.2,0,0,0,0,1,1,0
914,68,0,144,193,1,141,0,3.4,1,0,0,0,1,1,0
915,57,0,130,131,0,115,1,1.2,1,0,0,0,1,1,0
916,57,1,130,236,0,174,0,0.0,0,1,0,1,0,1,0


In [173]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)



In [174]:
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

In [193]:
from sklearn.metrics import accuracy_score
model = RandomForestClassifier(min_samples_split=10, n_estimators=150, random_state=42, max_depth = 4)
model.fit(X_train_s, y_train)

Ypredict = model.predict(X_test_s)
accuracy_Y = accuracy_score(y_test, Ypredict)

Ypredict_train = model.predict(X_train_s)
accuracy_X = accuracy_score(y_train, Ypredict_train)  

accuracy_Y,accuracy_X


(0.8695652173913043, 0.8909883720930233)

In [191]:
parameters = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}


grid_search = GridSearchCV(estimator=model, param_grid=parameters, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train_s, y_train)


best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

best_params, best_estimator

Fitting 5 folds for each of 81 candidates, totalling 405 fits


({'max_depth': None,
  'min_samples_leaf': 1,
  'min_samples_split': 10,
  'n_estimators': 150},
 RandomForestClassifier(min_samples_split=10, n_estimators=150, random_state=42))

In [194]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

In [201]:
clf1 = LogisticRegression(max_iter=1000, random_state=42)
clf2 = DecisionTreeClassifier(min_samples_split=3,random_state=42)
clf3 = SVC(kernel='poly', C=1, probability=True, random_state=42)
clf4 = KNeighborsClassifier(n_neighbors=5)
clf5 = GaussianNB()

ensemble_classifier = VotingClassifier(estimators=[('lr', clf1), ('dt', clf2), ('svc', clf3), ('knn', clf4), ('nb', clf5)])

ensemble_classifier.fit(X_train_s, y_train)

y_pred = ensemble_classifier.predict(X_test_s)

Ypredict = model.predict(X_test_s)
accuracy_Y = accuracy_score(y_test, Ypredict)

Ypredict_train = model.predict(X_train_s)
accuracy_X = accuracy_score(y_train, Ypredict_train)  


accuracy_Y, accuracy_X

(0.8695652173913043, 0.8909883720930233)

In [205]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier


In [208]:
base_estimator = DecisionTreeClassifier(max_depth=1)

adaboost_classifier = AdaBoostClassifier(estimator=base_estimator, n_estimators=50, random_state=42)

adaboost_classifier.fit(X_train_s, y_train)

y_pred = adaboost_classifier.predict(X_test_s)

Ypredict = adaboost_classifier.predict(X_test_s)
accuracy_Y = accuracy_score(y_test, Ypredict)

Ypredict_train = model.predict(X_train_s)
accuracy_X = accuracy_score(y_train, Ypredict_train)  


accuracy_Y, accuracy_X

(0.8608695652173913, 0.8909883720930233)

In [209]:
gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

gb_classifier.fit(X_train_s, y_train)

y_pred = gb_classifier.predict(X_test_s)
Ypredict = gb_classifier.predict(X_test_s)
accuracy_Y = accuracy_score(y_test, Ypredict)

Ypredict_train = model.predict(X_train_s)
accuracy_X = accuracy_score(y_train, Ypredict_train)  


accuracy_Y, accuracy_X

(0.8608695652173913, 0.8909883720930233)

In [210]:
!git init


Reinitialized existing Git repository in C:/Users/User/.git/


In [212]:

!git commit -m "first commit"


On branch master

Initial commit

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	.bash_history
	.dotnet/
	.gitconfig
	.ipynb_checkpoints/
	.ipython/
	.jdks/
	.jupyter/
	.m2/
	.matplotlib/
	.node_repl_history
	.templateengine/
	.vscode/
	3D Objects/
	AppData/
	Contacts/
	Desktop/
	Documents/
	Downloads/
	Favorites/
	IdeaProjects/
	IntelGraphicsProfiles/
	Links/
	MicrosoftEdgeBackups/
	Music/
	NTUSER.DAT
	NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TM.blf
	NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TMContainer00000000000000000001.regtrans-ms
	NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TMContainer00000000000000000002.regtrans-ms
	OneDrive/
	Pictures/
	PycharmProjects/
	Saved Games/
	Searches/
	Untitled1.ipynb
	Untitled2.ipynb
	Untitled3.ipynb
	Untitled4.ipynb
	Untitled5.ipynb
	Untitled6.ipynb
	Untitled7.ipynb
	Videos/
	edb_pem_sqlprofiler_pg15.exe
	edb_pgagent_pg15.exe
	edb_psqlodbc.exe
	heart.csv
	name.ipynb
	ntuser.dat.LOG1
	ntuser.da



In [216]:

!git branch -M main

In [217]:

!git push -u origin main

error: src refspec main does not match any
error: failed to push some refs to 'https://github.com/rr0r4r/titled.git'


In [220]:
!git add titled.ipynb

fatal: Unable to create 'C:/Users/User/.git/index.lock': File exists.

Another git process seems to be running in this repository, e.g.
an editor opened by 'git commit'. Please make sure all processes
are terminated then try again. If it still fails, a git process
may have crashed in this repository earlier:
remove the file manually to continue.
error: open("AppData/Local/Comms/UnistoreDB/USS.jtx"): Permission denied
error: unable to index file 'AppData/Local/Comms/UnistoreDB/USS.jtx'
fatal: adding files failed
