In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Our Columns:
1. age
2. sex
3. chest pain type (4 values)
4. resting blood pressure
5. serum cholesterol in mg/dl
6. fasting blood sugar > 120 mg/dl
7. resting electrocardiographic results (values 0,1,2)
8. maximum heart rate achieved
9. exercise induced angina
10. oldpeak = ST depression induced by exercise relative to rest
11. the slope of the peak exercise ST segment
12. number of major vessels (0-3) colored by fluoroscopy
13. thal: 3 = normal; 6 = fixed defect; 7 = reversible defect

In [None]:
# Load the heart-disease table from the Kaggle input directory into a DataFrame.
heart = pd.read_csv(filepath_or_buffer='../input/heart-disease-uci/heart.csv')

### No Null Values & No Non-Numeric Columns

In [None]:
# Print each column's dtype and non-null count to check for missing values.
heart.info()

### Now Let's Look At The Columns And Understand What Their Values Mean
#### _age_ and _sex_ are self-explanatory; for _sex_, 1 = male and 0 = female. The _cp_ column is the chest pain type (values 0-3)
#### _trestbps:_ resting blood pressure (فشار خون در حال استراحت)
#### _chol:_ serum cholesterol in mg/dl
#### _fbs:_ fasting blood sugar > 120 mg/dl (قند خون ناشتا) (1 = true; 0 = false)
#### _restecg:_ resting electrocardiographic results
#### _thalach:_ maximum heart rate achieved
#### _exang:_ exercise-induced angina (آنژین ناشی از ورزش) (1 = yes; 0 = no)
#### _oldpeak:_ ST depression induced by exercise relative to rest
#### _slope:_ the slope of the peak exercise ST segment
#### _ca:_ number of major vessels (0-3) colored by fluoroscopy (تعداد عروق اصلی (0-3) با استفاده از فلوروسکوپی)
#### _thal:_ 3 = normal; 6 = fixed defect; 7 = reversible defect

In [None]:
# Preview the first rows only; a bare `heart` asks the notebook to display
# the whole dataset, which adds little over a short sample.
heart.head()

### Let's Go:
### We want to see which columns have the strongest relationship with the target, so we use `corr()`
### This is a simple form of feature selection
### Heart rate, slope and resting blood pressure have the most effect on the target

In [None]:
# Pairwise correlations between all columns (including the target), drawn as
# an annotated heatmap with two decimals per cell.
corr_matrix = heart.corr()
plt.figure(figsize=(8, 8))
sns.heatmap(data=corr_matrix, annot=True, fmt='.2f')

In [None]:
# Separate the label column from the predictor columns.
y = heart['target']
x = heart.drop(columns='target')

In [None]:
from sklearn.feature_selection import chi2, SelectKBest

In [None]:
# Score every predictor against the target with the chi-squared statistic.
# NOTE(review): chi2 expects non-negative feature values - confirm that holds
# for every column of x.
best_feature = SelectKBest(score_func=chi2, k=3)
fit_feature = best_feature.fit(x, y)

# One single-column frame for the scores and one for the feature names.
score = pd.DataFrame(fit_feature.scores_)
columns = pd.DataFrame(x.columns)

# Put them side by side, then give the two columns readable labels.
most_festure = pd.concat([score, columns], axis=1)
most_festure.columns = ['score', 'columns']

# Show the three highest-scoring features.
most_festure.nlargest(n=3, columns='score')

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardise every feature, then project onto the first three principal
# components.
# NOTE(review): both transforms are fitted on all rows of x, i.e. before the
# train/test split further down, so held-out rows influence the projection.
scaler = StandardScaler()
x_standard = scaler.fit_transform(x)

pca = PCA(n_components=3)
x_pca_standard = pca.fit_transform(x_standard)

In [None]:
# Share of the total variance captured by each of the three fitted components.
pca.explained_variance_ratio_

In [None]:
# Wrap the PCA scores in a labelled frame. The frame reuses y's index because
# the column-wise concat below aligns rows by index label; with a default
# RangeIndex on one side and a different index on y, rows would be mismatched
# and padded with NaN.
principalDf = pd.DataFrame(
    data=x_pca_standard,
    columns=['principal component 1', 'principal component 2', 'principal component 3'],
    index=y.index,
)
result_pca_target = pd.concat([principalDf, y], axis=1)

# Preview the first rows only instead of rendering the whole frame.
result_pca_target.head()

In [None]:
# 3-D scatter of the three principal components, coloured by target class.
# The 3-D axes is created with add_subplot: passing `projection=` to
# Figure.gca() was deprecated in Matplotlib 3.4 and raises on 3.6+.
fig = plt.figure(figsize=(16, 10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(
    xs=result_pca_target["principal component 1"],
    ys=result_pca_target["principal component 2"],
    zs=result_pca_target["principal component 3"],
    c=result_pca_target["target"],
    cmap='tab10'
)
ax.set_xlabel('pca-one')
ax.set_ylabel('pca-two')
ax.set_zlabel('pca-three')
plt.show()

#### Which ages occur most often with each chest pain type (cp):

In [None]:
# Count the rows for every (age, cp) pair and list the 20 most frequent pairs.
age_cp_counts = x.groupby(['age', 'cp']).size()
age_cp_counts.sort_values(ascending=False).head(20)

#### slope = 1 together with cp = 0 is the most common combination

In [None]:
# Count the rows for every (slope, cp) pair and draw the largest counts
# (at most 20) as a bar chart.
slope_cp_counts = x.groupby(['slope', 'cp']).size()
slope_cp_counts.sort_values(ascending=False).head(20).plot.bar()

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score

# Hold out 30% of the PCA-projected rows for evaluation.
# random_state pins the shuffle so every run (and every model below) sees the
# same split; stratify=y keeps the class ratio equal in both parts.
# NOTE(review): x_pca_standard comes from a scaler/PCA fitted on all rows, so
# the held-out rows already influenced the features - consider fitting those
# transforms on the training part only.
x_train, x_test, y_train, y_test = train_test_split(
    x_pca_standard,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
from sklearn.svm import SVC

# Degree-3 polynomial-kernel SVM with C=200, trained on the training split.
svm = SVC(C=200, kernel='poly', degree=3).fit(x_train, y_train)

# Predicted labels for the held-out rows.
y_predict_svm = svm.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows the SVM labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_predict_svm)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 120 entropy-split trees, each limited to depth 5.
# random_state fixes the bootstrap samples and feature draws so the reported
# accuracy is the same on every run.
random_clf = RandomForestClassifier(
    max_depth=5,
    n_estimators=120,
    criterion='entropy',
    n_jobs=-1,
    verbose=True,
    random_state=42,
)
random_clf.fit(x_train, y_train)
y_predict_random = random_clf.predict(x_test)

# Fraction of held-out rows the forest labelled correctly.
accuracy_score(y_test, y_predict_random)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Single entropy-split decision tree, limited to depth 11.
# random_state makes the choice between equally good splits deterministic, so
# the fitted tree and its accuracy are the same on every run.
tree_clf = DecisionTreeClassifier(max_depth=11, criterion='entropy', random_state=42)
tree_clf.fit(x_train, y_train)
y_predict_tree = tree_clf.predict(x_test)

# Fraction of held-out rows the tree labelled correctly.
accuracy_score(y_test, y_predict_tree)

In [None]:
from sklearn.linear_model import LogisticRegression

# L2-regularised logistic regression fitted with the lbfgs solver.
# The `multi_class='multinomial'` argument was dropped: the parameter is
# deprecated in recent scikit-learn releases and a softmax formulation is not
# needed for this two-class target.
log_clf = LogisticRegression(penalty='l2', solver='lbfgs')
log_clf.fit(x_train, y_train)
y_predict_log = log_clf.predict(x_test)

# Fraction of held-out rows the model labelled correctly.
accuracy_score(y_test, y_predict_log)