## If you find this kernel helpful, please upvote.

## Problem Definition
* Given clinical parameters about a patient, can we predict whether or not they have heart disease?

## Data contains;

* age - age in years
* sex - (1 = male; 0 = female)
* cp - chest pain type(gogus agrisi tipi)
* trestbps - resting blood pressure (in mm Hg on admission to the hospital) (kan basinci)
* chol - serum cholesterol in mg/dl (mg/dl cinsinden serum kolesterolü)
* fbs - (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)
* restecg - resting electrocardiographic results(dinlenme elektrokardiyografik sonuçları)
* thalach - maximum heart rate achieved (ulaşılan maksimum kalp atış hızı)
* exang - exercise induced angina (1 = yes; 0 = no) (egzersize bağlı anjina (1 = evet; 0 = hayır))
* oldpeak - ST depression induced by exercise relative to rest (dinlenmeye göre egzersizin neden olduğu ST depresyonu)
* slope - the slope of the peak exercise ST segment (en yüksek egzersiz ST segmentinin eğimi)
* ca - number of major vessels (0-3) colored by fluoroscopy
* thal - 3 = normal; 6 = fixed defect; 7 = reversible defect
* target - have disease or not (1=yes, 0=no) (hastalığı var mı yok mu (1=evet, 0=hayır))

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.metrics import accuracy_score,mean_squared_error,roc_curve,roc_auc_score,classification_report,r2_score,confusion_matrix

from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor


# For data visualization
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns; sns.set()
# Plotly for interactive graphics 
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# Disabling warnings
import warnings
warnings.simplefilter("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import warnings
warnings.filterwarnings("ignore")

## READ DATA AND EXPLORING DATA

In [None]:
# Load the heart-disease CSV from the Kaggle input directory.
data=pd.read_csv("/kaggle/input/heart-disease-uci/heart.csv")
# Work on a copy so the originally loaded frame stays untouched.
df=data.copy()
df.head()

In [None]:
data.sample(5)  #chose randon sample from row

In [None]:
df.info()

In [None]:
df.target.unique()

In [None]:
df.isnull().sum()  

In [None]:
df["target"].value_counts()

In [None]:
df.describe()

In [None]:
df.corr()

## SOME VISUALIZATION

### Count of disease and not disease

In [None]:
# Class balance of the target column.
# The column is passed as `x=` because newer seaborn releases interpret the
# first positional argument as `data`, not as the x variable.
sns.countplot(x=df.target, palette=['green', 'red'])
plt.title("[0] == Not Disease, [1] == Disease");

### Distribution of disease and not disease with scatter

In [None]:
# Age against maximum heart rate, drawn as one scatter layer per class.
has_disease = df.target == 1
no_disease = df.target == 0
plt.scatter(x=df.age[has_disease], y=df.thalach[has_disease], c="red")
plt.scatter(x=df.age[no_disease], y=df.thalach[no_disease])
# Legend entries follow the order in which the layers were drawn.
plt.legend(["Disease", "Not Disease"])
plt.ylabel("Maximum Heart Rate")
plt.xlabel("Age")
plt.show()

### Distribution of age with distplot

In [None]:
f, ax = plt.subplots(figsize=(10,6)) # distribution of age with distplot
x = df['age']
# Histogram of age in 10 bins.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot are the documented replacements) — confirm against the
# installed seaborn version before upgrading.
ax = sns.distplot(x, bins=10)
plt.show()

### Distribution of age with boxplot

In [None]:
f, ax = plt.subplots(figsize=(8, 6))   #DISTRUBUTION OF AGE WITH BOXPLOT
sns.boxplot(x=df["age"])
plt.show()

### Dividing into age groups

In [None]:
# Split the patients into three age groups that together cover every row:
#   young   : age < 40
#   middle  : 40 <= age < 55
#   elderly : age >= 55
# The elderly bound is inclusive so that patients aged exactly 55 are not
# dropped from all groups, and the young group has no hard-coded lower bound.
young_ages = df[df.age < 40]
middle_ages = df[(df.age >= 40) & (df.age < 55)]
elderly_ages = df[df.age >= 55]
print('Young Ages :',len(young_ages))
print('Middle Ages :',len(middle_ages))
print('Elderly Ages :',len(elderly_ages))

### Dividing into age groups with barplot

In [None]:
# Bar chart with the number of patients in each age group.
group_labels = ['young ages', 'middle ages', 'elderly ages']
group_sizes = [len(young_ages), len(middle_ages), len(elderly_ages)]
sns.barplot(x=group_labels, y=group_sizes)
plt.title('Ages State in Dataset')
plt.xlabel('Age Range')
plt.ylabel('Age Counts')
plt.show()

* There are a few young ages

### Dividing into age groups with pieplot

In [None]:
# Pie chart of the three age groups; the last (elderly) slice is pulled out.
plt.figure(figsize = (10,10))
colors = ['blue','green','yellow']
explode = [0,0,0.1]
age_group_sizes = [len(group) for group in (young_ages, middle_ages, elderly_ages)]
plt.pie(age_group_sizes,
        labels=['young ages','middle ages','elderly ages'],
        explode=explode,
        colors=colors,
        autopct='%1.1f%%')
plt.title('Age States',color = 'blue',fontsize = 15)
plt.show()

### Distribution of Age and Target with violinplot

In [None]:
# Violin plot of age against the target class.
plt.figure(figsize=(15,7))
sns.violinplot(x=df.age,y=df.target)
plt.xticks(rotation=90)
# No plt.legend() here: none of the plotted artists carries a label, so the
# call would only add an empty legend.
plt.title("Age & Target System")
plt.show()

In [None]:
df.columns

### sex and ca (hue=target) with barplot

In [None]:
plt.figure(figsize=(10,7))
sns.barplot(x="sex",y = 'ca',hue = 'target',data=df);

### Sex and Oldpeak(hue=restecg)

In [None]:
plt.figure(figsize=(10,7))
sns.barplot(x="sex",y = 'oldpeak',hue = 'restecg',data=df);

### Count of target with hue=sex

In [None]:
# Count of each target class, split by sex.
# `x=` is given explicitly because newer seaborn releases interpret the first
# positional argument as `data`, not as the x variable.
sns.countplot(x=df.target, hue=df.sex)
plt.xlabel('Target')
plt.ylabel('Count')
plt.title('Target & Sex Counter 1 & 0')
plt.show()

### Number of people who have heart disease according to age 

In [None]:
plt.figure(figsize=(15,6))
sns.countplot(x='age',data = df, hue = 'target',palette='GnBu')
plt.show()#Number of people who have heart disease according to age 

## Correlation matrix(heatmap)

In [None]:
# Let's make our correlation matrix a little prettier
# Pairwise correlation of all columns, drawn as an annotated heatmap.
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(15, 15))
ax = sns.heatmap(corr_matrix,
                 annot=True,
                 linewidths=0.5,
                 fmt=".2f",
                 cmap="YlGnBu");
# Move each y-limit outward by half a cell.
# NOTE(review): presumably a workaround for a matplotlib release that clipped
# the first and last heatmap rows — confirm it is still needed.
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)

### Interpretation of heatmap
From the above correlation heat map, we can conclude that :-

* target and cp variable are mildly positively correlated (correlation coefficient = 0.43).

* target and thalach variable are also mildly positively correlated (correlation coefficient = 0.42).

* target and slope variable are weakly positively correlated (correlation coefficient = 0.35).

* target and exang variable are mildly negatively correlated (correlation coefficient = -0.44).

* target and oldpeak variable are also mildly negatively correlated (correlation coefficient = -0.43).

* target and ca variable are weakly negatively correlated (correlation coefficient = -0.39).

* target and thal variable are also weakly negatively correlated (correlation coefficient = -0.34).

In [None]:
df.groupby('cp',as_index=False)['target'].mean()

In [None]:
df.groupby('slope',as_index=False)['target'].mean()

In [None]:
df.groupby('target').mean()

In [None]:
num_var = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'target' ]
sns.pairplot(df[num_var], kind='scatter', diag_kind='hist')
plt.show()

In [None]:
num_var = ['cp', 'slope', 'exang', 'thalach', 'oldpeak','ca','thal', 'target' ]
sns.pairplot(df[num_var], kind='scatter', diag_kind='hist')
plt.show()

### correlation only with target and other variables

In [None]:
df.drop('target', axis=1).corrwith(df.target).plot(kind='bar', grid=True, figsize=(12, 8), 
                                                   title="Correlation with target")

## Target and Thalach

In [None]:
f, ax = plt.subplots(figsize=(8, 6))
sns.stripplot(x="target", y="thalach", data=df)
plt.show()

### Interpretation
* We can see that those people suffering from heart disease (target = 1) have relatively higher heart rate (thalach) as compared to people who are not suffering from heart disease (target = 0).


In [None]:
f, ax = plt.subplots(figsize=(8, 6))# with jitter
sns.stripplot(x="target", y="thalach", data=df, jitter = 0.01)
plt.show()

In [None]:
f, ax = plt.subplots(figsize=(8, 6))  #with boxplot
sns.boxplot(x="target", y="thalach", data=df)
plt.show()

#### CONCLUSION OF VISUALIZATION
Findings of Bivariate Analysis are as follows:

* There is no variable which has strong positive correlation with target variable.

* There is no variable which has strong negative correlation with target variable.

* There is no correlation between target and fbs.

* The cp and thalach variables are mildly positively correlated with target variable.

* We can see that the thalach variable is slightly negatively skewed.

* The people suffering from heart disease (target = 1) have relatively higher heart rate (thalach) as compared to people who are not suffering from heart disease (target = 0).

## LETS NORMALIZE THE VARIABLES 

### Normalization

In [None]:
# Min-max scale every feature column to the [0, 1] range.
y = df.target.values
x_dat = df.drop(['target'], axis = 1)
# Use the DataFrame's own column-wise reductions. np.min/np.max on a DataFrame
# forward axis=None, which recent pandas versions reduce over BOTH axes,
# giving one global scalar instead of per-column minima/maxima.
col_min = x_dat.min()
col_max = x_dat.max()
x = (x_dat - col_min) / (col_max - col_min)

# LET'S TRY CLASSIFICATION METHODS
* Now that the features are normalized, it's time to split the data into training and test sets and build machine learning models.

* We'll train it (find the patterns) on the training set.

* And we'll test it (use the patterns) on the test set.

* We're going to try six machine learning models:
   * 1) Logistic Regression
   * 2) Naive Bayes
   * 3) K-Nearest Neighbours Classifier
   * 4) Support Vector Machine
   * 5) Random Forest Classifier
   * 6) Decision Tree Classifier

## 1)LOGISTIC REGRESSION:
 * Logistic Regression is a useful model to run early in the workflow. It measures the relationship between the categorical dependent variable (the target) and one or more independent variables (features) by estimating probabilities with a logistic function, which is the cumulative logistic distribution.
 * Lojistik regresyon, kümülatif lojistik dağılım olan bir lojistik fonksiyon kullanarak olasılıkları tahmin ederek kategorik bağımlı değişken (özellik) ile bir veya daha fazla bağımsız değişken (özellik) arasındaki ilişkiyi ölçer.

### A ) Train-test splitting

In [None]:
y=df.target.values
x_dat=df.drop(["target"],axis=1)

In [None]:
from sklearn.model_selection import train_test_split,cross_val_score,ShuffleSplit,GridSearchCV
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=42)

### B ) Modeling of Logistic R. Method

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit a liblinear logistic regression on the training split and predict the
# class of every test row.
lr = LogisticRegression(solver='liblinear').fit(x_train, y_train)
y_pred = lr.predict(x_test)

In [None]:
lr #We can see what there is in lr(icinde hangi secenekler vargormek icin) 

#LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   #intercept_scaling=1, l1_ratio=None, max_iter=100,
                  # multi_class='auto', n_jobs=None, penalty='l2',
                  # random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                  # warm_start=False)

In [None]:
lr

In [None]:
lr.intercept_  #sabit katsayi

In [None]:
lr.coef_   #degisken katsayilari

### C ) Lets control the succes(score) prediction(accuracy_score,confusion m.) on test_data

In [None]:
# Accuracy on the test set: the test labels (y_test) are compared with the
# predicted labels (y_pred) and the share of correct predictions is returned.
l_score=accuracy_score(y_test,y_pred)
l_score

In [None]:
# Confusion matrix of the logistic-regression predictions on the test set.
# Per the original author's note, the run reported 31 correct predictions for
# class 1 and 35 correct predictions for class 0.
# confusion_matrix is imported in the first cell.
c_l=confusion_matrix(y_test,y_pred)
c_l

In [None]:
from sklearn.metrics import confusion_matrix   # the same plot can be made for every model
# Draw the logistic-regression confusion matrix as an annotated heatmap.
y_pred = lr.predict(x_test)
y_true = y_test
cmlr = confusion_matrix(y_true, y_pred)
f, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(cmlr, annot=True)
plt.ylabel("y_true")
plt.xlabel("y_pred")
plt.show()

#### CLASSIFICATION REPORT: we can also see the classification report.

In [None]:
#print(classification_report(y_test,y_pred)) #yukarda import edildi

#### LOOK AT ALL PREDICTION VALUE ON TEST DATA:

In [None]:
lr.predict(x_test)[0:10] #ilk 10 datatest deki tahminlerimiz

In [None]:
lr.predict_proba(x_test)[0:10] #1.si 0 olma 2.si 1 olma olasiligi oranlari

### E ) TUNING THE PREDICTION: WE can tune our prediction


In [None]:
y_probs = lr.predict_proba(x_test)[:,1]

In [None]:
# Re-label the test rows with a custom probability cut-off of 0.52:
# class 1 when the predicted probability exceeds it, class 0 otherwise.
y_pred = [int(prob > 0.52) for prob in y_probs]
y_pred[-10:]

In [None]:
confusion_matrix(y_test,y_pred)

In [None]:
accuracy_score(y_test,y_pred)

* As we can see, if we change the probability threshold, our predictions, confusion matrix and accuracy_score change.

## 2)NAIVE BAYES METHOD
* In machine learning, Naive Bayes classifiers are a family of simple probabilistic classifiers based on applying Bayes' theorem with strong (naive) independence assumptions between the features. 
* Naive Bayes classifiers are highly scalable, requiring a number of parameters linear in the number of variables (features) in a learning problem.


### A ) Train-test splitting

In [None]:
y = df.target
x = df.drop('target',axis = 1)
x_train,x_test,y_train,y_test = train_test_split(x,y,
                                                test_size = 0.25,
                                                random_state = 42)

### B ) Modeling of Naive B. Method

In [None]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(x_train, y_train)
y_pred=nb.predict(x_test)
y_pred[:10]

In [None]:
nb  #we can look at which option is there in GaussionNB

In [None]:
#?nb

### C ) Lets control the succes(score) prediction(accuracy_score,confusion m.) on test_data

In [None]:
n_score=accuracy_score(y_test,y_pred)
n_score

In [None]:
c_nb=confusion_matrix(y_test,y_pred)
c_nb
#confusion matrixle tahmin etme sayilarini bulduk,1 icin 32 i dogru tahmin,0 icin 30 i dogru tahmin
#En ustte import edildi.

#### HEATMAP IN CONFUSION MATRIX: We can see the confusion matrix in Heatmap.

In [None]:
from sklearn.metrics import confusion_matrix   #Hepsi icin yapilabilir
y_true=y_test
y_pred=nb.predict(x_test)
cmnb=confusion_matrix(y_true, y_pred)
f,ax=plt.subplots(figsize=(6,6))
sns.heatmap(cmnb, annot=True)
plt.xlabel("y_pred")
plt.ylabel("y_true")
plt.show()

#### CLASSIFICATION REPORT: we can also see the classification report.

In [None]:
#print(classification_report(y_test,y_pred)) #yukarda import edildi

#### LOOK AT ALL PREDICTION VALUE ON TEST DATA.

In [None]:
nb.predict(x_test)[0:10] #ilk 10 datatest deki tahminlerimiz

In [None]:
nb.predict_proba(x_test)[0:10] #1.si 0 olma 2.si 1 olma olasiligi oranlari

### E ) TUNING THE PREDICTION: WE can tune our prediction

In [None]:
y_probs = nb.predict_proba(x_test)[:,1]
y_pred = [1 if i>0.45 else 0 for i in y_probs]
y_pred[0:10]

In [None]:
nb_tuned_bestscore=accuracy_score(y_test,y_pred)
nb_tuned_bestscore

In [None]:

cmnb_best=confusion_matrix(y_test,y_pred) 
cmnb_best

* If we tune the decision threshold for nb, the score increases a little.
* nb_tuned_bestscore = 89% and cmnb_best are our best score and our best confusion matrix.

## 3)KNN METHOD
* In pattern recognition, the k-Nearest Neighbors algorithm (or k-NN for short) is a non-parametric method used for classification and regression. 
* A sample is classified by a majority vote of its neighbors, with the sample being assigned to the class most common among its k nearest neighbors (k is a positive integer, typically small). 
* In this method we need to choose k value.It means that we chose k number of points of classes which are nearest to the out test point. We can call this small data set. We count the number of classes in the small dataset and determine the highest number of class. Finally we can say our test point belongs to the class.

* While choosing k we have to be careful, because a small k value causes overfitting while a big k value causes underfitting.

* Coding is the same for all supervised classifiers and we just need to change the last part of the code.
* K=1 SECERSEK OVERFITTING OLABILIR, K= BUYUK SECERSEK UNDERFITTING OLABILIR

In [None]:
from sklearn.neighbors import KNeighborsClassifier
y=df.target
x=df.drop("target",axis=1)


### A ) Train-test splitting

In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,
                                                test_size = 0.25,
                                                random_state = 42)

### B ) Modeling of KNN Method

In [None]:
# 3-nearest-neighbours classifier, fitted once on the training split.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train,y_train)
y_pred = knn.predict(x_test)
y_pred

In [None]:
knn  ##we can look at which option is there in KNeighborsClassifier
#KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     #metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     #weights='uniform')

### C ) Lets control the succes(score) prediction(accuracy_score,confusion m.) on test_data

#### Look at accuracy_score

In [None]:
knn_score=accuracy_score(y_test,y_pred)
knn_score

In [None]:
c_knn=confusion_matrix(y_test,y_pred)
c_knn

#### HEATMAP IN CONFUSION MATRIX: We can see the confusion matrix in Heatmap.

In [None]:
from sklearn.metrics import confusion_matrix   #Hepsi icin yapilabilir
y_true=y_test
y_pred=knn.predict(x_test)
cmknn=confusion_matrix(y_true, y_pred)
f,ax=plt.subplots(figsize=(6,6))
sns.heatmap(cmknn, annot=True)
plt.xlabel("y_pred")
plt.ylabel("y_true")
plt.show()

#### CLASSIFICATION REPORT: we can also see the classification report.

In [None]:
#print(classification_report(y_test,y_pred)) #yukarda import edildi

#### LOOK AT ALL PREDICTION VALUE ON TEST DATA.

In [None]:
knn.predict(x_test)[0:10] #ilk 10 datatest deki tahminlerimiz

In [None]:
knn.predict_proba(x_test)[0:10] #1.si 0 olma 2.si 1 olma olasiligi oranlari

### E ) TUNING THE PREDICTION: WE can tune our prediction
* we can tune (n_neigbors,metric,..)

In [None]:
RMSE = []   # error on the TRAINING data for each k

for k in range(30):
    k = k+1  # n_neighbors runs from 1 to 30
    # NOTE(review): a KNeighborsRegressor is fitted on the 0/1 target here
    # although the task is classification — confirm this is intended.
    knn = KNeighborsRegressor(n_neighbors = k).fit(x_train, y_train)
    y_pred = knn.predict(x_train) 
    rmse = np.sqrt(mean_squared_error(y_train,y_pred)) 
    RMSE.append(rmse) 
    print("k =" , k , "için RMSE değeri: ", rmse)

In [None]:
from sklearn.model_selection import GridSearchCV  
#We use Grid for tuning

In [None]:
# Candidate neighbourhood sizes (1..29) for the grid search.
knn_params = {'n_neighbors': np.arange(1,30,1)}
# Tune the same estimator type that is used for the final model: a classifier
# (scored by accuracy), not a regressor (scored by R^2).
knn = KNeighborsClassifier()

In [None]:
knn_cv_model = GridSearchCV(knn, knn_params, cv = 10) #cross validation yontemi kullaniliyor.nesnesi tanimlandi
knn_cv_model.fit(x_train, y_train)

In [None]:
print("Best Score:"+str(knn_cv_model.best_score_))
print("Best Parameters:"+str(knn_cv_model.best_params_))

* The grid search reports the best n_neighbors value (21 in the original run of this notebook); using it gives the best cross-validated score.

In [None]:
# Refit KNN with the neighbourhood size selected by the grid search instead of
# a hard-coded value, then score it on the held-out test split.
best_k = knn_cv_model.best_params_["n_neighbors"]
knn_tuned = KNeighborsClassifier(n_neighbors=best_k)
knn_tuned = knn_tuned.fit(x_train,y_train)
y_pred = knn_tuned.predict(x_test)
knn_tuned_score=accuracy_score(y_test,y_pred)
knn_tuned_score

In [None]:
#np.sqrt(mean_squared_error(y_test, knn_tuned.predict(x_test)))

* We tuned the knn, and then our score increased.

In [None]:
# KNN with n_neighbors fixed at 21 and the Hamming metric.
# NOTE(review): Hamming distance only counts how many coordinates differ, which
# is normally meant for categorical/binary features — confirm it is appropriate
# for the continuous columns used here.
knn_tune2 =KNeighborsClassifier(n_neighbors = 21,metric='hamming')
knn_tune2.fit(x_train,y_train)
y_pred = knn_tune2.predict(x_test)
knn_tuned_bestscore=accuracy_score(y_test,y_pred)
knn_tuned_bestscore

In [None]:
from sklearn.metrics import confusion_matrix   #Hepsi icin yapilabilir
y_true=y_test
y_pred=knn_tune2.predict(x_test)
cmknn_best=confusion_matrix(y_true, y_pred)
f,ax=plt.subplots(figsize=(6,6))
sns.heatmap(cmknn_best, annot=True)
plt.xlabel("y_pred")
plt.ylabel("y_true")
plt.show()

* If we change the metric and use the tuned n_neighbors, the accuracy_score is best. There are many kinds of metrics in KNN: minkowski, hamming, ...

### Conclusion KNN:
* knn_tuned_bestscore = 85% and cmknn_best are our best score and our best confusion matrix.

## 4)SVM(SUPPORT VECTOR MACHINES)
* Support Vector Machines are supervised learning models with associated learning algorithms that analyze data used for classification and regression analysis. Given a set of training samples, each marked as belonging to one or the other of two categories, an SVM training algorithm builds a model that assigns new test samples to one category or the other, making it a non-probabilistic binary linear classifier.
* SVM is used for both regression and classification problems, but generally for classification. There is a C parameter inside the SVM algorithm and the default value of the C parameter is 1. If C is small, it causes misclassification. If C is big, it causes overfitting. So we need to try different values of C to find the best one.
* SVM, hem regresyon hem de sınıflandırma problemleri için kullanılır, ancak genellikle sınıflandırma için kullanılır. SVM içerisinde C parametresi vardır ve C parametresinin default değeri 1'dir. C'nin küçük olması yanlış sınıflandırmaya neden olur. C büyükse overfitting e neden olur. Bu yüzden en iyi değeri bulmak için C parametresini denememiz gerekiyor.

In [None]:
from sklearn.svm import SVC
y=df.target
x=df.drop("target",axis=1)

### A ) Train-test splitting

In [None]:
y = df.target
x = df.drop('target',axis = 1)
x_train,x_test,y_train,y_test = train_test_split(x,y,
                                                test_size = 0.25,
                                                random_state = 42)

### B ) Modeling of SVM Method

In [None]:
svm = SVC(C=5,degree=9,kernel = 'poly')
svm.fit(x_train,y_train)
y_pred = svm.predict(x_test)

In [None]:
#?svm

In [None]:
svm
#SVC(C=5, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    #decision_function_shape='ovr', degree=9, gamma='scale', kernel='poly',
    #max_iter=-1, probability=False, random_state=None, shrinking=True,
    #tol=0.001, verbose=False)

In [None]:
y_pred

### C ) Lets control the succes(score) prediction(accuracy_score,confusion m.) on test_data

In [None]:
svm_score1 = accuracy_score(y_test,y_pred)
svm_score1

In [None]:
c_svm=confusion_matrix(y_test,y_pred)
c_svm

#### HEATMAP IN CONFUSION MATRIX: We can see the confusion matrix in Heatmap.

In [None]:
from sklearn.metrics import confusion_matrix   #Hepsi icin yapilabilir
y_true=y_test
y_pred=svm.predict(x_test)
cmsvm=confusion_matrix(y_true, y_pred)
f,ax=plt.subplots(figsize=(6,6))
sns.heatmap(cmsvm, annot=True)
plt.xlabel("y_pred")
plt.ylabel("y_true")
plt.show()

#### CLASSIFICATION REPORT: we can also see the classification report.

In [None]:
#print(classification_report(y_test,y_pred)) #yukarda import edildi

### E ) TUNING THE PREDICTION: WE can tune our prediction
* Look at c,kernel,gamma

#### Tuning1-change C and gamma

In [None]:
#EN UYGUN C VE GAMMA DEGERI BULMA
# Find the best C and gamma values with a 10-fold cross-validated grid search
# over a default SVC, then print the winning combination.
svc_params ={"C":[0.00001,0.001,0.01,5,10,50,100],
            "gamma":[0.0001,0.001,0.01,1,5,10,50,100]}
svc =SVC()
svc_cv_model = GridSearchCV(svc,svc_params,
                           cv = 10,
                           n_jobs = -1,
                           verbose = 2)
svc_cv_model.fit(x_train,y_train)
print("Best Parameters:"+str(svc_cv_model.best_params_))

In [None]:
# IPython help lookup, kept commented out like the other help cells so the
# notebook also runs as plain Python.
#?svc_cv_model

In [None]:
# svm_tune1= SVC(C=100,gamma= 0.0001,degree=9,kernel = 'poly')
# svm_tune1.fit(x_train,y_train)
# y_pred = svm.predict(x_test)  # cok uzun suruyor

In [None]:
# Score the grid-searched SVC on the test split. GridSearchCV refits the best
# parameter combination on the training data by default, so predict() uses the
# tuned C and gamma; without this prediction the score would be computed from
# whatever y_pred an earlier cell left behind.
y_pred = svc_cv_model.predict(x_test)
svm_score2 = accuracy_score(y_test,y_pred)
svm_score2

#### Tuning2-changing kernel=linear,c=100

In [None]:
#we changed the kernel,We can use linear,poly,rbf...
# Linear-kernel SVC with C=100. `degree` is only used by the 'poly' kernel,
# so it is not passed here.
svm_tune2 = SVC(C=100,kernel = 'linear')
svm_tune2.fit(x_train,y_train)
y_pred = svm_tune2.predict(x_test)
accuracy_score(y_test,y_pred)

#### Tuning3-changing kernel=rbf,c=100

In [None]:
#we changed the kernel,We can use linear,poly,rbf...
# RBF-kernel SVC with C=100. `degree` is only used by the 'poly' kernel,
# so it is not passed here.
svm_tune3 = SVC(C=100,kernel = 'rbf')
svm_tune3.fit(x_train,y_train)
y_pred = svm_tune3.predict(x_test)
accuracy_score(y_test,y_pred)

In [None]:
# svc_tuned=SVC(C=100,gamma=0.0001,kernel = 'linear')
# svc_tuned.fit(x_train,y_train)
# y_pred = svc_tuned.predict(x_test)
# accuracy_score(y_test,y_pred)    #uzun suruyor

### Conclusion:

* svm_score1 = 84% is the best score and c_svm is the best confusion matrix.

## 5)RANDOM FOREST METHOD
Random Forest is one of the most popular models. Random forests or random decision forests are an ensemble learning method for classification, regression and other tasks, that operate by constructing a multitude of decision trees (n_estimators= [100, 300]) at training time and outputting the class that is the mode of the classes (classification) or mean prediction (regression) of the individual trees.
* This method basically uses multiple decision trees and takes the average of the results of these decision trees. We use this average to determine the class of the test point.

* This is an ensemble method which uses multiple models to predict the target, and it is a very powerful technique.

In [None]:
from sklearn.ensemble import RandomForestClassifier
y=df.target
x=df.drop("target",axis=1)

### A ) Train-test splitting

In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,
                                                test_size = 0.25,
                                                random_state = 42)

### B ) Modeling of Random Forest Method

In [None]:
rf=RandomForestClassifier()
rf.fit(x_train,y_train)
y_pred = rf.predict(x_test)
y_pred

In [None]:
rf
#RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       #criterion='gini', max_depth=None, max_features='auto',
                       #max_leaf_nodes=None, max_samples=None,
                      # min_impurity_decrease=0.0, min_impurity_split=None,
                      # min_samples_leaf=1, min_samples_split=2,
                      # min_weight_fraction_leaf=0.0, n_estimators=100,
                      # n_jobs=None, oob_score=False, random_state=None,
                      # verbose=0, warm_start=False)

### C ) Lets control the succes(score) prediction(accuracy_score,confusion m.) on test_data

In [None]:
rf_score=accuracy_score(y_test,y_pred)
rf_score

In [None]:
c_rf=confusion_matrix(y_test,y_pred)
c_rf

#### HEATMAP IN CONFUSION MATRIX: We can see the confusion matrix in Heatmap.

In [None]:
from sklearn.metrics import confusion_matrix   # the same plot can be made for every model
# Draw the random-forest confusion matrix as an annotated heatmap. The matrix
# is stored under its own name (cmrf) so it does not overwrite the
# logistic-regression matrix kept in cmlr.
y_true=y_test
y_pred=rf.predict(x_test)
cmrf=confusion_matrix(y_true, y_pred)
f,ax=plt.subplots(figsize=(6,6))
sns.heatmap(cmrf, annot=True)
plt.xlabel("y_pred")
plt.ylabel("y_true")
plt.show()

#### CLASSIFICATION REPORT: we can also see the classification report.

In [None]:
print(classification_report(y_test,y_pred)) #yukarda import edildi

#### LOOK AT ALL PREDICTION VALUE ON TEST DATA.

In [None]:
rf.predict(x_test)[0:10] #ilk 10 datatest deki tahminlerimiz

In [None]:
rf.predict_proba(x_test)[0:10] #1.si 0 olma 2.si 1 olma olasiligi oranlari

### E ) TUNING THE PREDICTION: WE can tune our prediction
* n_estimators,importance variables,

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Test-set accuracy (in percent) of a random forest for every n_estimators
# value from 1 to 74. The original author observed n_estimators=11 to be best.
n_estimators_range = range(1, 75)
score_list=[]
for each in n_estimators_range:
    rf2=RandomForestClassifier(n_estimators=each, random_state=42)
    rf2.fit(x_train, y_train)
    # Score once and reuse the value for both the list and the printout.
    accuracy_pct = 100*rf2.score(x_test, y_test)
    score_list.append(accuracy_pct)
    print("n_estimators=", each, "--> Accuracy:", accuracy_pct, "%")

plt.plot(list(n_estimators_range), score_list)
plt.xlabel("n_estimators Value")
plt.ylabel("Accuracy %")
plt.show()

### Lets look at importance 6 variables

In [None]:
# Feature importances of the fitted forest, expressed in percent and plotted
# as horizontal bars from least to most important.
importance_pct = rf.feature_importances_*100
Importance = pd.DataFrame({"Importance": importance_pct}, index = x_train.columns)
ranked_importance = Importance.sort_values(by = "Importance", axis = 0, ascending = True)
ranked_importance.plot(kind ="barh", color = "g")

plt.xlabel("Variable Severity Levels");

In [None]:
y=df.target
x=df[['ca','oldpeak','thal','cp','thalach','age']]

In [None]:

x_train, x_test, y_train, y_test = train_test_split(x, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [None]:
rf_2 = RandomForestClassifier().fit(x_train, y_train)
y_pred = rf_2.predict(x_test)
rf_2_score=accuracy_score(y_test, y_pred)
rf_2_score

In [None]:
c_rf2=confusion_matrix(y_test,y_pred)
c_rf2

In [None]:
rf_params = {"max_depth": [2,5,8,10],
            "max_features": [2,5,8],
            "n_estimators": [10,500,1000],
            "min_samples_split": [2,5,10]}

In [None]:
rf_model1 = RandomForestClassifier()

rf_cv_model1 = GridSearchCV(rf_model1, 
                           rf_params, 
                           cv = 10, 
                           n_jobs = -1, 
                           verbose = 2)

In [None]:
#rf_cv_model1.fit(x_train, y_train)    #uzun suruyor

In [None]:
#print("Best Parameters: " + str(rf_cv_model1.best_params_))  #uzun suruyor

In [None]:
rf_tuned1 = RandomForestClassifier(max_depth = 2, 
                                  max_features = 2, 
                                  min_samples_split = 2,
                                  n_estimators = 500)

rf_tuned1.fit(x_train, y_train)

In [None]:
y_pred = rf_tuned1.predict(x_test)
rf_tuned_score=accuracy_score(y_test, y_pred)
rf_tuned_score

#### Conclusion:

* rf_2_score = 84% is the best score and c_rf2 is the best confusion matrix.

## 6 ) DECISION TREE METHOD
This model uses a Decision Tree as a predictive model which maps features (tree branches) to conclusions about the target value (tree leaves). Tree models where the target variable can take a finite set of values are called classification trees; in these tree structures, leaves represent class labels and branches represent conjunctions of features that lead to those class labels. Decision trees where the target variable can take continuous values (typically real numbers) are called regression trees.
* Decision tree builds classification or regression models in the form of a tree structure. It breaks down a dataset into smaller and smaller subsets while at the same time an associated decision tree is incrementally developed".

* According to information entropy, we can determine which feature is the most important, and we put the most important one at the top of the tree.

* Decision tree classification can be used for both binary and multi classes

* Coding is the same for all supervised classifiers and we just need to change the last part of the code.

In [None]:
from sklearn.tree import DecisionTreeClassifier
y=df.target
x=df.drop("target",axis=1)

### A ) Train -test splitting

In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,
                                                test_size = 0.25,
                                                random_state = 42)

### B ) Modeling of Decision Tree

In [None]:
dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train)

In [None]:
y_pred = dtc.predict(x_test)
y_pred

### C ) Lets control the succes(score) prediction(accuracy_score,confusion m.) on test_data

In [None]:
dtc_score=accuracy_score(y_test,y_pred)
dtc_score

In [None]:
c_dtc=confusion_matrix(y_test,y_pred)
c_dtc

In [None]:
from sklearn.metrics import confusion_matrix   #Hepsi icin yapilabilir
y_true=y_test
y_pred=dtc.predict(x_test)
cmdtc=confusion_matrix(y_true, y_pred)
f,ax=plt.subplots(figsize=(6,6))
sns.heatmap(cmdtc, annot=True)
plt.xlabel("y_pred")
plt.ylabel("y_true")
plt.show()

In [None]:
print(classification_report(y_test,y_pred))

### D ) Model tuning

In [None]:
tree_grid = {"max_depth": range(1,10),
            "min_samples_split" : list(range(2,50)) }

In [None]:
tree1 = DecisionTreeClassifier()
tree_cv = GridSearchCV(tree1, tree_grid, cv = 10, n_jobs = -1, verbose = 2)
tree_cv_model = tree_cv.fit(x_train, y_train)

In [None]:
#?tree_cv_model

In [None]:
print("Best Parameters: " + str(tree_cv_model.best_params_))

In [None]:
tree1 = DecisionTreeClassifier(max_depth = 3, min_samples_split = 2)
tree_tuned1 = tree1.fit(x_train, y_train)

In [None]:
y_pred = tree_tuned1.predict(x_test)
dtc_tuned_bestscore=accuracy_score(y_test, y_pred)
dtc_tuned_bestscore

### Lets look at 6 importance variables

In [None]:
Importance = pd.DataFrame({"Importance": dtc.feature_importances_*100},
                         index = x_train.columns)
Importance.sort_values(by = "Importance", 
                       axis = 0, 
                       ascending = True).plot(kind ="barh", color = "g")

plt.xlabel("Variable Severity Levels");

In [None]:
y=df.target
x=df[['ca','oldpeak','thal','cp','thalach','age']]

In [None]:

x_train, x_test, y_train, y_test = train_test_split(x, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [None]:
# Decision tree trained on the six selected features only. This section
# evaluates decision trees, so a DecisionTreeClassifier is fitted here rather
# than a random forest.
dtc2 = DecisionTreeClassifier().fit(x_train, y_train)
y_pred = dtc2.predict(x_test)
dtc2_score=accuracy_score(y_test, y_pred)
dtc2_score

In [None]:
c_dtc2=confusion_matrix(y_test,y_pred)
c_dtc2

In [None]:
# Tuned decision tree on the six selected features. A DecisionTreeClassifier is
# used so that the score and confusion matrix reported for "DT" really come
# from a decision tree; the hyperparameters match the tuned tree fitted earlier
# in this section (max_depth=3, min_samples_split=2).
dtc_tuned1 = DecisionTreeClassifier(max_depth = 3,
                                    min_samples_split = 2)
dtc_tuned1.fit(x_train, y_train)

In [None]:
y_pred = dtc_tuned1.predict(x_test)
dtc_tuned_bestscore=accuracy_score(y_test, y_pred)
dtc_tuned_bestscore

In [None]:
c_bestdtc=confusion_matrix(y_test,y_pred)
c_bestdtc

## LETS SEE ALL SCORE OF CLASSIFICATIONS METHODS 

In [None]:
# Pair every model label with its accuracy, then plot the scores side by side.
model_scores = [
    ("Log", l_score),
    ("KNN", knn_tuned_bestscore),
    ("SVM", svm_score1),
    ("NB", nb_tuned_bestscore),
    ("RF", rf_2_score),
    ("DT", dtc_tuned_bestscore),
]
indexx = [label for label, _ in model_scores]
regressions = [score for _, score in model_scores]

plt.figure(figsize=(8,6))
sns.barplot(x=indexx,y=regressions)
plt.xticks()
plt.title('Model Comparision',color = 'orange',fontsize=20);

In [None]:
# One annotated heatmap per model, laid out on a 2x3 grid in this order.
confusion_panels = [
    ("Logistic Regression Confusion Matrix", c_l),
    ("K Nearest Neighbors Confusion Matrix", cmknn_best),
    ("Support Vector Machine Confusion Matrix", c_svm),
    ("Naive Bayes Confusion Matrix", cmnb_best),
    ("Decision Tree Classifier Confusion Matrix", c_bestdtc),
    ("Random Forest Confusion Matrix", c_rf2),
]

plt.figure(figsize=(24,12))

plt.suptitle("Confusion Matrixes",fontsize=24)
plt.subplots_adjust(wspace = 0.4, hspace= 0.4)

for position, (panel_title, matrix) in enumerate(confusion_panels, start=1):
    plt.subplot(2,3,position)
    plt.title(panel_title)
    sns.heatmap(matrix,annot=True,cmap="Blues",fmt="d",cbar=False, annot_kws={"size": 24})

plt.show()

## FINISH