# Heart Failure Prediction: Decision Tree, KNN and Logistic Regression Classification

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Importing the dataset

In [None]:
# Read the heart-failure clinical records CSV from the relative ../input
# directory and preview its first ten rows.
csv_path = '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
dataset = pd.read_csv(csv_path)
dataset.head(n=10)

In [None]:
# Count the missing (NaN) entries in every column.
dataset.isna().sum()

In [None]:
# `heatmap` holds the pairwise correlation matrix of the columns; seaborn then
# renders it as a colour-coded grid using the yellow-green-blue palette.
heatmap = dataset.corr()
sns.heatmap(data=heatmap, cmap='YlGnBu')

In [None]:
# Preview the first five rows.
dataset.head(n=5)

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
dataset.info()

In [None]:
# Show the distinct values present in the DEATH_EVENT column.
dataset.loc[:, 'DEATH_EVENT'].unique()

In [None]:
from sklearn.preprocessing import LabelEncoder

# One encoder instance is refit for each column in turn, so once the loop
# finishes it only remembers the classes of the last column ('smoking').
enc = LabelEncoder()

# Replace the 'sex' and 'smoking' columns with their integer-encoded labels.
for column in ('sex', 'smoking'):
    dataset[column] = enc.fit_transform(dataset[column])
dataset.head()

In [None]:
from sklearn.impute import SimpleImputer

# Fill NaN entries in the two columns below with that column's median.
impute = SimpleImputer(missing_values=np.nan, strategy='median')
median_filled_cols = ['serum_sodium', 'time']

# Learn the medians and apply them in a single step on the same data.
dataset[median_filled_cols] = impute.fit_transform(dataset[median_filled_cols])
dataset.head(20)

In [None]:
# Re-check the per-column missing-value counts.
dataset.isna().sum()

In [None]:
# Feature matrix: every column except the last one, which is the target.
# The earlier slice `iloc[:, 1:-1]` also skipped column 0, silently discarding
# a predictor (presumably `age` -- verify against the CSV header).
X = dataset.iloc[:, :-1].values
# Target vector: the last column of the frame.
y = dataset.iloc[:, -1].values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on y so both
# parts keep the same class proportions, and seeded so it is reproducible.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

## Training the Decision Tree, KNN and Logistic Regression classifiers on the training set

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Decision-tree splits are per-feature thresholds, so the tree does not depend
# on feature scale and is trained on the raw features.
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(Xtrain, Ytrain)

# KNN relies on distances and logistic regression's solver is sensitive to
# feature magnitude, so both get standardised inputs. Putting the scaler in a
# pipeline means it is fit on the training split only and reused unchanged
# when scoring the test split.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=3))
knn.fit(Xtrain, Ytrain)
logistic = make_pipeline(StandardScaler(), LogisticRegression(random_state=0))
logistic.fit(Xtrain, Ytrain)

# Report held-out accuracy for each model (pipelines expose the same .score()).
print(f"KNN Test set accuracy: {knn.score(Xtest, Ytest):.2f}")
print(f"Decision Tree Test set accuracy: {classifier.score(Xtest, Ytest):.2f}")
print(f"Logistic Regression Test set accuracy: {logistic.score(Xtest, Ytest):.2f}")

## Bar plot visualisation

In [None]:
import matplotlib.pyplot as plt

# Test-set accuracy of each fitted model, in the same order as the labels.
classes = ['Decision Tree', 'Logistic Regression', 'KNN']
scores = [classifier.score(Xtest, Ytest), logistic.score(Xtest, Ytest), knn.score(Xtest, Ytest)]

# plt.subplots() keeps the default margins. The former add_axes([0, 0, 1, 1])
# stretched the axes over the whole figure, leaving no room on the canvas for
# tick labels.
fig, ax = plt.subplots()
ax.bar(classes, scores)
ax.set_ylim(0, 1)  # accuracy is a fraction, so show the full 0-1 range
ax.set_ylabel('Test set accuracy')
ax.set_title('Classifier comparison')
plt.show()