In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input mount and print the full path of every
# file found, so the dataset paths read further down can be confirmed.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the file name to get the full path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Heart Dataset**

In [None]:
# Read the heart-attack table from the Kaggle input mount, report its
# (rows, columns) shape, and preview the first rows as the cell output.
heart_data = pd.read_csv(
    "/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv"
)
print("Shape of heart.csv: ", heart_data.shape)
heart_data.head()

# **O2 Saturation**

In [None]:
# Read the oxygen-saturation file that ships with the same dataset, report
# its (rows, columns) shape, and preview the first rows as the cell output.
o2sat_data = pd.read_csv(
    "/kaggle/input/heart-attack-analysis-prediction-dataset/o2Saturation.csv"
)
print("Shape of o2Saturation.csv: ", o2sat_data.shape)
o2sat_data.head()

# **Exploratory Data Analysis**

In [None]:
from pandas_profiling import ProfileReport

In [None]:
# Build a profiling report of the heart table in explorative mode and
# render it through the report's notebook-widget view.
profile = ProfileReport(
    heart_data,
    title="Heart Attack Dataset",
    explorative=True,
)
profile.to_widgets()

In [None]:
from matplotlib import pyplot as plt

In [None]:
def normal_scatter(x, y):
    """Draw a 4x4-inch scatter plot of ``y`` against ``x`` on a new figure.

    Parameters
    ----------
    x, y : array-like
        Values for the horizontal and vertical axes. When an input exposes a
        ``name`` attribute (as a pandas Series does), that name is used as
        the corresponding axis label so the figure can be read on its own.

    Returns
    -------
    None
        The figure is left open for the notebook to render.
    """
    # Explicit Axes interface instead of the implicit pyplot "current figure".
    _, ax = plt.subplots(figsize=(4, 4))
    ax.scatter(x, y)

    # Label an axis only when the input carries a usable name.
    x_label = getattr(x, "name", None)
    y_label = getattr(y, "name", None)
    if x_label is not None:
        ax.set_xlabel(str(x_label))
    if y_label is not None:
        ax.set_ylabel(str(y_label))

In [None]:
def boxplot(data, column, groupby):
    """Draw a 4x4-inch box plot of ``column`` split by the groups in ``groupby``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both the plotted column(s) and the grouping column.
    column : str or list of str
        Column name(s) whose distribution is drawn.
    groupby : str
        Column whose distinct values define the boxes on the x axis.

    Returns
    -------
    None
        The figure is left open for the notebook to render.
    """
    fig, ax = plt.subplots(figsize=(4, 4))
    data.boxplot(column=column, by=groupby, ax=ax, grid=False, rot=0)
    # A grouped pandas boxplot writes its own "Boxplot grouped by ..." figure
    # super-title while drawing, so the title has to be cleared *after* the
    # plot call; clearing it beforehand is simply overwritten.
    fig.suptitle('')

# **Distribution and Scatter of thall and output**

In [None]:
# Scatter thall against the target, then box-plot thall within each output class.
normal_scatter(x=heart_data.loc[:, 'thall'], y=heart_data.loc[:, 'output'])
boxplot(data=heart_data, column=["thall"], groupby="output")

# **Distribution and Scatter of exng and output**

In [None]:
# Scatter exng against the target, then box-plot exng within each output class.
normal_scatter(x=heart_data.loc[:, 'exng'], y=heart_data.loc[:, 'output'])
boxplot(data=heart_data, column=["exng"], groupby="output")

# **Distribution and Scatter of exng and output**

In [None]:
# NOTE(review): this cell repeats the exng-vs-output plots of the section
# directly above (same calls, same columns). Confirm whether a different
# column was intended here, or whether the cell can be dropped.
normal_scatter(heart_data['exng'], heart_data['output'])
boxplot(heart_data, ["exng"],"output")

# **Distribution and Scatter of cp and output**

In [None]:
# Scatter cp against the target, then box-plot cp within each output class.
normal_scatter(x=heart_data.loc[:, 'cp'], y=heart_data.loc[:, 'output'])
boxplot(data=heart_data, column=["cp"], groupby="output")

# **Distribution and Scatter of thalachh and output**

In [None]:
# Scatter thalachh against the target, then box-plot thalachh within each output class.
normal_scatter(x=heart_data.loc[:, 'thalachh'], y=heart_data.loc[:, 'output'])
boxplot(data=heart_data, column=["thalachh"], groupby="output")

# **Kernel Density Estimates and Scatter Matrix**

In [None]:
# Scatter-matrix of the heart table: pairwise scatter plots off the diagonal
# to eyeball relationships between columns, and a kernel density estimate of
# each column on the diagonal.
pd.plotting.scatter_matrix(heart_data, diagonal="kde", figsize=(20, 15))
plt.show()

In [None]:
# Overlay the five continuous measurements on one large figure to compare
# their value ranges; the legend is anchored outside the axes, on the right.
ax = heart_data.loc[:, ["age", "trtbps", "chol", "thalachh", "oldpeak"]].plot(
    figsize=(20, 15)
)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5));

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing, with a fixed seed so the split is
# repeatable. Features are every column except 'output'; the target is kept
# as a one-column DataFrame holding 'output'.
X_train, X_test, y_train, y_test = train_test_split(
    heart_data.drop(columns=['output']),
    heart_data[['output']],
    test_size=0.3,
    random_state=42,
)

In [None]:
# Give each of the four split pieces a fresh 0..n-1 row index, discarding the
# original row labels carried over from heart_data.
X_train, y_train, X_test, y_test = (
    part.reset_index(drop=True) for part in (X_train, y_train, X_test, y_test)
)

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy import stats

# **LDA Analysis**

In [None]:
# Fit LDA on the training split. y_train is a one-column table, while
# scikit-learn expects class labels of shape (n_samples,); flattening it to
# 1-D avoids the column-vector DataConversionWarning.
lda = LinearDiscriminantAnalysis().fit(X_train, np.asarray(y_train).ravel())

In [None]:
def pretty_scalings(lda, X, out=False):
    """Tabulate the discriminant coefficients of a fitted LDA model.

    Parameters
    ----------
    lda : object
        Fitted model exposing a 2-D ``scalings_`` array
        (one row per predictor, one column per discriminant).
    X : pandas.DataFrame
        Frame whose column names label the rows of the table.
    out : bool, default False
        When true, also print a heading and display the table.

    Returns
    -------
    pandas.DataFrame
        ``scalings_`` indexed by ``X.columns`` with columns ``LD1``, ``LD2``, ...
    """
    n_discriminants = lda.scalings_.shape[1]
    labels = [f"LD{k}" for k in range(1, n_discriminants + 1)]
    table = pd.DataFrame(lda.scalings_, index=X.columns, columns=labels)
    if not out:
        return table
    print("Coefficients of linear discriminants:")
    display(table)
    return table

# Print and display the fitted discriminant coefficients, labelled with the
# training feature names, and keep the resulting table.
pretty_scalings_ = pretty_scalings(lda=lda, X=X_train, out=True)

In [None]:
def calclda(variables, loadings):
    """Compute mean-centred scores of one linear discriminant function.

    Each sample's score is the weighted sum of its variable values, using
    ``loadings`` as the weights; the scores are then shifted so that their
    mean is 0. The whole computation uses NumPy only, so the function does
    not rely on ``scale`` having been imported before it is called.

    Parameters
    ----------
    variables : pandas.DataFrame or 2-D array-like
        One row per sample, one column per variable (numeric).
    loadings : array-like
        One coefficient per column of ``variables``, in column order.

    Returns
    -------
    numpy.ndarray
        1-D float array with one centred score per sample (empty when
        ``variables`` has no rows).

    Raises
    ------
    ValueError
        If ``variables`` is not 2-D, or the number of loadings differs from
        the number of columns.
    """
    values = np.asarray(variables, dtype=float)
    weights = np.asarray(loadings, dtype=float).ravel()

    if values.ndim != 2:
        raise ValueError("variables must be 2-D (samples x variables)")
    if values.shape[1] != weights.shape[0]:
        raise ValueError(
            "expected %d loadings (one per variable), got %d"
            % (values.shape[1], weights.shape[0])
        )

    # One matrix-vector product replaces the per-cell Python double loop.
    ld = values @ weights
    if ld.size == 0:
        # Nothing to centre; avoid taking the mean of an empty array.
        return ld

    # Centre only (no rescaling). nanmean keeps a single NaN score from
    # turning every centred value into NaN.
    return ld - np.nanmean(ld)

In [None]:
import seaborn as sns
from sklearn.preprocessing import scale

In [None]:
# Hand-computed, mean-centred scores of the training rows on the first
# discriminant (first column of the fitted scalings).
calclda(variables=X_train, loadings=lda.scalings_[:, 0])

In [None]:
# scikit-learn's own projection of the training rows onto the first
# discriminant, for comparison with the hand-computed scores. The model has
# already been fitted on this training split, so only transform here;
# calling fit_transform would refit the estimator for no benefit.
lda.transform(X_train)[:, 0]

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the held-out predictions, with the class order fixed
# to 0 then 1.
confusion_matrix(y_true=y_test, y_pred=lda.predict(X_test), labels=[0, 1])

In [None]:
from sklearn.metrics import classification_report

# Text report of the classification metrics for the held-out predictions,
# with the two classes labelled "0" and "1".
print(
    classification_report(
        y_true=y_test,
        y_pred=lda.predict(X_test),
        target_names=["0", "1"],
    )
)