# Heart Disease Prediction 

## Importing the libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Importing and Reading the dataset

In [None]:
# Load the Framingham CSV into a DataFrame; the path is relative to the
# current working directory.
df = pd.read_csv(filepath_or_buffer='framingham.csv')
# Preview the first five rows (rendered because it is the cell's last expression).
df.head(n=5)

## Analysis of Data

In [None]:
# Dimensions of the dataset as a (rows, columns) tuple.
df.shape

In [None]:
# Column labels of the dataset.
df.keys()

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Number of missing values in each column.
df.isna().sum()

### Removing NaN / NULL values from the data

In [None]:
# Discard every row that contains at least one missing value, modifying df in place.
df.dropna(axis='index', how='any', inplace=True)
# Report the dimensions that remain after the incomplete rows were removed.
print(df.shape)

In [None]:
# Number of rows in each TenYearCHD class, to see how balanced the target is.
df['TenYearCHD'].value_counts()

## Data Visualization

### Correlation Matrix

In [None]:
# Pairwise correlation between all columns of the cleaned dataset.
corr_matrix = df.corr()

# Large canvas so the annotated coefficients stay readable.
plt.figure(figsize=(14, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='Purples',
    linewidths=1.0,
    linecolor='Green',
)
plt.show()

### Pairplot

In [None]:
# Grid of pairwise plots over the columns of df, with each column's own
# distribution on the diagonal.
sns.pairplot(data=df)
plt.show()

### Countplot of people by sex and whether or not they are current smokers

In [None]:
# Count of rows for each value of 'male', with bars split by 'currentSmoker'.
sns.catplot(
    data=df,
    x='male',
    hue='currentSmoker',
    kind='count',
)
plt.show()

### Countplot - subplots of the number of people affected by CHD, on the basis of their sex and current smoking status.

In [None]:
# Grid of TenYearCHD count plots: one column of panels per 'male' value and
# one row of panels per 'currentSmoker' value.
#
# Recent seaborn releases deprecate passing `palette` without `hue`, so the x
# variable is also assigned to `hue`. dodge=False keeps each bar centred on
# its tick, and legend=False drops the legend, which would only repeat the
# x-axis labels.
sns.catplot(
    data=df,
    kind='count',
    x='TenYearCHD',
    hue='TenYearCHD',
    col='male',
    row='currentSmoker',
    palette='Blues',
    dodge=False,
    legend=False,
)
plt.show()

## Machine Learning Part

### Separating the data into feature and target data.

In [None]:
# Features: the first 15 columns of the cleaned dataset.
X = df.iloc[:, :15]
# Target: column 15, selected as a 1-D Series. A one-column DataFrame
# (df.iloc[:, 15:16]) would reach the estimator as a column vector, and
# scikit-learn warns about that because it expects y of shape (n_samples,).
y = df.iloc[:, 15]

In [None]:
# Preview the first five rows of the feature matrix.
X.head()

In [None]:
# Preview the first five rows of the target.
y.head()

### Splitting the data into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=21,
)


### Applying the ML model - Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Logistic regression preceded by feature standardisation.
#
# On raw features whose magnitudes differ widely, the default solver commonly
# stops at its iteration limit with a ConvergenceWarning; scaling the inputs
# is the remedy that warning itself recommends. The pipeline learns the
# scaling parameters during fit() and re-applies them in predict() / score(),
# so `logreg` is used exactly like a bare estimator and no test-set
# statistics leak into training.
# NOTE: scaling also changes the effective regularisation strength, so the
# reported metrics can differ slightly from an unscaled fit.
logreg = make_pipeline(StandardScaler(), LogisticRegression())

### Training the model

In [None]:
# Fit the classifier on the training features and labels.
logreg.fit(X_train, y_train)

### Predicting on the test set

In [None]:
# Predict class labels for the held-out test features; y_pred is reused by
# the confusion-matrix and classification-report cells.
y_pred = logreg.predict(X_test)

### Computing the accuracy score

In [None]:
# Mean accuracy of the fitted classifier on the held-out test split.
score = logreg.score(X=X_test, y=y_test)
print("Prediction score is:", score)

## Getting the Confusion Matrix and Classification Report

### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Rows are the true classes and columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix is:\n", cm)

### Classification Report

In [None]:
# Per-class precision, recall, F1 and support, formatted as text.
report = classification_report(y_test, y_pred)
print("Classification Report is:\n\n", report)

### Plotting the confusion matrix

In [None]:
# Wrap the 2x2 count matrix in a labelled DataFrame so the heatmap axes read
# as actual class (rows) versus predicted class (columns).
conf_matrix = pd.DataFrame(
    cm,
    index=['Actual:0', 'Actual:1'],
    columns=['Predicted:0', 'Predicted:1'],
)

plt.figure(figsize=(10, 6))
# fmt='d' renders the cell annotations as integers.
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap="Greens",
    linewidths=1.5,
    linecolor="Blue",
)
plt.show()