# Heart Attack Analysis

## Import necessary libraries

In [128]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib

## Read the dataset from CSV file

In [151]:
# Load the heart dataset from the CSV file (relative path, so it is resolved
# against the current working directory) and preview its first five rows.
dataset = pd.read_csv(filepath_or_buffer='heart.csv')
dataset.head(5)

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Create Correlation Matrix & check for correlation between "output" and other features

In [152]:
# Pairwise correlation matrix of all columns (pandas' default method, Pearson);
# then show only the column describing correlation with the target 'output'.
correlation = dataset.corr()
correlation.loc[:, 'output']

age        -0.225439
sex        -0.280937
cp          0.433798
trtbps     -0.144931
chol       -0.085239
fbs        -0.028046
restecg     0.137230
thalachh    0.421741
exng       -0.436757
oldpeak    -0.430696
slp         0.345877
caa        -0.391724
thall      -0.344029
output      1.000000
Name: output, dtype: float64

## Check which features have a strong correlation with output (above 0.4 or below -0.4)

In [153]:
# Correlation of every column with the target, looked up once and reused.
output_corr = correlation['output']

# Strong positive correlations (note: 'output' itself is listed, with r = 1).
print(output_corr[output_corr > 0.4])
# Strong negative correlations.
print(output_corr[output_corr < -0.4])

cp          0.433798
thalachh    0.421741
output      1.000000
Name: output, dtype: float64
exng      -0.436757
oldpeak   -0.430696
Name: output, dtype: float64


## Take features & target

In [154]:
# Label column to predict.
target = 'output'

# Predictor columns: the ones that passed the +/-0.4 correlation filter.
features = [
    'cp',
    'thalachh',
    'exng',
    'oldpeak',
]

## Split the data into training & testing set

In [155]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
train_set, test_set = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)

## Create a LinearRegression model & fit it to the training set

In [156]:
# Fit a linear regression of the 0/1 target on the selected feature columns,
# using the training rows only.
X_train = train_set[features]
y_train = train_set[target]
linear_model = LinearRegression().fit(X_train, y_train)

## Predict the result for test set

In [157]:
# Continuous regression scores for the held-out rows (not yet class labels).
X_test = test_set[features]
predicted_outputs = linear_model.predict(X_test)

## Here, the output is either 1 or 0, so the task is really classification.
## However, Linear Regression can still be used by treating a predicted value greater than 0.5 as class 1 and anything else as class 0.

In [158]:
# Threshold each regression score at 0.5 to obtain a hard 0/1 class label
# (strictly greater than 0.5 -> 1, everything else -> 0).
answer = [int(score > 0.5) for score in predicted_outputs]

## Mean Squared Error (for 0/1 labels this equals the fraction of misclassified cases)

In [159]:
# Mean squared error between the true test labels and the thresholded 0/1
# predictions; displayed as the cell's output.
mean_squared_error(y_true=test_set[target], y_pred=answer)

0.14754098360655737

## Check how many cases were misclassified (False Positives and False Negatives combined)

In [160]:
# Count the test rows whose thresholded prediction differs from the true label.
# The original accumulation `q+=+i!=j` parsed as `q += ((+i) != j)`; the unary
# plus did nothing, so an explicit sum over the mismatches gives the same count.
q = sum(predicted != actual for predicted, actual in zip(answer, test_set[target]))

# Number of evaluated test rows, reused in both report lines below.
total = len(answer)

print(f"{q}/{total} cases were predicted wrongly")
# Accuracy as a percentage of correctly classified rows, rounded to 2 decimals.
print(f"Model Accuracy on Test Set is {round((total-q)*100/total,2)}")

9/61 cases were predicted wrongly
Model Accuracy on Test Set is 85.25


## Dump the model

In [161]:
# Persist the fitted estimator to disk with joblib; the call returns the list
# of written file names, which is displayed as the cell's output.
# NOTE: joblib files are pickle-based, so only load them from trusted sources.
joblib.dump(value=linear_model, filename="finalized_model.sav")

['finalized_model.sav']

# Hurray !!!