# Cardiovascular Disease Prediction Model

## Importing required library

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression , Lasso , Ridge , ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.model_selection import train_test_split


### Reading csv file

In [2]:
dataset=pd.read_csv("./Heart_data.csv") 
dataset.head(2)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,,2.0,3.0,3.0,2


### Printing the detail of column

In [37]:
dataset.info() # Printing the detail of column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1592 entries, 0 to 1591
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1513 non-null   float64
 1   sex       1561 non-null   float64
 2   cp        1433 non-null   float64
 3   trestbps  1592 non-null   float64
 4   chol      1529 non-null   float64
 5   fbs       1513 non-null   float64
 6   restecg   1585 non-null   float64
 7   thalach   1592 non-null   float64
 8   exang     1545 non-null   float64
 9   oldpeak   1561 non-null   float64
 10  slope     1538 non-null   float64
 11  ca        1592 non-null   float64
 12  thal      1585 non-null   float64
 13  target    1592 non-null   int64  
dtypes: float64(13), int64(1)
memory usage: 174.2 KB


### finding the missing value in dataset

In [38]:
dataset.isnull().sum()


age          79
sex          31
cp          159
trestbps      0
chol         63
fbs          79
restecg       7
thalach       0
exang        47
oldpeak      31
slope        54
ca            0
thal          7
target        0
dtype: int64

### Removing the column with missing value

In [39]:
dataset.dropna(inplace=True)

In [40]:
dataset.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
5,56.0,1.0,2.0,120.0,236.0,0.0,0.0,178.0,0.0,0.8,1.0,0.0,3.0,0
6,62.0,0.0,4.0,140.0,268.0,0.0,2.0,160.0,0.0,3.6,3.0,2.0,3.0,3


In [41]:
dataset.shape #Number of row and column in dataset

(1113, 14)

### Reseting the index


In [42]:
dataset.reset_index(inplace=True)


In [43]:
dataset.head(5)

Unnamed: 0,index,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
2,4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
3,5,56.0,1.0,2.0,120.0,236.0,0.0,0.0,178.0,0.0,0.8,1.0,0.0,3.0,0
4,6,62.0,0.0,4.0,140.0,268.0,0.0,2.0,160.0,0.0,3.6,3.0,2.0,3.0,3


## Separating the input and output data

In [44]:
input_data=dataset.iloc[:,:-1]
output_data=dataset["target"]



## Standardize Input Data

In [45]:
ss=StandardScaler()
input_data=pd.DataFrame(ss.fit_transform(input_data),columns=input_data.columns)


### Spliting the data for testing and training the model

In [46]:
x_train,x_test,y_train,y_test=train_test_split(input_data,output_data,test_size=0.3,random_state=42)

## Linear Regression Model 

In [47]:
lr=LinearRegression()
lr.fit(x_train,y_train)
print(f"Accuracy :  {lr.score(x_test,y_test)*100} ")

Accuracy :  46.07993614892 


### Lasso Model 

In [48]:
lr1=Lasso(alpha=0.01)
lr1.fit(x_train,y_train)
print(f"Accuracy :  {lr1.score(x_test,y_test)*100} ")

Accuracy :  47.0149493543638 


### Ridge Model

In [59]:
lr2=Ridge()
lr2.fit(x_train,y_train)
print(f"Accuracy :  {lr2.score(x_test,y_test)*100} ")

Accuracy :  46.08303409003417 


### ElasticNet Model

In [60]:
lr3=ElasticNet(alpha=0.000001)
lr3.fit(x_train,y_train)
print(f"Accuracy :  {lr3.score(x_test,y_test)*100} ")

Accuracy :  46.08000185127052 


### DecisionTreeRegressor

In [61]:
dt=DecisionTreeRegressor()
dt.fit(x_train,y_train)
print(f"Accuracy :  {dt.score(x_test,y_test)*100} ")

Accuracy :  37.29084985366392 


### RandomForestRegressor

In [64]:
rf=RandomForestRegressor()
rf.fit(x_train,y_train)
print(f"Accuracy :  {rf.score(x_test,y_test)*100} ")

Accuracy :  70.40870478767464 


In [65]:
ans=rf.predict([x_train.iloc[100]]),y_train.iloc[100]





### SVR Model

In [66]:
sv=SVR()
sv.fit(x_train,y_train)
print(f"Accuracy :  {sv.score(x_test,y_test)*100} ")

Accuracy :  60.68871383220967 


### KNeighborsRegressor Model

In [67]:
knn=KNeighborsRegressor(n_neighbors=10)
knn.fit(x_train,y_train)
print(f"Accuracy :  {knn.score(x_test,y_test)*100} ")

Accuracy :  61.14799271080679 
