In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
ds = pd.read_csv('Heart.csv')
print(ds)

     Unnamed: 0  Age  Sex     ChestPain  RestBP  Chol  Fbs  RestECG  MaxHR  \
0             1   63    1       typical     145   233    1        2    150   
1             2   67    1  asymptomatic     160   286    0        2    108   
2             3   67    1  asymptomatic     120   229    0        2    129   
3             4   37    1    nonanginal     130   250    0        0    187   
4             5   41    0    nontypical     130   204    0        2    172   
..          ...  ...  ...           ...     ...   ...  ...      ...    ...   
298         299   45    1       typical     110   264    0        0    132   
299         300   68    1  asymptomatic     144   193    1        0    141   
300         301   57    1  asymptomatic     130   131    0        0    115   
301         302   57    0    nontypical     130   236    0        2    174   
302         303   38    1    nonanginal     138   175    0        0    173   

     ExAng  Oldpeak  Slope   Ca        Thal  AHD  
0        0  

In [3]:
# Transform ChestPain & Thal string data to numeric data
le=LabelEncoder()
ds["ChestPain"]=le.fit_transform(ds["ChestPain"])
ds["Thal"]=le.fit_transform(ds["Thal"].astype(str))
ds["AHD"]=le.fit_transform(ds["AHD"].astype(str))

In [4]:
#replaced zeros
zeros_not_accepted = ['ChestPain','RestBP','Chol','Fbs','RestECG','MaxHR','ExAng','Oldpeak','Slope','Ca','Thal','AHD']
for column in zeros_not_accepted:
    ds[column] = ds[column].replace(0,np.NaN)
    mean=int(ds[column].mean(skipna=True))
    ds[column]=ds[column].replace(np.NaN,mean)

In [5]:
print(ds)

     Unnamed: 0  Age  Sex  ChestPain  RestBP  Chol  Fbs  RestECG  MaxHR  \
0             1   63    1        3.0     145   233  1.0      2.0    150   
1             2   67    1        1.0     160   286  1.0      2.0    108   
2             3   67    1        1.0     120   229  1.0      2.0    129   
3             4   37    1        1.0     130   250  1.0      1.0    187   
4             5   41    0        2.0     130   204  1.0      2.0    172   
..          ...  ...  ...        ...     ...   ...  ...      ...    ...   
298         299   45    1        3.0     110   264  1.0      1.0    132   
299         300   68    1        1.0     144   193  1.0      1.0    141   
300         301   57    1        1.0     130   131  1.0      1.0    115   
301         302   57    0        2.0     130   236  1.0      2.0    174   
302         303   38    1        1.0     138   175  1.0      1.0    173   

     ExAng  Oldpeak  Slope   Ca  Thal  AHD  
0      1.0      2.3      3  1.0   2.0  1.0  
1      1.

In [7]:
X = ds.drop(['Unnamed: 0','AHD'],axis=1).values
Y = ds['AHD'].values
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,random_state=0,test_size=0.2)

In [8]:
# prepare model
from sklearn.linear_model import LinearRegression
model = LinearRegression()

In [9]:
# train the model
model.fit(X_train, Y_train)

LinearRegression()

In [10]:
# prediction
Y_pred = model.predict(X_test)
#yPred

In [11]:
from sklearn.metrics import r2_score
r2 = r2_score(Y_test, Y_pred)
r2

1.0

In [12]:
# cross validation
accu = []
for i in range(1, 101):
  xTrain, xTest, yTrain, yTest = train_test_split(X, Y, train_size=0.80, random_state=i)
  model = LinearRegression()
  model.fit(xTrain, yTrain)
  yPred = model.predict(xTest)
  r2 = r2_score(yTest, yPred)
  accu.append(r2)

accu_array = np.array(accu)
print(accu_array)
accu_array.max()

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]


1.0

In [13]:
#Co-efficient
m = model.coef_

In [14]:
#intercept
c = model.intercept_

In [15]:
y= m*X + c
print(y)

[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
