**Importing libraries and dataset**

In [None]:
import pandas as pd
import numpy as np

# Location of the input CSV, relative to the notebook's working directory.
csv_path = '../input/heart-disease-prediction-using-logistic-regression/framingham.csv'
# Read the whole file into a DataFrame; the following cells refer to it as `dataset`.
dataset = pd.read_csv(csv_path)

**Checking missing values**

In [None]:
# Boolean frame flagging every missing cell; computed once and reused by both reports.
missing_mask = dataset.isnull()
# Is there at least one missing value anywhere in the frame?
print(missing_mask.values.any())
# Number of missing values per column
print(missing_mask.sum())

**# Dropping columns and splitting into dependent and independent variables**

In [None]:
# Remove the columns at positions 2 and 14 into a NEW frame rather than
# overwriting `dataset`. Reassigning `dataset` made this cell non-idempotent:
# running it a second time dropped two further columns (or raised an
# out-of-bounds IndexError), silently changing x and y. With `dataset` left
# untouched, the cell yields the same x and y however often it is re-run.
# NOTE(review): the positions are hard-coded; confirm which columns they refer
# to in the CSV — dropping by column name would be more robust.
model_frame = dataset.drop(columns=dataset.columns[[2, 14]])

# Independent variables: every remaining column except the last one.
x = model_frame.iloc[:, :-1].values
# Dependent variable: the last remaining column.
y = model_frame.iloc[:, -1].values

**# Replacing the missing data after dropping columns**

In [None]:
from sklearn.impute import SimpleImputer
# Replace every NaN in the feature matrix with the most frequent value of its
# column. The fill values are learned and applied in a single call.
# NOTE(review): the imputer is fitted on the full feature matrix, i.e. before
# the train/test split performed further down, so test rows influence the
# fill values. Consider fitting it on the training rows only.
imputer = SimpleImputer(strategy='most_frequent', missing_values=np.nan)
x = imputer.fit_transform(x)

# Choosing the most_frequent strategy to replace the missing values, since one of the features consists of nominal values

**# Splitting the data set into training set and test set**

In [None]:
from sklearn.model_selection import train_test_split
# Shuffle and split features and labels together: 70 % of the rows are used
# for training; the fixed seed makes the split repeatable.
split_parts = train_test_split(x, y, random_state=666, train_size=0.7)
# The result is ordered as: train features, test features, train labels, test labels.
xtrain, xtest, ytrain, ytest = split_parts

**# Feature Scaling**

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training rows only ...
sc = StandardScaler().fit(xtrain)
# ... then apply that same fitted scaler to both partitions, so the test set
# is scaled with the training statistics rather than its own.
xtrain = sc.transform(xtrain)
xtest = sc.transform(xtest)

**# Feature Extraction (dimensionality reduction with PCA)**

In [None]:
# Applying Principal Component Analysis
# The scaled training matrix is projected onto 12 principal components; the
# projection learned from the training rows is then reused for the test rows.
# NOTE(review): 12 is hard-coded — confirm it is the intended number of
# components relative to the number of feature columns.
from sklearn.decomposition import PCA
pca = PCA(n_components=12)
# Fit on the training data only and transform it in the same call.
xtrain = pca.fit_transform(xtrain)
# No refit here: the test data goes through the already-fitted projection.
xtest = pca.transform(xtest)

**# Training the Logistic Regression on the training set**

In [None]:
from sklearn.linear_model import LogisticRegression
# Hyperparameters gathered in one place: C is the inverse regularization
# strength, and the fixed seed keeps the fit repeatable.
lr_settings = dict(C=0.5, random_state=666)
lr = LogisticRegression(**lr_settings)
# Kept as the cell's last expression so the fitted estimator is still displayed.
lr.fit(xtrain, ytrain)

**# Predicting test set results**

In [None]:
# Predict a class label for every row of the test matrix with the fitted model.
value_predict = lr.predict(xtest)
# Optional check, left disabled: prints the predictions and the true labels
# side by side as two columns.
# print(np.concatenate((value_predict.reshape(len(value_predict), 1), ytest.reshape(len(ytest), 1)), axis=1))

**# Creating the confusion matrix (observes the no. of correct and incorrect predictions)**

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score
# Counts of correct and incorrect predictions, true labels against predictions.
print(confusion_matrix(ytest, value_predict))
# Accuracy, recall and precision, in that order, each printed as a percentage.
for metric in (accuracy_score, recall_score, precision_score):
    print(metric(ytest, value_predict)*100)