# getting started:

In [None]:
import numpy as np 
import pandas as pd 

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# Load the heart-disease CSV from the Kaggle input mount into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/heart-disease-uci/heart.csv')

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics, transposed so each column of the data becomes a row.
df.describe().T

# exploratory data-analysis:

In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns 

# Keep the column index around; later cells slice it to separate predictors from the label.
l = df.columns

In [None]:
# Display the column names.
l

In [None]:
# Bar chart of how many rows fall into each value of the `sex` column.
df['sex'].value_counts().plot.bar()

In [None]:
# Scatter `chol` (x-axis) against `trestbps` (y-axis).
plt.scatter(x=df['chol'], y=df['trestbps'])

In [None]:
# Heatmap of the pairwise column correlations.
sns.heatmap(data=df.corr())

In [None]:
# Scatter `thal` (x-axis) against `oldpeak` (y-axis).
plt.scatter(x=df['thal'], y=df['oldpeak'])

In [None]:
# Scatter `thalach` against `slope`, coloured by `target`.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally,
# so the positional form raises a TypeError there (0.11 only warned).
sns.scatterplot(data=df, x='thalach', y='slope', hue='target')

# modelling the data using classifiers:

In [None]:
# Every column except the last is used as a predictor; the last column is the label.
feature_columns, label_column = l[:-1], l[-1]
X = df[feature_columns]
y = df[label_column]

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Fit the min-max scaler on the training rows only, then apply the same
# fitted scaling to the held-out rows.
scaler = MinMaxScaler()
scaled_train = scaler.fit(X_train).transform(X_train)
scaled_test = scaler.transform(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

**using logistic regression:**

In [None]:
from sklearn.linear_model import LogisticRegression

# Train a default logistic regression on the scaled training features
# and predict labels for the scaled test features.
lr_model = LogisticRegression().fit(scaled_train, y_train)
preds = lr_model.predict(scaled_test)

In [None]:
# Confusion matrix of the held-out labels versus the current predictions.
print(confusion_matrix(y_true=y_test, y_pred=preds))

In [None]:
# Text report of per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=preds))

**using PCA to reduce the dimensionality of the data-set:**

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Project the scaled features onto three principal components.
# The projection is fitted on the training rows and reused for the test rows.
pca = PCA(n_components=3)
reduced_train = pca.fit_transform(scaled_train)
reduced_test = pca.transform(scaled_test)

In [None]:
# Show the dimensions of the projected training matrix.
np.shape(reduced_train)

In [None]:
# First projected column (x-axis) against the second (y-axis).
plt.scatter(x=reduced_train[:, 0], y=reduced_train[:, 1])

In [None]:
# Second projected column (x-axis) against the third (y-axis).
plt.scatter(x=reduced_train[:, 1], y=reduced_train[:, 2])

In [None]:
# First projected column (x-axis) against the third (y-axis).
plt.scatter(x=reduced_train[:, 0], y=reduced_train[:, 2])

**using logistic regression with PCA:**

In [None]:
# Train a default logistic regression on the PCA-reduced training features
# and predict labels for the PCA-reduced test features.
lr_model = LogisticRegression().fit(reduced_train, y_train)
preds = lr_model.predict(reduced_test)

In [None]:
# Confusion matrix of the held-out labels versus the current predictions.
print(confusion_matrix(y_true=y_test, y_pred=preds))

In [None]:
# Text report of per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=preds))

In [None]:
from sklearn.metrics import accuracy_score
# Sweep the number of PCA components and record the test accuracy of a
# logistic regression trained on each reduced representation.
#   iv -- component counts tried
#   av -- test-set accuracy for the matching entry of iv
iv = []
av = []

# PCA can keep at most min(n_samples, n_features) components. Deriving the
# bound from the data (instead of a hard-coded 12) makes the sweep cover every
# available component, and keeps it valid if the feature set changes.
max_components = min(scaled_train.shape)

for i in range(1, max_components + 1):
    pca = PCA(n_components=i)

    # Fit the projection on the training rows only; reuse it for the test rows.
    reduced_train = pca.fit_transform(scaled_train)
    reduced_test = pca.transform(scaled_test)

    lr_model = LogisticRegression()
    lr_model.fit(reduced_train, y_train)
    preds = lr_model.predict(reduced_test)

    iv.append(i)
    av.append(accuracy_score(y_test, preds))

In [None]:
# Plot the recorded accuracies (av) against the component counts (iv).
plt.plot(iv,av)

**using SVMs with PCA:**

In [None]:
from sklearn.svm import SVC

In [None]:
# Sweep the number of PCA components and record the test accuracy of a
# default SVC trained on each reduced representation.
#   iv -- component counts tried
#   av -- test-set accuracy for the matching entry of iv
iv = []
av = []

# PCA can keep at most min(n_samples, n_features) components. Deriving the
# bound from the data (instead of a hard-coded 12) makes the sweep cover every
# available component, and keeps it valid if the feature set changes.
max_components = min(scaled_train.shape)

for i in range(1, max_components + 1):
    pca = PCA(n_components=i)

    # Fit the projection on the training rows only; reuse it for the test rows.
    reduced_train = pca.fit_transform(scaled_train)
    reduced_test = pca.transform(scaled_test)

    svc = SVC()
    svc.fit(reduced_train, y_train)
    preds = svc.predict(reduced_test)

    iv.append(i)
    av.append(accuracy_score(y_test, preds))

In [None]:
# Plot the recorded accuracies (av) against the component counts (iv).
plt.plot(iv,av)

In [None]:
# Refit with four principal components.
# NOTE(review): 4 is presumably read off the sweep plot -- confirm.
pca = PCA(n_components=4)
reduced_train = pca.fit_transform(scaled_train)
reduced_test = pca.transform(scaled_test)

# Default SVC trained on the reduced training rows, evaluated on the reduced test rows.
svc = SVC().fit(reduced_train, y_train)
preds = svc.predict(reduced_test)

In [None]:
# Text report of per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=preds))

**using SVMs without PCA:**

In [None]:
# Default SVC trained directly on the scaled features (no PCA step).
svc = SVC().fit(scaled_train, y_train)
preds = svc.predict(scaled_test)

In [None]:
# Text report of per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=preds))

In [None]:
from sklearn.preprocessing import PolynomialFeatures


# Expand the scaled features with degree-2 polynomial terms.
# The expansion is fitted on the training matrix and reused for the test matrix.
poly = PolynomialFeatures(degree=2)
x_poly_train = poly.fit(scaled_train).transform(scaled_train)
x_poly_test = poly.transform(scaled_test)

In [None]:
# Default SVC trained on the polynomial-expanded features.
svc = SVC().fit(x_poly_train, y_train)
preds = svc.predict(x_poly_test)

In [None]:
# Text report of per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=preds))

In [None]:
# Sweep the number of PCA components over the polynomial-expanded features and
# record the test accuracy of a default SVC for each count.
#   iv -- component counts tried
#   av -- test-set accuracy for the matching entry of iv
iv = []
av = []

# PCA can keep at most min(n_samples, n_features) components. Deriving the
# bound from the matrix replaces the magic number 106 and keeps the sweep valid
# if the polynomial degree, the feature set, or the train size changes.
max_components = min(x_poly_train.shape)

for i in range(1, max_components + 1):
    pca = PCA(n_components=i)

    # Fit the projection on the training rows only; reuse it for the test rows.
    reduced_train = pca.fit_transform(x_poly_train)
    reduced_test = pca.transform(x_poly_test)

    svc = SVC()
    svc.fit(reduced_train, y_train)
    preds = svc.predict(reduced_test)

    iv.append(i)
    av.append(accuracy_score(y_test, preds))

In [None]:
# Plot the recorded accuracies (av) against the component counts (iv).
plt.plot(iv,av)

In [None]:
# Convert both sweep records to arrays, then show the component count whose
# accuracy is highest (the first such count if there is a tie).
iv, av = np.array(iv), np.array(av)

iv[np.argmax(av)]

In [None]:
# Refit on the polynomial features with eight principal components.
# NOTE(review): 8 is presumably the best count reported by the sweep -- confirm.
pca = PCA(n_components=8)
reduced_train = pca.fit_transform(x_poly_train)
reduced_test = pca.transform(x_poly_test)

# Default SVC trained on the reduced training rows, evaluated on the reduced test rows.
svc = SVC().fit(reduced_train, y_train)
preds = svc.predict(reduced_test)

In [None]:
# Text report of per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=preds))