# Heart-Attack Analysis

![](http://www.templehealth.org/sites/default/files/inline-images/heart-attack-symptoms-men-vs-women.png)

# Importing Libraries

In [None]:

import numpy as np 
import pandas as pd 
import os
import seaborn as sns
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    for file_name in files:
        full_path = os.path.join(root, file_name)
        print(full_path)


# Importing Dataset

In [None]:
# Read the heart-attack CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv("/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv")
# Preview the first rows as the cell output.
df.head()

# Heatmap to Check Correlation

In [None]:
# Pairwise correlation between columns, drawn as a heatmap with each
# coefficient written inside its cell.
correlation_matrix = df.corr()
sns.heatmap(correlation_matrix, annot=True)

**Data Information**

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
df.info()

In [None]:
# For every column label, print the label followed by that column's distinct values.
for column in df.columns:
    distinct_values = df[column].unique()
    print(column, '\n', distinct_values, '\n')

In [None]:
# Show the first five rows of the frame as the cell output.
df.head(n=5)

# Shape Before and After Dropping Duplicate Rows

In [None]:
# Shape of the frame before de-duplication.
print('Before ', df.shape)
# Remove fully duplicated rows. The result is rebound to `df` instead of using
# inplace=True, so no other reference to the original frame is mutated.
df = df.drop_duplicates()
# Shape of the frame after de-duplication.
print('After ', df.shape)


# Checking for Null Values

In [None]:
# Count the missing values in each column.
df.isna().sum()

**Importing Libraries**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Splitting Dependent and Independent Features

In [None]:
# Every column except the last forms the feature matrix; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Splitting arrays into Training and Testing Arrays

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)


# Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model on the training split.
# NOTE(review): the same target is later passed to RandomForestClassifier, so it
# looks like a class label — confirm that a linear regressor is intended here.
reg = LinearRegression().fit(X_train, y_train)
# Echo the fitted estimator as the cell output.
reg

# Testing Model

In [None]:
# Run the fitted regressor on the held-out features.
y_pred = reg.predict(X=X_test)
# Display the raw predictions as the cell output.
y_pred

# R2 Score

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions against the held-out targets.
test_r2 = r2_score(y_test, y_pred)
print('r2 score:', test_r2)

# Estimators vs Accuracy

In [None]:
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Sweep the number of trees (1, 6, ..., 46) and record the test accuracy of a
# random forest fitted with each setting.
accuracy = []
estimators_count = []
for i in range(1, 51, 5):
    RandomForest = RandomForestClassifier(n_estimators=i, criterion='entropy', random_state=0)
    RandomForest.fit(X_train, y_train)
    y_predicted = RandomForest.predict(X_test)
    # Score once and reuse the value for both the stored list and the log line.
    score = accuracy_score(y_test, y_predicted)
    accuracy.append(score)
    estimators_count.append(i)
    # Built-in round() works whether the score is a NumPy scalar or a plain
    # Python float; a plain float has no .round() method.
    print(f'{i} {round(score, 4)}')

# Plot of No_of_Estimators VS Accuracy graph
plt.plot(estimators_count, accuracy)
plt.xlabel('Number of Estimators')
plt.ylabel('Accuracy')
plt.title('No_of_Estimators VS Accuracy')
# The `b` keyword of grid() was renamed to `visible` in Matplotlib 3.5 and later
# removed. Calling grid() with no arguments toggles the grid lines, which is
# what passing b=None used to do.
plt.grid()
plt.show()


# Elbow Method Representation

In [None]:
from sklearn.cluster import KMeans

# Candidate cluster counts to evaluate.
cluster_range = range(2, 11)

# Within-cluster sum of squares (inertia) for each candidate count.
wcss = []
for n_clusters in cluster_range:
    # Fit a seeded k-means++ model on the feature matrix X.
    k_means = KMeans(n_clusters=n_clusters, init='k-means++', random_state=42).fit(X)
    wcss.append(k_means.inertia_)

# Plot inertia against the number of clusters to look for the "elbow".
plt.plot(cluster_range, wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.grid()
plt.show()


# Silhouette Score

In [None]:
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans

# Silhouette score for each candidate number of clusters from 2 to kmax.
silhouette = []
kmax = 10
for k in range(2, kmax + 1):
    # Seed k-means so the clustering, and therefore the score, is reproducible
    # (same seed as the elbow-method cell).
    k_means = KMeans(n_clusters=k, random_state=42).fit(X)
    labels = k_means.labels_
    # sample_size makes silhouette_score work on a random subset of the rows;
    # random_state fixes which subset is drawn.
    silhouette.append(
        silhouette_score(X, labels, metric='manhattan', sample_size=5000, random_state=42)
    )
print(silhouette)

# Plot of the results obtained using the Silhouette Method
plt.plot(range(2, kmax + 1), silhouette)
plt.title('Silhouette Method')
plt.xlabel('k')
plt.ylabel('Silhouette Score')
plt.grid()
plt.show()
