In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of each file.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Data


 
*     Age : Age of the patient
 
*     Sex : Sex of the patient
 
*     exang: exercise induced angina (1 = yes; 0 = no)
 
*     ca: number of major vessels (0-3)
 
*     cp : chest pain type
              1.Value 1: typical angina
              2.Value 2: atypical angina
              3.Value 3: non-anginal pain
              4.Value 4: asymptomatic
 
*     trtbps : resting blood pressure (in mm Hg)
 
*     chol : cholesterol in mg/dl fetched via BMI sensor
 
*     fbs : (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)

*     rest_ecg : resting electrocardiographic results
              1.Value 0: normal
              2.Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
              3.Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
 
*     thalach : maximum heart rate achieved

*     output (target) : 0 = less chance of heart attack, 1 = more chance of heart attack

In [None]:
# Main patient dataset.
heart_df = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')

# The O2 saturation file has no header row: with the default header inference
# pandas turned the first reading (98.6) into the column name, which dropped that
# measurement and shifted every remaining reading up by one row. Read it
# header-less and name the column directly; the later rename of '98.6' then
# simply has nothing left to rename.
Oxygen_sat_df = pd.read_csv(
    '/kaggle/input/heart-attack-analysis-prediction-dataset/o2Saturation.csv',
    header=None,
    names=['Sat_level'],
)

In [None]:
# Preview the first five rows of the patient data.
heart_df.head(n=5)

In [None]:
# Preview the first five O2 saturation readings.
Oxygen_sat_df.head(n=5)

In [None]:
# Print column dtypes, non-null counts and memory usage for the patient data.
heart_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
heart_df.describe()

#### Combine the O2 saturation data with the rest of the data

In [None]:
# Place the saturation readings next to the patient columns, matching rows by index.
# join='inner' keeps only the index labels that exist in both frames.
frames_to_combine = [heart_df, Oxygen_sat_df]
HeartAttack_df = pd.concat(frames_to_combine, join='inner', axis=1)

display(HeartAttack_df)


In [None]:
# Give the O2 saturation column a descriptive name (it arrives labelled '98.6').
o2_column_names = {'98.6': 'Sat_level'}
HeartAttack_df = HeartAttack_df.rename(columns=o2_column_names)

In [None]:
# Display the combined frame after the column rename.
HeartAttack_df

In [None]:
# Count the missing values in each column of the combined frame.
HeartAttack_df.isna().sum()


In [None]:
# Disabled snippet: the code below is wrapped in a triple-quoted string, so it is
# never executed; the cell only evaluates (and echoes) the string itself.
# It would map the 'sex' codes "0"/"1" to the labels "Female"/"Male".
"""HeartAttack_df["sex"]= pd.DataFrame(HeartAttack_df["sex"], dtype='str')
def datas(x):
    if x == "0":
        return "Female"
    elif x== "1":
        return "Male"
    
HeartAttack_df["sex"]= HeartAttack_df["sex"].apply(datas)
HeartAttack_df.head(5)
"""

### Visualization

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px


In [None]:
# Scatter-plot matrix of every pair of columns in the combined frame.
sns.pairplot(HeartAttack_df)

In [None]:
# Cholesterol distribution with a KDE overlay, one panel per value of 'sex'.
sns.displot(x="chol", col="sex", kde=True, data=HeartAttack_df)

In [None]:
# Cholesterol per outcome class, with points coloured by 'sex'.
plt.figure(figsize=(12, 6))
sns.swarmplot(data=HeartAttack_df, hue="sex", x="output", y="chol")

In [None]:
# Number of rows for each value of 'sex'.
sns.countplot(x="sex", data=HeartAttack_df)

In [None]:
 # Age histogram, split by outcome class.
 sns.histplot(data=HeartAttack_df, x='age', hue='output', palette='crest')

In [None]:
# Age distribution per outcome class as box plots.
sns.boxplot(data=HeartAttack_df, x='output', y='age', palette='Accent')

In [None]:
# Interactive plotly version of the age-by-outcome box plot.
fig = px.box(data_frame=HeartAttack_df, y='age', x='output')
fig.show()

In [None]:
# Pairwise Pearson correlation between all columns (pandas' default method).
correlations = HeartAttack_df.corr(method='pearson')


In [None]:
# Render the correlation matrix as a heatmap on a wide figure.
plt.figure(figsize=(12, 6))
sns.heatmap(data=correlations)
plt.show()

## Importing machine learning modules

In [None]:
!pip install pyforest
!pip install lazypredict 

In [None]:
import pyforest
import warnings
warnings.filterwarnings("ignore")
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [None]:
# The target is the 'output' column; every remaining column is used as a feature.
y = HeartAttack_df['output']
X = HeartAttack_df.drop(columns=['output'])

In [None]:
# train_test_split is not imported anywhere else in this notebook (the call
# presumably relied on pyforest's implicit auto-imports), so import it explicitly
# to keep the cell working on a fresh kernel.
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

#!pip install lazypredict
#### Using lazypredict for model predictions

In [None]:
import lazypredict
from lazypredict.Supervised import LazyClassifier


### Using lazypredict to preview how different models perform on this dataset.

In [None]:
# Fit lazypredict's battery of classifiers and show the resulting comparison table.
clf = LazyClassifier(ignore_warnings=True, verbose=0)
lazy_results = clf.fit(X_train, X_test, y_train, y_test)
models, predictions = lazy_results
models

In [None]:
# RandomForestClassifier is not imported anywhere else in this notebook (the call
# presumably relied on pyforest's implicit auto-imports), so import it explicitly.
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so the fitted model, and the metrics computed from y_pred,
# are identical on every run.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

### Confirming that the RandomForestClassifier predictions are correct: as the report below shows, it worked

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
# Per-class precision/recall/F1 followed by the confusion matrix for the test set.
report_text = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
print(report_text)
print(confusion)

#### Keep in mind that the results obtained with lazypredict SHOULD NOT be considered final models. Different models take different approaches, so you should understand how each one works before choosing your final model.