In [None]:
import numpy as np
import pandas as pd 
import plotly.express as px
from plotly.offline import plot, iplot, init_notebook_mode

In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:

# Read the heart-failure CSV into `df` and preview its first ten records.
df = pd.read_csv('/kaggle/input/heart-failure-prediction/heart.csv')
df.head(10)

In [None]:
# Show the (rows, columns) dimensions of the loaded dataset.
df.shape

In [None]:
# Print pandas' per-column summary of the frame.
df.info()

### Detailed Exploratory Data Analysis 

### 1.Count of Each Sex

In [None]:
# Donut chart of how the records are split by sex; each slice carries its
# percentage and label inside, with a thin black outline.
fig = px.pie(df, names='Sex', hole=0.5)
fig.update_traces(
    textposition='inside',
    textinfo='percent+label',
    marker={'line': {'color': '#000000', 'width': 1.5}},
)
fig.show()

### 2. Mean Age

In [None]:
# Age distribution coloured by sex, with a dashed vertical line at the mean
# age of the whole dataset.
fig = px.histogram(
    df,
    x='Age',
    color='Sex',
    title="<b>Average Age Gender wise</b>",
)
fig.add_vline(x=df['Age'].mean(), line_dash="dash")
fig.show()

### Male VS Heart Disease Risk

In [None]:
# Rows whose Sex value is 'M'; print the subset's dimensions.
male = df.loc[df['Sex'] == 'M']
print(male.shape)

In [None]:
# Male rows whose HeartDisease flag equals 1; the cell shows the subset's dimensions.
male_risk_of_HeartAttack = male.loc[male['HeartDisease'] == 1]
male_risk_of_HeartAttack.shape

In [None]:
# Summary for male patients: group size, number with HeartDisease == 1, and
# that share as a percentage rounded to the nearest whole number.
details_male = {
    "Total Male": len(male),
    "Total Male affected by Heart Disease": len(male_risk_of_HeartAttack),
    "Percentage of Males getting Affected": np.round(
        100 * len(male_risk_of_HeartAttack) / len(male)
    ),
}


In [None]:
# Display the male summary dictionary.
details_male

### Female VS Heart Disease Risk

In [None]:
# Rows whose Sex value is 'F'; print the subset's dimensions.
female = df.loc[df['Sex'] == 'F']
print(female.shape)

In [None]:
# Female rows whose HeartDisease flag equals 1; the cell shows the subset's dimensions.
female_riskOf_HeartAttack = female.loc[female['HeartDisease'] == 1]
female_riskOf_HeartAttack.shape

In [None]:
# Summary for female patients: group size, number with HeartDisease == 1, and
# that share as a percentage rounded to the nearest whole number.
details_female = {
    "Total Female": len(female),
    "Total Female affected by Heart Disease": len(female_riskOf_HeartAttack),
    "Percentage of Females getting Affected": np.round(
        100 * len(female_riskOf_HeartAttack) / len(female)
    ),
}

In [None]:
# Display the female summary dictionary.
details_female

In [None]:
# Combine the male and female summaries (male entries first, same keys and
# values as the individual dictionaries) and wrap them in a one-row
# DataFrame labelled 'Details'.
Full_details = {**details_male, **details_female}
gender_wise = pd.DataFrame(Full_details, index=['Details'])

In [None]:
# Transpose so each statistic is shown on its own row.
gender_wise.T

### From the above data we can conclude that males are more prone to heart disease than females

In [None]:
# Record counts per sex, coloured by the HeartDisease flag, on the dark template.
fig = px.histogram(
    df,
    x='Sex',
    color='HeartDisease',
    template='plotly_dark',
)
fig.show()

### Old Age VS Heart Disease 

In [None]:
# "Old age" in this notebook means strictly above the dataset's mean age.
old_age = df.loc[df['Age'] > df['Age'].mean()]
old_age.shape

In [None]:
# Above-mean-age rows whose HeartDisease flag equals 1.
old_age_risk = old_age.loc[old_age['HeartDisease'] == 1]
old_age_risk.shape

In [None]:
# Summary for the above-mean-age group: size, number with HeartDisease == 1,
# and that share as a percentage rounded to the nearest whole number.
oldAge_details = {
    'Total old age people': len(old_age),
    'old age people who suffered Heart Diasese': len(old_age_risk),
    '% of old age people who suffered Heart Diasese': np.round(
        100 * len(old_age_risk) / len(old_age)
    ),
}

In [None]:
# Wrap the summary dictionary in a one-row DataFrame labelled 'Details'.
old_age_details=pd.DataFrame(oldAge_details,index=['Details'])

In [None]:
# Transpose so each statistic is shown on its own row.
old_age_details.T

### Young people VS Heart Disease

In [None]:
# "Young" in this notebook means strictly below the dataset's mean age; a row
# whose Age equals the mean exactly belongs to neither the young nor the
# old-age group. The cell shows the number of rows selected.
young_people = df.loc[df['Age'] < df['Age'].mean()]
len(young_people)

In [None]:
# Below-mean-age rows whose HeartDisease flag equals 1; the cell shows their count.
young_people_risk = young_people.loc[young_people['HeartDisease'] == 1]
len(young_people_risk)

In [None]:
# Summary for the below-mean-age group: size, number with HeartDisease == 1,
# and that share as a percentage rounded to the nearest whole number.
young_people_details = {
    'Total young people': len(young_people),
    'Young people who have heart Disease': len(young_people_risk),
    '% of young people who have heart Disease': np.round(
        100 * len(young_people_risk) / len(young_people)
    ),
}

In [None]:
# Wrap the summary in a one-row DataFrame labelled 'details'. Note that the
# name `young_people_details` is rebound here from the dict to the DataFrame.
young_people_details=pd.DataFrame(young_people_details,index=['details'])

In [None]:
# Transpose so each statistic is shown on its own row.
young_people_details.T

### From the above data we can conclude that older people are more prone to heart disease than younger people

### Which type of ChestPain can lead to HeartDisease?

In [None]:
# Number of records for each chest-pain type.
df['ChestPainType'].value_counts()

In [None]:
# Pie chart of how the records are distributed across chest-pain types.
fig=px.pie(df,names='ChestPainType')
fig.show()

In [None]:
# One subset per chest-pain type code.
asy = df.loc[df['ChestPainType'] == 'ASY']
nap = df.loc[df['ChestPainType'] == 'NAP']
ata = df.loc[df['ChestPainType'] == 'ATA']
ta = df.loc[df['ChestPainType'] == 'TA']

# Within each subset, the rows whose HeartDisease flag equals 1.
asy_heartRisk = asy.loc[asy['HeartDisease'] == 1]
nap_heartRisk = nap.loc[nap['HeartDisease'] == 1]
ata_heartRisk = ata.loc[ata['HeartDisease'] == 1]
ta_heartRisk = ta.loc[ta['HeartDisease'] == 1]

In [None]:
# Percentage of records with HeartDisease == 1 within each chest-pain type.
# The complete ratio is passed to np.round so each value is a whole-number
# percentage, matching the other summary tables in this notebook. Previously
# the closing parenthesis sat after the numerator, so only the (already
# integer) count*100 was "rounded" and the percentage itself was left
# unrounded.
ChestPainDetails = {
    '% of heart Disease with ASY Type ChestPain': np.round(asy_heartRisk.shape[0] * 100 / asy.shape[0]),
    '% of heart Disease with NAP Type ChestPain': np.round(nap_heartRisk.shape[0] * 100 / nap.shape[0]),
    '% of heart Disease with ATA Type ChestPain': np.round(ata_heartRisk.shape[0] * 100 / ata.shape[0]),
    '% of heart Disease with TA Type ChestPain': np.round(ta_heartRisk.shape[0] * 100 / ta.shape[0]),
}

In [None]:
# Wrap the per-type percentages in a one-row DataFrame labelled 'Details'.
chestPain=pd.DataFrame(ChestPainDetails,index=['Details'])

In [None]:
# Transpose so each chest-pain type is shown on its own row.
chestPain.T

### From the above data we can conclude that people with ASY-type chest pain are the most prone to heart disease. We cannot take TA-type chest pain into consideration as it only has 46 examples. People with ATA-type chest pain are the least prone to heart disease.


In [None]:
# Record counts per chest-pain type, coloured by the HeartDisease flag.
fig = px.histogram(
    df,
    x='ChestPainType',
    color='HeartDisease',
)
fig.show()


### Exercise-Induced Angina VS Heart Disease

In [None]:
# NOTE: despite its name, `angina` holds the rows WITHOUT exercise-induced
# angina (ExerciseAngina == 'N'); `no_angina` is the part of that group whose
# HeartDisease flag equals 1.
angina = df.loc[df['ExerciseAngina'] == 'N']
no_angina = angina.loc[angina['HeartDisease'] == 1]


In [None]:
# Report, as a whole-number percentage, how many of the rows with
# ExerciseAngina == 'N' still have HeartDisease == 1.
# The message previously misspelled "Angina" as "Angima".
print('Percentage of people with No Angina who are still having Heart Disease is',np.round(no_angina.shape[0]*100/angina.shape[0]))

In [None]:
# Record counts per ExerciseAngina value, coloured by the HeartDisease flag.
fig = px.histogram(df, x='ExerciseAngina', color='HeartDisease')
fig.show()

### Preparation for Machine Learning Model 

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [None]:
# Shared preprocessing objects: `sc` is used later to standardise the feature
# matrix, `le` to integer-encode the categorical columns.
sc=StandardScaler()
le=LabelEncoder()

In [None]:
# Integer-encode each categorical column in turn, overwriting it in `df`.
# The single encoder is refitted per column, so afterwards it only remembers
# the mapping of the last column processed.
for column in ('Sex', 'ChestPainType', 'ExerciseAngina', 'ST_Slope', 'RestingECG'):
    df[column] = le.fit_transform(df[column])

df.head(n=10)

### Checking correlation

In [None]:
# Pairwise correlation matrix of the columns, shown as the cell output.
data=df.corr()
data

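### As we can see, ChestPainType, Cholesterol, MaxHR and ST_Slope have a negative correlation with HeartDisease, so these columns will be dropped. (Note: a negative correlation can be as informative as a positive one; what matters for prediction is its magnitude, so this choice may discard useful signal.)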

In [None]:
# Remove the four selected columns from `df` itself (the frame is modified in
# place, so running this cell a second time raises because they are gone).
col_to_drop = ['ChestPainType', 'Cholesterol', 'MaxHR', 'ST_Slope']
df.drop(columns=col_to_drop, inplace=True)

In [None]:
# Display the frame after the column drop.
df

### Converting DataFrame to NumPy array

In [None]:
# Replace `df` with its underlying NumPy array (the name no longer refers to
# a DataFrame after this cell), then take every column but the last as the
# feature matrix `x` and the last column as the target `y`.
df = df.values
x, y = df[:, :-1], df[:, -1]

In [None]:
# Standardise every feature column, overwriting `x` with the scaled values.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# below, so the held-out rows contribute to the scaling statistics.
x=sc.fit_transform(x)

### Splitting into training and testing datasets

In [None]:
# Hold out 20% of the rows for testing.
# The previous version sliced the first 80% of rows for training and the
# remaining tail for testing. That split is unshuffled and unstratified, so
# if the file is ordered in any way (presumably possible here; not verified),
# train and test come from different distributions and the test score is
# misleading. train_test_split shuffles with a fixed seed (reproducible) and
# stratifies on `y` so both parts keep the same class balance.
from sklearn.model_selection import train_test_split

# Same training-set size as before: floor(80% of the rows); the test set
# receives all remaining rows.
split = int(0.8 * x.shape[0])

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=split,
    shuffle=True,
    stratify=y,
    random_state=42,
)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

In [None]:
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import StackingClassifier

# Candidate estimators. In the cells visible here only `svm` is used (as the
# grid-search estimator); `rfc`, `dt` and `lr` are created with library
# defaults and not referenced again.
svm=LinearSVC(C= 0.4,dual=False)
rfc=RandomForestClassifier()
dt=DecisionTreeClassifier()
lr=LogisticRegression()


In [None]:
# Grid-search space: a single grid that varies only the regularisation
# parameter C.
params = [{'C': [0.1, 0.2, 0.4, 0.5, 1.0, 2.0, 5.0]}]

In [None]:
# 5-fold cross-validated grid search over `params` for the linear SVM,
# selecting by accuracy.
gs=GridSearchCV(estimator=svm,param_grid=params,scoring='accuracy',cv=5)

In [None]:
# Run the grid search on the training split.
gs.fit(x_train,y_train)

In [None]:
# Parameter combination that achieved the best cross-validated score.
gs.best_params_

In [None]:
# Score of the search's selected model on the training split.
gs.score(x_train,y_train)

In [None]:
# Score of the search's selected model on the held-out test split.
gs.score(x_test,y_test)

In [None]:
# Base learners for the stacking ensemble, as (name, estimator) pairs.
# - The random forest is seeded so the ensemble's score is reproducible from
#   run to run; without a seed, every run grows different trees.
# - The linear SVM uses dual=False, matching the `svm` estimator configured
#   for the grid search earlier in the notebook. The primal solver is the one
#   scikit-learn recommends when there are more samples than features
#   (presumably the case here; confirm against the data).
base_models = [
    ('rf', RandomForestClassifier(
        n_estimators=1400,
        min_samples_split=5,
        max_depth=80,
        bootstrap=True,
        random_state=42,
    )),
    ('SVC', LinearSVC(C=1, dual=False)),
]

In [None]:
# Final estimator of the stack: combines the base learners' outputs.
meta_model = LogisticRegression()

In [None]:
# Stack the base learners under the logistic-regression meta model.
# passthrough=True also feeds the original features to the final estimator;
# cv=3 sets the internal cross-validation used to produce its training inputs.
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, passthrough=True, cv=3)

In [None]:
# Train the stacked ensemble on the training split.
stacking_model.fit(x_train,y_train)

In [None]:
# Mean accuracy of the stacked ensemble on the held-out test split.
stacking_model.score(x_test,y_test)