In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objs as go

In [None]:
!pip install dataprep


In [None]:
from dataprep.eda import *


In [None]:
# Read the stroke-prediction CSV into the working DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../input/stroke-prediction-dataset/healthcare-dataset-stroke-data.csv'
)


In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
# Display the column labels of the loaded frame.
df.columns


In [None]:
# Columns whose dtype is neither int64 nor float64.
non_numeric = df.select_dtypes(exclude=['int64', 'float64'])
non_numeric.columns


In [None]:
# Integer codes for each categorical column.  Labels that do not appear in a
# mapping are left unchanged by `replace`.
CATEGORY_CODES = {
    'gender': {'Male': 1, 'Female': 0},
    'ever_married': {'No': 0, 'Yes': 1},
    'work_type': {'Private': 0, 'Self-employed': 1, 'children': 2, 'Govt_job': 3, 'Never_worked': 4},
    'Residence_type': {'Urban': 0, 'Rural': 1},
    'smoking_status': {'never smoked': 0, 'Unknown': 1, 'formerly smoked': 2, 'smokes': 3},
}

# Assign the encoded Series back to the frame instead of calling
# `df.<col>.replace(..., inplace=True)`: the in-place call operates on an
# intermediate Series, which raises a chained-assignment warning on recent
# pandas and leaves `df` untouched when Copy-on-Write is enabled.
# `infer_objects()` gives fully mapped columns an integer dtype even on pandas
# versions where `replace` no longer downcasts object columns.
for column, codes in CATEGORY_CODES.items():
    df[column] = df[column].replace(codes).infer_objects()



In [None]:
# `pd.get_dummies` returns one indicator column per distinct value, which
# cannot be stored in the single 'gender' column (pandas raises
# "Columns must be same length as key").  The column has already been
# integer-coded by the replace step, so only coerce it to a numeric dtype;
# any label that was not covered by the mapping becomes NaN and is handled
# together with the other missing values further down.
df['gender'] = pd.to_numeric(df['gender'], errors='coerce')

In [None]:
# Render an automated EDA report for the current frame.
# NOTE(review): `create_report` is presumably supplied by the star import from
# `dataprep.eda` — confirm.
create_report(df)


In [None]:
# Preview the first five rows after encoding.
df.head(n=5)

In [None]:
# Remove the 'id' column.  `columns=` already selects the axis, so the extra
# `axis=1` is dropped, and `errors='ignore'` keeps the cell re-runnable once
# the column is gone.
df = df.drop(columns='id', errors='ignore')


In [None]:
# Preview the first five rows once 'id' has been removed.
df.head(n=5)

In [None]:
# Fill missing values with each column's most frequent value.
# `df.mode()` returns a DataFrame (one row per tied mode); passing it straight
# to `fillna` aligns on the row index and fills almost nothing, so take its
# first row to get a column -> value Series.  `fillna` returns a new frame, so
# the result must be assigned back for the imputation to take effect.
df = df.fillna(df.mode().iloc[0])

# Remaining missing values per column.
df.isna().sum()

In [None]:
# Discard every row that still contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [None]:
# Columns to show as box plots, in display order.
BOX_COLUMNS = [
    "gender",
    "age",
    "hypertension",
    "heart_disease",
    "ever_married",
    "work_type",
    "Residence_type",
    "avg_glucose_level",
    "bmi",
    "smoking_status",
    "stroke",
]

# One box trace per column, named after the column it summarises.
data = [go.Box(name=column, y=df[column]) for column in BOX_COLUMNS]
plotly.offline.iplot(data)

In [None]:
# Plot the correlation of the frame's columns against the "stroke" column.
# NOTE(review): `plot_correlation` is presumably supplied by the star import
# from `dataprep.eda` — confirm.
plot_correlation(df, "stroke")


In [None]:
# Pairwise scatter plots: gender, age and hypertension against the target.
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['gender', 'age', 'hypertension', 'stroke'],
)
fig.show()

In [None]:
# Pairwise scatter plots: heart disease, marital status and work type
# against the target.
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['heart_disease', 'ever_married', 'work_type', 'stroke'],
)
fig.show()

In [None]:
# Pairwise scatter plots: residence type, glucose level and BMI against the
# target.
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['Residence_type', 'avg_glucose_level', 'bmi', 'stroke'],
)
fig.show()

In [None]:
# Same dimensions as the previous matrix, with smoking status added.
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['Residence_type', 'avg_glucose_level', 'bmi', 'smoking_status', 'stroke'],
)
fig.show()

In [None]:
# Separate the 'stroke' target from the remaining predictor columns.
y = df["stroke"]
X = df.drop(columns=["stroke"])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed makes the shuffled
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=44,
    shuffle=True,
)


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a gradient-boosting classifier on the training split.
GBCModel = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=3,
    random_state=33,
)
GBCModel.fit(X_train, y_train)

# Score the fitted model on both splits.
train_score = GBCModel.score(X_train, y_train)
test_score = GBCModel.score(X_test, y_test)
print('GBCModel Train Score is : ' , train_score)
print('GBCModel Test Score is : ' , test_score)

# Predicted labels and class probabilities for the held-out rows.
y_pred = GBCModel.predict(X_test)
y_pred_prob = GBCModel.predict_proba(X_test)

In [None]:
#Import Libraries
from sklearn.metrics import f1_score

# F1 = 2 * (precision * recall) / (precision + recall), computed for the
# positive class (stroke == 1).
# With average='micro' on a single-label problem the value is identical to
# accuracy, which only repeats the test score already printed for the model,
# so the binary average is used instead.
# Other options for `average`: 'micro', 'macro', 'weighted', 'samples'.
F1Score = f1_score(y_test, y_pred, average='binary', pos_label=1)
print('F1 Score is : ', F1Score)

In [None]:
#Import Libraries
from sklearn.metrics import confusion_matrix

# Confusion matrix: rows are the true classes, columns the predicted classes.
CM = confusion_matrix(y_test, y_pred)
print('Confusion Matrix is : \n', CM)

# Draw the matrix as an annotated heatmap.  `center` expects the numeric value
# at which the colormap is centred, so `center=True` centred it at 1 and
# skewed the colours; it is replaced by per-cell count annotations.
# seaborn and matplotlib are already imported at the top of the notebook.
ax = sns.heatmap(CM, annot=True, fmt='d', cmap='PuBu')
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix')
plt.show()