<img src = "https://media.giphy.com/media/3orieTTZVPaVNMhRra/giphy.gif" align = "center">

In [None]:
!pip install autoviml -q
!pip install dexplot -q

In [None]:
from autoviml.Auto_ViML import Auto_ViML

In [None]:
# Data handling and plotting libraries used by the cells below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Raise the default resolution of every matplotlib figure.
plt.rcParams['figure.dpi'] = 200 
# Turn on matplotlib's xkcd sketch-style rendering.
plt.xkcd()
import seaborn as sns
import dexplot as dxp
import plotly.express as px
import altair as alt
# NOTE(review): set_theme runs after plt.xkcd() and may override some of the
# rcParams that xkcd mode set — confirm the resulting look is the intended one.
sns.set_theme(style="ticks", palette="pastel")

# max_rows=None lifts Altair's limit on how many rows a chart may embed.
alt.data_transformers.enable('default', max_rows=None)

%matplotlib inline
# Location of the stroke-prediction CSV inside the Kaggle input directory,
# written as directory + file name (adjacent literals are concatenated).
DATA_PATH = (
    '/kaggle/input/stroke-prediction-dataset/'
    'healthcare-dataset-stroke-data.csv'
)

In [None]:
# Load the raw dataset into the notebook-wide `data` frame.
data = pd.read_csv(filepath_or_buffer=DATA_PATH)

In [None]:
# Preview the first rows to sanity-check the load.
data.head()

In [None]:
# Summary statistics for the frame's columns.
data.describe()

In [None]:
# Print the frame's structure summary (columns, dtypes, non-null counts).
data.info()

In [None]:
# Count missing values per column to decide what needs imputing.
data.isna().sum()

In [None]:
# Mean-impute missing BMI readings so downstream plots/models see no NaNs there.
data['bmi'] = data['bmi'].fillna(
    value=data['bmi'].mean(),
)

In [None]:
# Display the frame after the BMI imputation.
data

In [None]:
# Class balance of the target: number of records per `stroke` value.
dxp.count(
    val='stroke',
    data=data,
    title='Stroke Count',
    figsize=(10, 5),
)

In [None]:
# Altair bar chart: number of records per gender.
(
    alt.Chart(data)
    .mark_bar(color="purple")
    .encode(x='gender', y='count()')
    .properties(width=500, height=400)
)

In [None]:
# Stroke counts, split by smoking status.
dxp.count(
    val='stroke',
    split='smoking_status',
    data=data,
    title='Stroke Risk',
    figsize=(10, 6),
)

In [None]:
# Work-type counts, split by smoking status.
dxp.count(
    val='work_type',
    split='smoking_status',
    data=data,
    title='Smoking at Work',
    figsize=(10, 6),
)

In [None]:
# Filled (proportional) KDE of age, coloured by smoking status.
sns.displot(
    data=data, x="age", hue="smoking_status",
    kind="kde", multiple="fill",
    clip=(0, None),  # no density below zero
    height=6, palette="rocket_r",
)

In [None]:
# Filled (proportional) KDE of BMI, coloured by smoking status.
sns.displot(
    data=data, x="bmi", hue="smoking_status",
    kind="kde", multiple="fill",
    clip=(0, None),  # no density below zero
    height=6, palette="rocket_r",
)

In [None]:
# Pairwise relationships between the numeric columns, coloured by stroke outcome.
sns.pairplot(
    data=data,
    hue="stroke",
)

In [None]:
# Mean BMI per work type, one line per stroke outcome.
dxp.line(
    x='work_type',
    y='bmi',
    aggfunc='mean',
    split='stroke',
    data=data,
    orientation='v',
    title='Line Plot',
    figsize=(10, 8),
)

In [None]:
# Mean age per smoking status, one line per stroke outcome.
dxp.line(
    x='smoking_status',
    y='age',
    aggfunc='mean',
    split='stroke',
    data=data,
    orientation='v',
    title='Line Plot',
    figsize=(10, 8),
)

In [None]:
# Mean average-glucose level per smoking status, one line per stroke outcome.
dxp.line(
    x='smoking_status',
    y='avg_glucose_level',
    aggfunc='mean',
    split='stroke',
    data=data,
    orientation='v',
    title='Line Plot',
    figsize=(10, 8),
)

In [None]:
# Stacked work-type counts by stroke outcome, one panel per residence type.
dxp.count(
    val='work_type',
    split='stroke',
    col='Residence_type',
    data=data,
    orientation='v',
    stacked=True,
    figsize=(10, 5),
)

In [None]:
# Mean BMI per residence type and stroke outcome, faceted by work type.
dxp.bar(
    x='Residence_type',
    y='bmi',
    aggfunc='mean',
    split='stroke',
    col='work_type',
    col_order='desc',
    wrap=2,
    data=data,
    figsize=(10, 8),
)

In [None]:
# Age against BMI, points coloured by stroke outcome.
dxp.scatter(
    x='age',
    y='bmi',
    split='stroke',
    data=data,
    figsize=(10, 8),
)

In [None]:
# Age against average glucose level, points coloured by stroke outcome.
dxp.scatter(
    x='age',
    y='avg_glucose_level',
    split='stroke',
    data=data,
    figsize=(10, 8),
)

In [None]:
# Spot-check ten randomly drawn rows (no seed given, so output varies per run).
data.sample(10)

In [None]:
# Glucose-level distribution per work type, split by stroke outcome.
dxp.box(
    x='avg_glucose_level',
    y='work_type',
    split='stroke',
    data=data,
    figsize=(10, 5),
)

In [None]:
# Restrict to private-sector rows, then tally smoking statuses (first ten at most).
pdata = data.loc[data['work_type'] == 'Private']
sdata = pdata['smoking_status'].value_counts().head(10)

In [None]:
# Bar chart of the smoking-status tallies held in `sdata`.
fig, ax = plt.subplots(figsize=(14, 7), dpi=200)
ax.bar(x=sdata.index, height=sdata, width=0.6, edgecolor='black')
ax.set_title('Smokers in Private Jobs', fontweight='bold')
plt.show()

In [None]:
# 3-D scatter of age, BMI and average glucose level.
# The Axes3D import is kept: on older matplotlib it is what registers the
# '3d' projection used below.
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(10,10))
# Create the axes through the figure. Instantiating Axes3D(fig) directly no
# longer attaches the axes to the figure on current matplotlib, which leaves
# the rendered plot empty.
ax = fig.add_subplot(projection='3d')
ax.scatter(data["age"], data["bmi"], data["avg_glucose_level"], c="y", s=20, alpha=0.5)
# Title the 3-D axes explicitly instead of relying on pyplot's "current axes".
ax.set_title('Age vs BMI vs Glucose level')
plt.show()

In [None]:
# Restrict to never-married rows, then tally stroke outcomes.
pdata = data.loc[data['ever_married'] == 'No']
sdata = pdata['stroke'].value_counts()

In [None]:
# Bar chart of the stroke-outcome tallies held in `sdata`.
fig, ax = plt.subplots(figsize=(14, 7), dpi=200)
ax.bar(x=sdata.index, height=sdata, width=0.6, edgecolor='black')
ax.set_title('Non- Married with Stroke Distribution', fontweight='bold')
plt.show()

In [None]:
# Stroke counts, split by gender.
dxp.count(
    val='stroke',
    split='gender',
    data=data,
    title='Gender Distribution with Stroke',
    figsize=(10, 8),
)

In [None]:
# Linear-regression fit of BMI on age.
sns.lmplot(
    x='age',
    y='bmi',
    data=data,
)

In [None]:
# Linear-regression fit of average glucose level on age.
sns.lmplot(
    x='age',
    y='avg_glucose_level',
    data=data,
)

In [None]:
# Age distribution per gender, split by stroke outcome (vertical boxes).
dxp.box(
    x='gender',
    y='age',
    split='stroke',
    data=data,
    orientation='v',
    figsize=(10, 8),
)

In [None]:
# Joint distribution of BMI and age with a regression fit.
# jointplot is figure-level: it creates its own figure, sized via `height`.
# A preceding plt.figure(...) would therefore only emit an extra empty
# figure, so none is created here.
sns.jointplot(
    data=data, x="bmi", y="age",
    kind="reg", truncate=False,
    color="m", height=7,
)
plt.show()

In [None]:
# Joint distribution of BMI and average glucose level with a regression fit.
# jointplot is figure-level: it creates its own figure, sized via `height`.
# A preceding plt.figure(...) would therefore only emit an extra empty
# figure, so none is created here.
sns.jointplot(
    data=data, x="bmi", y="avg_glucose_level",
    kind="reg", truncate=False,
    color="r", height=7,
)
plt.show()

In [None]:
# Stacked age histogram by stroke outcome, drawn on a wide 20x8 canvas.
figure = plt.figure(figsize=(20, 8))
sns.histplot(
    data=data, x="age", hue="stroke",
    multiple="stack", log_scale=False,
    palette="light:m_r",
    edgecolor=".3", linewidth=0.5,
)
plt.show()

In [None]:
# Stacked BMI histogram by stroke outcome, drawn on a wide 20x8 canvas.
figure = plt.figure(figsize=(20, 8))
sns.histplot(
    data=data, x="bmi", hue="stroke",
    multiple="stack", log_scale=False,
    palette="light:r",
    edgecolor=".3", linewidth=0.5,
)
plt.show()

In [None]:
# Stacked glucose-level histogram by stroke outcome, drawn on a wide 20x8 canvas.
figure = plt.figure(figsize=(20, 8))
sns.histplot(
    data=data, x="avg_glucose_level", hue="stroke",
    multiple="stack", log_scale=False,
    palette="light:b_r",
    edgecolor=".3", linewidth=0.5,
)
plt.show()

In [None]:
# Stroke counts, split by heart-disease flag.
dxp.count(
    val='stroke',
    split='heart_disease',
    data=data,
    title='Heart Condition Distribution with Stroke',
    figsize=(10, 8),
)

In [None]:
# Stroke counts, split by hypertension flag.
dxp.count(
    val='stroke',
    split='hypertension',
    data=data,
    title='Hyper-Tension Condition Distribution with Stroke',
    figsize=(10, 8),
)

In [None]:
# Age distribution per residence type, split by stroke outcome.
dxp.violin(
    x='age',
    y='Residence_type',
    split='stroke',
    data=data,
    title='Violin for Residense Type',
    figsize=(10, 8),
)

In [None]:
# 2-D KDE of BMI against glucose level, faceted by work type (wrap=3).
dxp.kde(
    x='bmi',
    y='avg_glucose_level',
    row='work_type',
    wrap=3,
    data=data,
)

In [None]:
# Remove the `id` column before modelling. `columns=` is used because passing
# the axis positionally (drop('id', 1)) is deprecated and rejected by pandas 2.x.
data = data.drop(columns='id')
df = data.copy(deep=True)

# Hold out a reproducible 20% random sample as the test set.
test_sample = df.sample(int(df.shape[0] * .20), random_state=99)

# Split on index membership. Masking with df.isin(test_sample) and dropping the
# resulting NaN rows selects the same rows, but the temporary NaNs upcast the
# integer columns (including the `stroke` target) to float.
in_test = df.index.isin(test_sample.index)
# dropna() is retained so rows with any missing value are still excluded.
train, test = df[~in_test].dropna(), df[in_test].dropna()

test.info()
train.info()

In [None]:
# Name of the label column and the metric name handed to Auto_ViML.
target, scoring_parameter = 'stroke', 'weighted_f1'

In [None]:
# Fit Auto_ViML on the train/test split. The call returns four values, kept as
# `m`, `feats`, `trainm` and `testm`.
m, feats, trainm, testm = Auto_ViML(
    train,
    target,
    test,
    scoring_parameter=scoring_parameter,
    hyper_param='GS',
    feature_reduction=True,
    Boosting_Flag=True,
    Binning_Flag=False,
    Stacking_Flag=True,
    Imbalanced_Flag=True,
    verbose=2,
)

<img src = "https://media.giphy.com/media/UsS3w9ZCvcshzMdbJ8/giphy.gif">