In [None]:
#IMPORTING LIBRARIES

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
import warnings
# Notebook-wide settings: silence library warnings and give every seaborn
# figure the dark grid background.
warnings.filterwarnings(action='ignore')
sns.set_style(style='darkgrid')

In [None]:
# Read the CSV file from the working directory into a DataFrame.
Dataset = pd.read_csv(filepath_or_buffer='healthcare-dataset-stroke-data.csv')


In [None]:
#DATA VISUALIZATION

# A cell renders only its last expression, so the preview rows are passed to
# display() explicitly; a bare head() call in the middle of the cell would be
# computed and then thrown away.
display(Dataset.head())
Dataset.describe() # summary statistics (count, mean, std, quartiles) of the numeric columns

In [None]:
Dataset.info() # prints each column's name, non-null count and dtype

In [None]:
# Count the missing entries column by column.
Dataset.isna().sum()

In [None]:
Dataset.nunique() # number of distinct values found in each column


In [None]:
# Performing some Exploratory Data Analysis (EDA)

# Rows whose stroke label equals 1; every chart below counts these rows only.
stroke = Dataset.loc[Dataset['stroke']==1]

# (column to count, figure title) for each bar chart, drawn in this order.
stroke_breakdowns = [
    ('ever_married', "Stroke / Marital Status"),
    ('smoking_status', "Stroke / Smoking Habit"),
    ('work_type', "Stroke / Profession"),
    ('Residence_type', "Stroke / Residence Type"),
    ('hypertension', "Stroke / Hypertensive Patients"),
    ('heart_disease', "Stroke / Heart Disease"),
]

for column, heading in stroke_breakdowns:
    # One new figure per attribute so the charts do not overwrite each other.
    plt.figure(figsize=(10,5))
    sns.countplot(data=stroke, x=column, palette="Set2")
    plt.title(heading)






In [None]:
# Bar chart of the stroke label, with one bar per gender inside each label value.
gender_ax = sns.countplot(x='stroke', hue='gender', data=Dataset, palette="Set2")
gender_ax.set_xlabel("Stroke Distribution With Gender")
gender_ax.set_ylabel("Count")

In [None]:
# Box plot of the age column to expose its spread and any outliers.
sns.boxplot(data=Dataset, x='age', color='Red')


In [None]:
# Box plot of the bmi column to expose its spread and any outliers.
sns.boxplot(data=Dataset, x='bmi', color='Green')

In [None]:
# Box plot of the avg_glucose_level column to expose its spread and any outliers.
sns.boxplot(data=Dataset, x='avg_glucose_level', color='Pink')

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 15))

# (column, number of bins, bar colour, (row, column) position in the 2x2 grid)
histogram_specs = [
    ("age", 100, "purple", (0, 0)),
    ("bmi", 100, "red", (0, 1)),
    ("avg_glucose_level", 100, "blue", (1, 1)),
    ("heart_disease", 5, "orange", (1, 0)),
]

for feature, n_bins, shade, (row, col) in histogram_specs:
    Dataset.plot(kind="hist", y=feature, bins=n_bins, color=shade, ax=axes[row][col])

plt.show()

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 5))
# Left panel: average glucose level against age.
Dataset.plot(kind='scatter', x='age', y='avg_glucose_level', alpha=0.5, color='blue', ax=axes[0], title="Age vs. Avg_glucose_level")
# Right panel: average glucose level against BMI; the title names the two
# columns that are actually plotted.
Dataset.plot(kind='scatter', x='bmi', y='avg_glucose_level', alpha=0.5, color='green', ax=axes[1], title="BMI vs. Avg_glucose_level")
plt.show()


In [None]:
# Removing some outliers in the BMI column
# BMI values above 75 are treated as outliers. They and the missing BMI values
# are replaced by the median of the remaining BMI values.
bmi_outlier = Dataset['bmi'] > 75
# The median is taken over the exact complement of the outlier mask (NaN is
# skipped by median()), so a value of exactly 75 is not left out of both sets.
median = Dataset.loc[~bmi_outlier, 'bmi'].median()
# Impute the bmi column only: a frame-wide fillna would also write the BMI
# median into any other column that happens to contain gaps.
Dataset['bmi'] = Dataset['bmi'].mask(bmi_outlier).fillna(median)

Dataset.isnull().sum()



In [None]:
plt.figure(figsize=(8,8))
# Correlation is only defined for numeric columns. The frame still holds text
# columns at this point, so they are filtered out explicitly instead of relying
# on corr() to skip them (recent pandas versions raise on non-numeric data).
numeric_columns = Dataset.select_dtypes(include='number')
sns.heatmap(numeric_columns.corr(), annot= True);

In [None]:
#DATA PRE-PROCESSING

# The id column is removed before modelling, then the first rows are shown.
Dataset = Dataset.drop(columns=['id'])
Dataset.head()


In [None]:
#Label Encoding
# Inspect the distinct labels of the marital-status column before mapping them.
pd.unique(Dataset['ever_married'])

In [None]:
# Inspect the distinct labels of the residence-type column before mapping them.
pd.unique(Dataset['Residence_type'])

In [None]:
# Binary encoding of marital status: 'No' becomes 0 and 'Yes' becomes 1.
Marital = {
    'No': 0,
    'Yes': 1,
}
married_flags = Dataset['ever_married'].map(Marital)
Dataset['ever_married'] = married_flags

In [None]:
# Binary encoding of residence type: 'Urban' becomes 0 and 'Rural' becomes 1.
Residence = {
    'Urban': 0,
    'Rural': 1,
}
residence_flags = Dataset['Residence_type'].map(Residence)
Dataset['Residence_type'] = residence_flags
Dataset.head()

In [None]:
# Distinct gender labels; this column is one-hot encoded further down.
pd.unique(Dataset['gender'])

In [None]:
# Distinct work-type labels; this column is one-hot encoded further down.
pd.unique(Dataset['work_type'])

In [None]:
# Distinct smoking-status labels; this column is one-hot encoded further down.
pd.unique(Dataset['smoking_status'])

In [None]:
# NOTE(review): this encoder instance is not referenced by any of the visible
# cells; the categorical columns are one-hot encoded with pd.get_dummies instead.
from sklearn.preprocessing import OneHotEncoder
One_Hot= OneHotEncoder()

In [None]:
#OneHotEncode Column with Categorical Values

# Build one indicator column per gender label, named sub_gender_<label>.
Dataset['gender'] = Dataset['gender'].astype('category')
# The indicator dtype is fixed to integer 0/1: pandas 2.x defaults to boolean
# dummies, and integer columns keep the feature frame purely numeric.
Gender_Dummies = pd.get_dummies(Dataset['gender'], prefix='sub_gender', dtype=int)

In [None]:
# Build one indicator column per work-type label, named sub_work_<label>.
Dataset['work_type'] = Dataset['work_type'].astype('category')
# The indicator dtype is fixed to integer 0/1: pandas 2.x defaults to boolean
# dummies, and integer columns keep the feature frame purely numeric.
Work_Dummies = pd.get_dummies(Dataset['work_type'], prefix='sub_work', dtype=int)

In [None]:
# Build one indicator column per smoking-status label, named sub_smoke_<label>.
Dataset['smoking_status'] = Dataset['smoking_status'].astype('category')
# The indicator dtype is fixed to integer 0/1: pandas 2.x defaults to boolean
# dummies, and integer columns keep the feature frame purely numeric.
Smoking_Dummies = pd.get_dummies(Dataset['smoking_status'], prefix='sub_smoke', dtype=int)

In [None]:
#Drop The Previous Columns

# The three text columns have indicator-column replacements, so they are
# removed from the frame in place.
encoded_sources = ["gender", "work_type", "smoking_status"]
Dataset.drop(columns=encoded_sources, inplace=True)


In [None]:
# Append the three indicator blocks to the right of the existing columns
# (gender first, then work type, then smoking status) and show the result.
indicator_blocks = [Gender_Dummies, Work_Dummies, Smoking_Dummies]
Dataset = pd.concat([Dataset, *indicator_blocks], axis=1)
Dataset

In [None]:
#Deriving the inputs as X and output as Y

# Y is the stroke column; X is every other column of the frame.
target_column = 'stroke'
Y = Dataset[target_column]
X = Dataset.drop(columns=[target_column])

In [None]:
# Removing Imbalances in data
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state=42)
X,Y = sm.fit_resample(X,Y)

In [None]:
#Splitting data into train and test.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state= 42)


In [None]:
# Report the dimensions of the four arrays produced by the split.
split_parts = (
    ('X_train:', X_train),
    ('y_train:', y_train),
    ('X_test:', X_test),
    ('y_test:', y_test),
)
for caption, part in split_parts:
    print(caption, np.shape(part))

In [None]:
#importing Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential   
from tensorflow.keras.layers import Dense        

In [None]:
# Empty Keras Sequential container; the layers are appended in the next cell.
MLP_model = Sequential()

In [None]:
# Network layout: three ReLU hidden layers (20, 14 and 8 units) followed by a
# single sigmoid unit that outputs a value between 0 and 1.
# The input width is read from the training matrix instead of being hard-coded,
# so the model still builds if the set of encoded columns changes.
n_inputs = X_train.shape[1]
MLP_model.add(Dense(units = 20 , activation = 'relu', input_shape = (n_inputs,)))
MLP_model.add(Dense(14, activation = 'relu'))
MLP_model.add(Dense(8, activation = 'relu'))
MLP_model.add(Dense(1, activation = 'sigmoid'))

In [None]:
# SGD with Nesterov momentum. `learning_rate` is the supported keyword; `lr`
# is a deprecated alias that newer Keras releases reject.
sgd = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
# Pass the configured optimizer object. The string 'sgd' would make Keras build
# a default SGD optimizer and silently ignore the settings above.
MLP_model.compile(loss = 'binary_crossentropy', optimizer = sgd, metrics = ['accuracy'])

In [None]:
# Train the network on the training split for 20 passes over the data.
MLP_model.fit(x=X_train, y=y_train, epochs=20)


In [None]:
# Score the trained network on the test split. evaluate() returns the loss
# first and then the compiled metric (accuracy).
loss_and_metrics = MLP_model.evaluate(X_test, y_test)
for caption, value in zip(('Loss = ', 'Accuracy = '), loss_and_metrics):
    print(caption, value)


In [None]:
pip install susi

In [None]:
#SOM MODEL

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import susi

In [None]:
from sklearn.datasets import make_classification

# Number of input features; not referenced by the visible cells.
n_features = 19

# split the data first, so the held-out part is fixed before any resampling
X_train, X_test, y_train, y_test = train_test_split(
    X,Y, test_size=0.3, random_state=3, shuffle=True)

# oversample the training part only, so this cell adds no synthetic rows to
# the test part
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X_train, y_train)

# scale the data: the scaler is fitted on the training part and then applied
# unchanged to the test part
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape, X_test.shape)

In [None]:
# Settings of the self-organizing map: a 60 x 60 grid, 1000 iterations for each
# of the unsupervised and supervised phases, and a fixed seed.
som_settings = dict(
    n_rows=60,
    n_columns=60,
    n_iter_unsupervised=1000,
    n_iter_supervised=1000,
    random_state=3,
)
som = susi.SOMClassifier(**som_settings)
som.fit(X_train, y_train)

# Predict the test part and report the score as a percentage.
y_pred = som.predict(X_test)
test_accuracy_pct = som.score(X_test, y_test)*100
print("Accuracy: {0:.1f} %".format(test_accuracy_pct))