In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [2]:
data = pd.read_csv("stroke.csv", index_col=0)

In [3]:
data.head()

Unnamed: 0_level_0,gender,age,hypertension,alcohol_intake,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
9046,Male,67.0,147,2,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
51676,Female,61.0,135,4,0,Yes,Self-employed,Rural,202.21,,never smoked,1
31112,Male,80.0,115,3,1,Yes,Private,Rural,105.92,32.5,never smoked,1
60182,Female,49.0,125,2,0,Yes,Private,Urban,171.23,34.4,smokes,1
1665,Female,79.0,130,3,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [4]:
data['hypertension'] = data['hypertension'].astype(float) 
data['alcohol_intake'] = data['alcohol_intake'].astype(float)

In [5]:
data = data.drop("ever_married" , axis=1)
data = data.drop("work_type" , axis=1)

In [6]:
for column in data.columns:
    if data[column].dtype == np.number:
        continue
    data[column] = LabelEncoder().fit_transform( data[column] )

In [7]:
data = data.fillna(data.mean())

In [8]:
X = data.drop(["stroke"], axis=1)
Y = data["stroke"]

In [9]:
x_scaler = MinMaxScaler()
x_scaler.fit(X)
column_names = X.columns
X[column_names]=x_scaler.transform(X)

In [10]:
X.head()

Unnamed: 0_level_0,gender,age,hypertension,alcohol_intake,heart_disease,Residence_type,avg_glucose_level,bmi,smoking_status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
9046,1.0,0.816895,0.858974,0.333333,1.0,1.0,0.801173,0.30126,0.333333
51676,0.0,0.743652,0.782051,0.666667,0.0,0.0,0.678875,0.21538,0.666667
31112,1.0,0.975586,0.653846,0.5,1.0,0.0,0.234159,0.254296,0.666667
60182,0.0,0.597168,0.717949,0.333333,0.0,1.0,0.535793,0.27606,1.0
1665,0.0,0.963379,0.75,0.5,0.0,0.0,0.549141,0.15693,0.666667


In [11]:
X=np.array(X)
Y= np.array(Y)

In [12]:
from imblearn.over_sampling import SMOTE 
sm = SMOTE(random_state = 2) 
X_res, Y_res = sm.fit_sample(X,Y.ravel())

In [13]:
print("After OverSampling, counts of label '1': {}".format(sum(Y_res == 1))) 
print("After OverSampling, counts of label '0': {}".format(sum(Y_res == 0))) 

After OverSampling, counts of label '1': 2751
After OverSampling, counts of label '0': 2751


In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation,Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.metrics import binary_crossentropy

In [15]:
model = Sequential([
    Dense(32, input_shape=(X_res.shape[1],), activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(2, activation='softmax'),    
])

In [16]:
model.compile(optimizer=Adam(learning_rate = 0.0001),
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

In [17]:
model.fit(X_res,Y_res,validation_split=0.1,batch_size=10,epochs=145,shuffle=True,verbose=2)

Train on 4951 samples, validate on 551 samples
Epoch 1/145
4951/4951 - 3s - loss: 0.6922 - accuracy: 0.5678 - val_loss: 0.6943 - val_accuracy: 0.4283
Epoch 2/145
4951/4951 - 1s - loss: 0.6828 - accuracy: 0.6887 - val_loss: 0.6815 - val_accuracy: 0.5535
Epoch 3/145
4951/4951 - 2s - loss: 0.6658 - accuracy: 0.7324 - val_loss: 0.6479 - val_accuracy: 0.7241
Epoch 4/145
4951/4951 - 2s - loss: 0.6407 - accuracy: 0.7449 - val_loss: 0.6309 - val_accuracy: 0.6878
Epoch 5/145
4951/4951 - 1s - loss: 0.6055 - accuracy: 0.7417 - val_loss: 0.6240 - val_accuracy: 0.6606
Epoch 6/145
4951/4951 - 2s - loss: 0.5677 - accuracy: 0.7435 - val_loss: 0.5565 - val_accuracy: 0.7586
Epoch 7/145
4951/4951 - 1s - loss: 0.5362 - accuracy: 0.7548 - val_loss: 0.5463 - val_accuracy: 0.7532
Epoch 8/145
4951/4951 - 1s - loss: 0.5120 - accuracy: 0.7603 - val_loss: 0.4897 - val_accuracy: 0.8094
Epoch 9/145
4951/4951 - 2s - loss: 0.4945 - accuracy: 0.7706 - val_loss: 0.4997 - val_accuracy: 0.8004
Epoch 10/145
4951/4951 - 1

Epoch 80/145
4951/4951 - 2s - loss: 0.4113 - accuracy: 0.8158 - val_loss: 0.4676 - val_accuracy: 0.8367
Epoch 81/145
4951/4951 - 2s - loss: 0.4114 - accuracy: 0.8202 - val_loss: 0.4752 - val_accuracy: 0.8312
Epoch 82/145
4951/4951 - 1s - loss: 0.4108 - accuracy: 0.8184 - val_loss: 0.4440 - val_accuracy: 0.8421
Epoch 83/145
4951/4951 - 1s - loss: 0.4100 - accuracy: 0.8158 - val_loss: 0.4486 - val_accuracy: 0.8421
Epoch 84/145
4951/4951 - 2s - loss: 0.4095 - accuracy: 0.8184 - val_loss: 0.4020 - val_accuracy: 0.8748
Epoch 85/145
4951/4951 - 2s - loss: 0.4093 - accuracy: 0.8202 - val_loss: 0.4572 - val_accuracy: 0.8403
Epoch 86/145
4951/4951 - 2s - loss: 0.4090 - accuracy: 0.8178 - val_loss: 0.4870 - val_accuracy: 0.8294
Epoch 87/145
4951/4951 - 2s - loss: 0.4090 - accuracy: 0.8186 - val_loss: 0.4608 - val_accuracy: 0.8403
Epoch 88/145
4951/4951 - 2s - loss: 0.4082 - accuracy: 0.8204 - val_loss: 0.4579 - val_accuracy: 0.8439
Epoch 89/145
4951/4951 - 2s - loss: 0.4077 - accuracy: 0.8241 - 

<tensorflow.python.keras.callbacks.History at 0x1d4ecbf3f28>

In [18]:
user_input=input("Enter the values one by one")
user_input=user_input.split(",")


for i in range(len(user_input)):
    # convert each item to int type
    user_input[i] = float(user_input[i])
    

Enter the values one by one1,67,147,2,1,1,228.69,36.60,1


In [19]:
user_input = np.array(user_input)
user_input = user_input.reshape(1,-1)
user_input = x_scaler.transform(user_input)
pred = model.predict(user_input)
output = np.argmax(pred[0])
if(output== 1):
  print("Warning! You have chances of getting a stroke!")
else:
  print("You are healthy and are less likely to get a stroke!")



In [20]:
model.save("final_model.h5")

In [21]:
import sklearn
from pyngrok import ngrok

In [7]:
%%writefile stroke.py
import streamlit as st
import base64
import sklearn
import numpy as np
import pickle as pkl
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
scal=MinMaxScaler()
#Load the saved model

model = tf.keras.models.load_model('final_model.h5')



st.set_page_config(page_title="Stroke prediction app ",page_icon="smiley",layout="centered",initial_sidebar_state="expanded")



def preprocess(gender,age,hypertension,alcohol_take,heart_disease,residence_type,avg_glucose_level,bmi,smoking_status ):   
 
    
    # Pre-processing user input   
    if gender=="male":
        gender=1 
    else: gender=0
	
    if heart_disease =="Yes":
        heart_disease = 1
    else:
        heart_disease = 0
    
    
    if residence_type=="Urban":
        residence_type=1
    else: residence_type = 0
	
    
    if smoking_status=="formely smoked":
        smoking_status = 1
    elif smoking_status=="never smoked":
        smoking_status = 2
    else:
        smoking_status =3
	
	


    user_input=[gender,age,hypertension,alcohol_take,residence_type,avg_glucose_level,bmi,smoking_status]
    user_input=np.array(user_input)
    user_input=user_input.reshape(1,-1)
    user_input=scal.fit_transform(user_input)
    prediction = model.predict(user_input)

    return prediction

    

       
    # front end elements of the web page 
html_temp = """ 
    <div style ="background-color:pink;padding:13px"> 
    <h1 style ="color:black;text-align:center;">Stroke Prediction App</h1> 
    </div> 
    """
      
# display the front end aspect
st.markdown(html_temp, unsafe_allow_html = True) 
st.subheader('by leesa-menezes ')
      
# following lines create boxes in which user can enter data required to make prediction
gender= st.radio("Select Gender: ", ('male', 'female'))
age=st.text_input("Age","Enter here")
hypertension =st.text_input("Hypertension","Enter here")
alcohol_take = st.text_input("Alcohol intake", "Please enter the amount of glasses consumed daily")
heart_disease= st.radio("Select you have a heart disease or not: ", ('Yes', 'No'))
residence_type= st.radio("Select your Residence Type: ", ('Urban', 'Rural'))
avg_glucose_level=st.text_input("Average Glucose Level","Please enter your value here")
bmi=st.text_input("BMI","Enter here")
smoking_status= st.radio("Select your smoking status: ", ('formely smoked', 'never smoked', 'smokes'))


#user_input=preprocess(sex,cp,exang, fbs, slope, thal )
pred=preprocess(gender,age,hypertension,alcohol_take,heart_disease,residence_type,avg_glucose_level,bmi,smoking_status)

output = np.argmax(pred[0])


if st.button("Predict"):    
  if output == 1:
    st.error('Warning! You have high risk of getting a stroke!')
    
  else:
    st.success('You have lower risk of getting a stroke!')
    
   



st.sidebar.subheader("About App")

st.sidebar.info("This web app is helps you to find out whether you are at a risk of developing a heart disease.")
st.sidebar.info("Enter the required fields and click on the 'Predict' button to check whether you have a healthy heart")
st.sidebar.info("Don't forget to rate this app")



feedback = st.sidebar.slider('How much would you rate this app?',min_value=0,max_value=5,step=1)

if feedback:
  st.header("Thank you for rating the app!")
  st.info("Caution: This is just a prediction and not doctoral advice. Kindly see a doctor if you feel the symptoms persist.")


Overwriting stroke.py


In [8]:
!nohup streamlit run stroke.py &
url = ngrok.connect(port='8501')
url

OSError: Background processes not supported.