In [30]:
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Dropout, Embedding
from tensorflow.keras.layers import Flatten, Input, AlphaDropout, GlobalMaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import os
import numpy as np
# os.environ['TF_USE_LEGACY_KERAS'] = '1'

class my_cnn(object):

    def __init__(self,input_size, optimizer='adam',loss='binary_crossentropy',num_classes=2, embedding_size=128):
        self.input_size = input_size
        self.max_index = 65535
        self.embedding_size = embedding_size
        self.optimizer = optimizer
        self.loss = loss
        self.metrics = ["accuracy"]
        self.num_classes = num_classes
        self.model =  self._build_model()
        print(self.model.summary())

    def _build_model(self):
        #        verbose, epochs, batch_size = 0, 1, 32
        model = Sequential()
        n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], self.num_classes
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
        model.add(Dropout(0.2))
        model.add(MaxPooling1D(pool_size=2))
        model.add(Flatten())
        model.add(Dense(100, activation='relu'))
        model.add(Dense(n_outputs, activation='sigmoid'))
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])

        #        model = Model(inputs = inputs, outputs = outputs)

        #        model.compile(optimizer = self.optimizer, loss = self.loss, metrics = self.metrics)
        return model


    def train(self, X_train, y_train, X_val, y_val, epochs, batch_size, checkpoint_every=None):
        self.model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, batch_size=batch_size)

    def get_model(self):
        return self.model

    def test(self, X_test, y_test, batch_size):
        self.model.evaluate(X_test, y_test, batch_size=batch_size)

    def predict(self, X_test, batch_size):
        predict_x=self.model.predict(X_test,batch_size)
        classes_x=np.argmax(predict_x,axis=1)
        return classes_x

    def save_model(self, path):
        self.model.save(path)


In [31]:
DATA_DIR = '/content/'
from glob import glob
all_csv_files = glob(DATA_DIR + "/*.csv")

In [32]:
import pandas as pd
for file in all_csv_files:
  data = pd.read_csv(file, on_bad_lines='warn')
data.head()

Skipping line 23399: expected 22 fields, saw 23

Skipping line 35099: expected 22 fields, saw 27
Skipping line 46794: expected 22 fields, saw 33

Skipping line 81887: expected 22 fields, saw 23
Skipping line 93591: expected 22 fields, saw 28

Skipping line 105286: expected 22 fields, saw 28
Skipping line 128682: expected 22 fields, saw 25

Skipping line 163793: expected 22 fields, saw 25

Skipping line 175488: expected 22 fields, saw 35

Skipping line 210584: expected 22 fields, saw 27
Skipping line 222288: expected 22 fields, saw 24

Skipping line 245676: expected 22 fields, saw 40

Skipping line 280766: expected 22 fields, saw 29

Skipping line 315872: expected 22 fields, saw 33

Skipping line 339275: expected 22 fields, saw 32

  data = pd.read_csv(file, on_bad_lines='warn')


Unnamed: 0,HeartDiseaseorAttack,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,Diabetes,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0.0,1.0,1.0,1.0,40.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,5.0,18.0,15.0,1.0,0.0,9.0,4.0,3.0
1,0.0,0.0,0.0,0.0,25.0,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,3.0,0.0,0.0,0.0,0.0,7.0,6.0,1.0
2,0.0,1.0,1.0,1.0,28.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,5.0,30.0,30.0,1.0,0.0,9.0,4.0,8.0
3,0.0,1.0,0.0,1.0,27.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,11.0,3.0,6.0
4,0.0,1.0,1.0,1.0,24.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,3.0,0.0,0.0,0.0,11.0,5.0,4.0


In [33]:
for col in data.columns:
  data[col] = data[col].apply(lambda x: float(x) if str(x).replace('.', '', 1).isdigit() else None)

In [34]:
data=data.dropna()
data.shape

(437153, 22)

In [35]:
data.columns

Index(['HeartDiseaseorAttack', 'HighBP', 'HighChol', 'CholCheck', 'BMI',
       'Smoker', 'Stroke', 'Diabetes', 'PhysActivity', 'Fruits', 'Veggies',
       'HvyAlcoholConsump', 'AnyHealthcare', 'NoDocbcCost', 'GenHlth',
       'MentHlth', 'PhysHlth', 'DiffWalk', 'Sex', 'Age', 'Education',
       'Income'],
      dtype='object')

In [36]:
def load_training_data(df):
    from sklearn.model_selection import train_test_split
    import pandas as pd
    c_label = 'HeartDiseaseorAttack'

    y = df[c_label].values
    X = df.loc[:, df.columns != c_label]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=13)
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = load_training_data(data)


In [37]:
import numpy as np
loaded = list()
loaded.append(X_train)
# print(loaded)
X_train = np.dstack(loaded)
loaded = list()
loaded.append(X_test)
X_test = np.dstack(loaded)
X_train.shape

X_train = np.asarray(X_train).astype('float32')
X_test = np.asarray(X_test).astype('float32')

In [38]:
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
model = my_cnn(input_size=None)
# X_train = np.dstack(X_train)
# X_test = np.dstack(X_test)
model.train(X_train, y_train, X_test, y_test, epochs=5, batch_size=300)
m = model.get_model()



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None
Epoch 1/5
[1m977/977[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 30ms/step - accuracy: 0.9027 - loss: 0.2839 - val_accuracy: 0.9072 - val_loss: 0.2415
Epoch 2/5
[1m977/977[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 30ms/step - accuracy: 0.9070 - loss: 0.2445 - val_accuracy: 0.9071 - val_loss: 0.2408
Epoch 3/5
[1m977/977[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 30ms/step - accuracy: 0.9074 - loss: 0.2430 - val_accuracy: 0.9074 - val_loss: 0.2399
Epoch 4/5
[1m977/977[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 30ms/step - accuracy: 0.9067 - loss: 0.2434 - val_accuracy: 0.9075 - val_loss: 0.2404
Epoch 5/5
[1m977/977[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 29ms/step - accuracy: 0.9068 - loss: 0.2435 - val_accuracy: 0.9072 - val_loss: 0.2394


In [39]:
y_pred = model.predict(X_test=X_test,batch_size=300)

[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step


In [40]:
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [47]:
## Evaluate Results
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
print('Accuracy: ', np.round(accuracy_score(y_test[:,1], y_pred),3))
print('F1 Score: ', np.round(f1_score(y_test[:,1], y_pred),3))
print('Precision: ', np.round(precision_score(y_test[:,1], y_pred),3))
print('Recall: ', np.round(recall_score(y_test[:,1], y_pred),3))
print('***************')

Accuracy:  0.907
F1 Score:  0.205
Precision:  0.544
Recall:  0.126
***************


array([1., 1., 1., ..., 0., 1., 1.])