# Machine Learning Diabetes Classification

## Read csv and perform basic data cleaning

In [1]:
# Install zipfile36 if you haven't already
# !pip install zipfile36



In [46]:
# Import our dependencies
import pandas as pd
import numpy as np
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
from matplotlib import pyplot as plt

# SQL
from sqlalchemy import create_engine
import sqlite3 as sql

# Machine learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score
import tensorflow as tf

In [2]:
# Create dataframe
z = urlopen('https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip')
myzip = ZipFile(BytesIO(z.read())).extract('dataset_diabetes/diabetic_data.csv')
df = pd.read_csv(myzip)
df.head(5)
target = ['A1Cresult']

In [3]:
# Drop the non-beneficial ID columns, 'encounter_id' and 'patient_nbr'
df = df.drop(['patient_nbr'],1)

# Drop mostly empty columns, 'weight', 'payer_code', 'max_glu_serum', and 'medical_specialty'
df = df.drop(['weight', 'payer_code', 'max_glu_serum', 'medical_specialty', ],1)

# Replace '?' values to nulls
df.replace({'?': np.nan}, inplace=True)

# Replace 'None' values to nulls
df.replace({'None': np.nan}, inplace=True)

# Drop the null rows
df = df.dropna()

  
  """


In [4]:
# Convert the target column values to normal and high based on their values
x = {'Norm': 'low'}   
df = df.replace(x)
x = dict.fromkeys(['>7', '>8'], 'high')    
df = df.replace(x)
df.reset_index(inplace=True, drop=True)

In [5]:
# Determine the number of unique values in each column.
df.nunique()

encounter_id                16193
race                            5
gender                          2
age                            10
admission_type_id               8
discharge_disposition_id       21
admission_source_id            15
time_in_hospital               14
num_lab_procedures            114
num_procedures                  7
num_medications                67
number_outpatient              24
number_emergency               19
number_inpatient               18
diag_1                        490
diag_2                        486
diag_3                        539
number_diagnoses               12
A1Cresult                       2
metformin                       4
repaglinide                     4
nateglinide                     4
chlorpropamide                  2
glimepiride                     4
acetohexamide                   1
glipizide                       4
glyburide                       4
tolbutamide                     2
pioglitazone                    4
rosiglitazone 

In [6]:
# Drop columns with only 1 value
df = df.drop(['acetohexamide', 'troglitazone', 'examide', 'citoglipton','glimepiride-pioglitazone', 'metformin-rosiglitazone', 'metformin-pioglitazone'],1)
df.nunique()

  


encounter_id                16193
race                            5
gender                          2
age                            10
admission_type_id               8
discharge_disposition_id       21
admission_source_id            15
time_in_hospital               14
num_lab_procedures            114
num_procedures                  7
num_medications                67
number_outpatient              24
number_emergency               19
number_inpatient               18
diag_1                        490
diag_2                        486
diag_3                        539
number_diagnoses               12
A1Cresult                       2
metformin                       4
repaglinide                     4
nateglinide                     4
chlorpropamide                  2
glimepiride                     4
glipizide                       4
glyburide                       4
tolbutamide                     2
pioglitazone                    4
rosiglitazone                   4
acarbose      

In [7]:
#drop variables with outliers ()
df = df.drop(['number_inpatient', 'number_outpatient','number_emergency'],1)
df.nunique()

  


encounter_id                16193
race                            5
gender                          2
age                            10
admission_type_id               8
discharge_disposition_id       21
admission_source_id            15
time_in_hospital               14
num_lab_procedures            114
num_procedures                  7
num_medications                67
diag_1                        490
diag_2                        486
diag_3                        539
number_diagnoses               12
A1Cresult                       2
metformin                       4
repaglinide                     4
nateglinide                     4
chlorpropamide                  2
glimepiride                     4
glipizide                       4
glyburide                       4
tolbutamide                     2
pioglitazone                    4
rosiglitazone                   4
acarbose                        4
miglitol                        4
tolazamide                      3
insulin       

In [8]:
#Info on the data frame data types, .non_null, etc. 
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16193 entries, 0 to 16192
Data columns (total 35 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   encounter_id              16193 non-null  int64 
 1   race                      16193 non-null  object
 2   gender                    16193 non-null  object
 3   age                       16193 non-null  object
 4   admission_type_id         16193 non-null  int64 
 5   discharge_disposition_id  16193 non-null  int64 
 6   admission_source_id       16193 non-null  int64 
 7   time_in_hospital          16193 non-null  int64 
 8   num_lab_procedures        16193 non-null  int64 
 9   num_procedures            16193 non-null  int64 
 10  num_medications           16193 non-null  int64 
 11  diag_1                    16193 non-null  object
 12  diag_2                    16193 non-null  object
 13  diag_3                    16193 non-null  object
 14  number_diagnoses      

In [9]:
#Looking at dataframe 
df.head()

Unnamed: 0,encounter_id,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,...,rosiglitazone,acarbose,miglitol,tolazamide,insulin,glyburide-metformin,glipizide-metformin,change,diabetesMed,readmitted
0,236316,Caucasian,Male,[80-90),1,3,7,6,64,3,...,No,No,No,No,No,No,No,Ch,Yes,NO
1,955884,Caucasian,Female,[70-80),1,3,7,5,34,0,...,No,No,No,No,Up,No,No,Ch,Yes,>30
2,1257282,Other,Female,[50-60),1,1,7,2,53,0,...,No,No,No,No,Up,No,No,Ch,Yes,NO
3,1270524,Caucasian,Male,[60-70),1,2,7,1,59,0,...,No,No,No,No,Steady,No,No,No,Yes,NO
4,1455252,Caucasian,Female,[80-90),1,1,7,3,34,0,...,No,No,No,No,No,No,No,No,No,>30


In [10]:
# Generate our categorical variable list
df_cat = df.dtypes[df.dtypes == "object"].index.tolist()
df_cat

['race',
 'gender',
 'age',
 'diag_1',
 'diag_2',
 'diag_3',
 'A1Cresult',
 'metformin',
 'repaglinide',
 'nateglinide',
 'chlorpropamide',
 'glimepiride',
 'glipizide',
 'glyburide',
 'tolbutamide',
 'pioglitazone',
 'rosiglitazone',
 'acarbose',
 'miglitol',
 'tolazamide',
 'insulin',
 'glyburide-metformin',
 'glipizide-metformin',
 'change',
 'diabetesMed',
 'readmitted']

In [11]:
# Check the number of unique values in each column
df[df_cat].nunique()

race                     5
gender                   2
age                     10
diag_1                 490
diag_2                 486
diag_3                 539
A1Cresult                2
metformin                4
repaglinide              4
nateglinide              4
chlorpropamide           2
glimepiride              4
glipizide                4
glyburide                4
tolbutamide              2
pioglitazone             4
rosiglitazone            4
acarbose                 4
miglitol                 4
tolazamide               3
insulin                  4
glyburide-metformin      4
glipizide-metformin      2
change                   2
diabetesMed              2
readmitted               3
dtype: int64

In [12]:
# Check the unique value counts to see if binning is required
df.diag_1.value_counts()

428    1144
414     976
786     853
410     806
486     531
       ... 
237       1
617       1
356       1
989       1
893       1
Name: diag_1, Length: 490, dtype: int64

In [13]:
# Check the unique value counts to see if binning is required
df.diag_2.value_counts()

276       1280
428        938
250        753
427        739
250.02     677
          ... 
316          1
E858         1
980          1
725          1
110          1
Name: diag_2, Length: 486, dtype: int64

In [14]:
# Check the unique value counts to see if binning is required
df.diag_3.value_counts()

250    1557
401    1229
276    1057
428     665
414     550
       ... 
550       1
V55       1
314       1
579       1
825       1
Name: diag_3, Length: 539, dtype: int64

In [15]:
# Create SQLite engine using SQLAlechmey
engine = create_engine('sqlite:///diabetes_data.db', echo=False)
conn = engine.connect()
df.to_sql('diabetes', conn, index=False, if_exists='replace')

In [16]:
# Check dataset
pd.read_sql('SELECT * FROM diabetes LIMIT 10', conn)

Unnamed: 0,encounter_id,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,...,rosiglitazone,acarbose,miglitol,tolazamide,insulin,glyburide-metformin,glipizide-metformin,change,diabetesMed,readmitted
0,236316,Caucasian,Male,[80-90),1,3,7,6,64,3,...,No,No,No,No,No,No,No,Ch,Yes,NO
1,955884,Caucasian,Female,[70-80),1,3,7,5,34,0,...,No,No,No,No,Up,No,No,Ch,Yes,>30
2,1257282,Other,Female,[50-60),1,1,7,2,53,0,...,No,No,No,No,Up,No,No,Ch,Yes,NO
3,1270524,Caucasian,Male,[60-70),1,2,7,1,59,0,...,No,No,No,No,Steady,No,No,No,Yes,NO
4,1455252,Caucasian,Female,[80-90),1,1,7,3,34,0,...,No,No,No,No,No,No,No,No,No,>30
5,1810752,Caucasian,Male,[70-80),1,3,7,14,78,1,...,No,No,No,No,Up,No,No,Ch,Yes,<30
6,1881372,Caucasian,Male,[60-70),1,2,7,4,65,2,...,No,No,No,No,Steady,No,No,No,Yes,<30
7,1968528,Caucasian,Female,[70-80),6,25,1,10,56,2,...,No,No,No,No,Down,No,No,Ch,Yes,>30
8,2092362,Caucasian,Female,[70-80),6,25,7,11,88,1,...,No,No,No,No,Down,No,No,Ch,Yes,>30
9,2095932,AfricanAmerican,Female,[30-40),6,25,7,8,62,0,...,No,No,No,No,Steady,No,No,Ch,Yes,>30


In [17]:
# Create cleanup table
engine.execute('CREATE TABLE "cleaned_columns" ('
               'id BIGINT NOT NULL,'
               'diag_1c VARCHAR, '
               'diag_2c VARCHAR, '
               'diag_3c VARCHAR, '
               'PRIMARY KEY (id));')

OperationalError: (sqlite3.OperationalError) table "cleaned_columns" already exists
[SQL: CREATE TABLE "cleaned_columns" (id BIGINT NOT NULL,diag_1c VARCHAR, diag_2c VARCHAR, diag_3c VARCHAR, PRIMARY KEY (id));]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [18]:
# Insert columns into new table to clean
engine.execute('INSERT INTO "cleaned_columns" '
               'SELECT encounter_id AS id, '
               'CASE '
                   'WHEN (diag_1 BETWEEN 390 AND 459) OR (diag_1 = 785) THEN "circulatory" '
                   'WHEN (diag_1 BETWEEN 460 AND 519) OR (diag_1 = 786) THEN "respiratory" '
                   'WHEN (diag_1 BETWEEN 520 AND 579) OR (diag_1 = 787) THEN "digestive" '
                   'WHEN (diag_1 BETWEEN 250 AND 251) THEN "diabetes" '
                   'WHEN (diag_1 BETWEEN 800 AND 999) THEN "injury" '
                   'WHEN (diag_1 BETWEEN 710 AND 739) THEN "musculoskeletal" '
                   'WHEN (diag_1 BETWEEN 580 AND 629) OR (diag_1 = 788) THEN "genitourinary" '
                   'WHEN (diag_1 BETWEEN 140 AND 239) THEN "neoplasms" '
                   'ELSE "other" '
               'END AS diag_1c, '
               'CASE '
                   'WHEN (diag_2 BETWEEN 390 AND 459) OR (diag_2 = 785) THEN "circulatory" '
                   'WHEN (diag_2 BETWEEN 460 AND 519) OR (diag_2 = 786) THEN "respiratory" '
                   'WHEN (diag_2 BETWEEN 520 AND 579) OR (diag_2 = 787) THEN "digestive" '
                   'WHEN (diag_2 BETWEEN 250 AND 251) THEN "diabetes" '
                   'WHEN (diag_2 BETWEEN 800 AND 999) THEN "injury" '
                   'WHEN (diag_2 BETWEEN 710 AND 739) THEN "musculoskeletal" '
                   'WHEN (diag_2 BETWEEN 580 AND 629) OR (diag_2 = 788) THEN "genitourinary" '
                   'WHEN (diag_2 BETWEEN 140 AND 239) THEN "neoplasms" '
                   'ELSE "other" '
               'END AS diag_2c, '
               'CASE '
                   'WHEN (diag_3 BETWEEN 390 AND 459) OR (diag_3 = 785) THEN "circulatory" '
                   'WHEN (diag_3 BETWEEN 460 AND 519) OR (diag_3 = 786) THEN "respiratory" '
                   'WHEN (diag_3 BETWEEN 520 AND 579) OR (diag_3 = 787) THEN "digestive" '
                   'WHEN (diag_3 BETWEEN 250 AND 251) THEN "diabetes" '
                   'WHEN (diag_3 BETWEEN 800 AND 999) THEN "injury" '
                   'WHEN (diag_3 BETWEEN 710 AND 739) THEN "musculoskeletal" '
                   'WHEN (diag_3 BETWEEN 580 AND 629) OR (diag_3 = 788) THEN "genitourinary" '
                   'WHEN (diag_3 BETWEEN 140 AND 239) THEN "neoplasms" '
                   'ELSE "other" '
               'END AS diag_3c '
               'FROM diabetes;')

IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: cleaned_columns.id
[SQL: INSERT INTO "cleaned_columns" SELECT encounter_id AS id, CASE WHEN (diag_1 BETWEEN 390 AND 459) OR (diag_1 = 785) THEN "circulatory" WHEN (diag_1 BETWEEN 460 AND 519) OR (diag_1 = 786) THEN "respiratory" WHEN (diag_1 BETWEEN 520 AND 579) OR (diag_1 = 787) THEN "digestive" WHEN (diag_1 BETWEEN 250 AND 251) THEN "diabetes" WHEN (diag_1 BETWEEN 800 AND 999) THEN "injury" WHEN (diag_1 BETWEEN 710 AND 739) THEN "musculoskeletal" WHEN (diag_1 BETWEEN 580 AND 629) OR (diag_1 = 788) THEN "genitourinary" WHEN (diag_1 BETWEEN 140 AND 239) THEN "neoplasms" ELSE "other" END AS diag_1c, CASE WHEN (diag_2 BETWEEN 390 AND 459) OR (diag_2 = 785) THEN "circulatory" WHEN (diag_2 BETWEEN 460 AND 519) OR (diag_2 = 786) THEN "respiratory" WHEN (diag_2 BETWEEN 520 AND 579) OR (diag_2 = 787) THEN "digestive" WHEN (diag_2 BETWEEN 250 AND 251) THEN "diabetes" WHEN (diag_2 BETWEEN 800 AND 999) THEN "injury" WHEN (diag_2 BETWEEN 710 AND 739) THEN "musculoskeletal" WHEN (diag_2 BETWEEN 580 AND 629) OR (diag_2 = 788) THEN "genitourinary" WHEN (diag_2 BETWEEN 140 AND 239) THEN "neoplasms" ELSE "other" END AS diag_2c, CASE WHEN (diag_3 BETWEEN 390 AND 459) OR (diag_3 = 785) THEN "circulatory" WHEN (diag_3 BETWEEN 460 AND 519) OR (diag_3 = 786) THEN "respiratory" WHEN (diag_3 BETWEEN 520 AND 579) OR (diag_3 = 787) THEN "digestive" WHEN (diag_3 BETWEEN 250 AND 251) THEN "diabetes" WHEN (diag_3 BETWEEN 800 AND 999) THEN "injury" WHEN (diag_3 BETWEEN 710 AND 739) THEN "musculoskeletal" WHEN (diag_3 BETWEEN 580 AND 629) OR (diag_3 = 788) THEN "genitourinary" WHEN (diag_3 BETWEEN 140 AND 239) THEN "neoplasms" ELSE "other" END AS diag_3c FROM diabetes;]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [19]:
# Check data
pd.read_sql('SELECT * FROM cleaned_columns LIMIT 10', conn)

Unnamed: 0,id,diag_1c,diag_2c,diag_3c
0,236316,circulatory,circulatory,circulatory
1,955884,circulatory,circulatory,other
2,1257282,genitourinary,diabetes,circulatory
3,1270524,circulatory,circulatory,respiratory
4,1455252,circulatory,circulatory,circulatory
5,1810752,circulatory,diabetes,diabetes
6,1881372,circulatory,circulatory,respiratory
7,1968528,circulatory,circulatory,diabetes
8,2092362,diabetes,other,genitourinary
9,2095932,diabetes,circulatory,other


In [20]:
# Join diabetes and cleaned_columns tables
diabetes_df = pd.read_sql('SELECT * FROM diabetes JOIN cleaned_columns ON diabetes.encounter_id = cleaned_columns.id', conn)
diabetes_df 

Unnamed: 0,encounter_id,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,...,insulin,glyburide-metformin,glipizide-metformin,change,diabetesMed,readmitted,id,diag_1c,diag_2c,diag_3c
0,236316,Caucasian,Male,[80-90),1,3,7,6,64,3,...,No,No,No,Ch,Yes,NO,236316,circulatory,circulatory,circulatory
1,955884,Caucasian,Female,[70-80),1,3,7,5,34,0,...,Up,No,No,Ch,Yes,>30,955884,circulatory,circulatory,other
2,1257282,Other,Female,[50-60),1,1,7,2,53,0,...,Up,No,No,Ch,Yes,NO,1257282,genitourinary,diabetes,circulatory
3,1270524,Caucasian,Male,[60-70),1,2,7,1,59,0,...,Steady,No,No,No,Yes,NO,1270524,circulatory,circulatory,respiratory
4,1455252,Caucasian,Female,[80-90),1,1,7,3,34,0,...,No,No,No,No,No,>30,1455252,circulatory,circulatory,circulatory
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16188,443816024,Caucasian,Female,[70-80),3,6,1,3,27,1,...,Steady,No,No,Ch,Yes,NO,443816024,musculoskeletal,circulatory,diabetes
16189,443835140,Caucasian,Male,[70-80),3,6,1,13,77,6,...,Up,No,No,Ch,Yes,NO,443835140,circulatory,circulatory,respiratory
16190,443842016,Caucasian,Female,[70-80),1,1,7,9,50,2,...,Steady,No,No,Ch,Yes,>30,443842016,digestive,digestive,diabetes
16191,443842022,Other,Female,[40-50),1,1,7,14,73,6,...,Up,No,No,Ch,Yes,>30,443842022,genitourinary,genitourinary,respiratory


In [21]:
#Check Datatypes
diabetes_df.dtypes

encounter_id                 int64
race                        object
gender                      object
age                         object
admission_type_id            int64
discharge_disposition_id     int64
admission_source_id          int64
time_in_hospital             int64
num_lab_procedures           int64
num_procedures               int64
num_medications              int64
diag_1                      object
diag_2                      object
diag_3                      object
number_diagnoses             int64
A1Cresult                   object
metformin                   object
repaglinide                 object
nateglinide                 object
chlorpropamide              object
glimepiride                 object
glipizide                   object
glyburide                   object
tolbutamide                 object
pioglitazone                object
rosiglitazone               object
acarbose                    object
miglitol                    object
tolazamide          

In [22]:
diabetes_df

Unnamed: 0,encounter_id,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,...,insulin,glyburide-metformin,glipizide-metformin,change,diabetesMed,readmitted,id,diag_1c,diag_2c,diag_3c
0,236316,Caucasian,Male,[80-90),1,3,7,6,64,3,...,No,No,No,Ch,Yes,NO,236316,circulatory,circulatory,circulatory
1,955884,Caucasian,Female,[70-80),1,3,7,5,34,0,...,Up,No,No,Ch,Yes,>30,955884,circulatory,circulatory,other
2,1257282,Other,Female,[50-60),1,1,7,2,53,0,...,Up,No,No,Ch,Yes,NO,1257282,genitourinary,diabetes,circulatory
3,1270524,Caucasian,Male,[60-70),1,2,7,1,59,0,...,Steady,No,No,No,Yes,NO,1270524,circulatory,circulatory,respiratory
4,1455252,Caucasian,Female,[80-90),1,1,7,3,34,0,...,No,No,No,No,No,>30,1455252,circulatory,circulatory,circulatory
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16188,443816024,Caucasian,Female,[70-80),3,6,1,3,27,1,...,Steady,No,No,Ch,Yes,NO,443816024,musculoskeletal,circulatory,diabetes
16189,443835140,Caucasian,Male,[70-80),3,6,1,13,77,6,...,Up,No,No,Ch,Yes,NO,443835140,circulatory,circulatory,respiratory
16190,443842016,Caucasian,Female,[70-80),1,1,7,9,50,2,...,Steady,No,No,Ch,Yes,>30,443842016,digestive,digestive,diabetes
16191,443842022,Other,Female,[40-50),1,1,7,14,73,6,...,Up,No,No,Ch,Yes,>30,443842022,genitourinary,genitourinary,respiratory


In [23]:
# Check the unique value counts to see if binning is required
diabetes_df.A1Cresult.value_counts()


high    11339
low      4854
Name: A1Cresult, dtype: int64

In [None]:
#taking target out of df 
# object_db_df = 

In [18]:
# Chceck to see if we need to bin any of the groups. 

# Bin the values based on the medical codes 

In [None]:
#Drop 'Change' variable. 

In [24]:
# Generate our categorical variable list
diabetes_df_cat = diabetes_df.dtypes[diabetes_df.dtypes == "object"].index.tolist()
diabetes_df_cat

['race',
 'gender',
 'age',
 'diag_1',
 'diag_2',
 'diag_3',
 'A1Cresult',
 'metformin',
 'repaglinide',
 'nateglinide',
 'chlorpropamide',
 'glimepiride',
 'glipizide',
 'glyburide',
 'tolbutamide',
 'pioglitazone',
 'rosiglitazone',
 'acarbose',
 'miglitol',
 'tolazamide',
 'insulin',
 'glyburide-metformin',
 'glipizide-metformin',
 'change',
 'diabetesMed',
 'readmitted',
 'diag_1c',
 'diag_2c',
 'diag_3c']

In [25]:
# Drop the non-beneficial ID and diag columns, 'encounter_id', 'id', 'diag_1', 'diag_2', and 'diag_3'
diabetes_2_df = diabetes_df.drop(['encounter_id', 'id', 'diag_1', 'diag_2', 'diag_3'],1)

  


In [26]:
diabetes_2_df 

Unnamed: 0,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,...,tolazamide,insulin,glyburide-metformin,glipizide-metformin,change,diabetesMed,readmitted,diag_1c,diag_2c,diag_3c
0,Caucasian,Male,[80-90),1,3,7,6,64,3,18,...,No,No,No,No,Ch,Yes,NO,circulatory,circulatory,circulatory
1,Caucasian,Female,[70-80),1,3,7,5,34,0,17,...,No,Up,No,No,Ch,Yes,>30,circulatory,circulatory,other
2,Other,Female,[50-60),1,1,7,2,53,0,6,...,No,Up,No,No,Ch,Yes,NO,genitourinary,diabetes,circulatory
3,Caucasian,Male,[60-70),1,2,7,1,59,0,12,...,No,Steady,No,No,No,Yes,NO,circulatory,circulatory,respiratory
4,Caucasian,Female,[80-90),1,1,7,3,34,0,11,...,No,No,No,No,No,No,>30,circulatory,circulatory,circulatory
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16188,Caucasian,Female,[70-80),3,6,1,3,27,1,29,...,No,Steady,No,No,Ch,Yes,NO,musculoskeletal,circulatory,diabetes
16189,Caucasian,Male,[70-80),3,6,1,13,77,6,65,...,No,Up,No,No,Ch,Yes,NO,circulatory,circulatory,respiratory
16190,Caucasian,Female,[70-80),1,1,7,9,50,2,33,...,No,Steady,No,No,Ch,Yes,>30,digestive,digestive,diabetes
16191,Other,Female,[40-50),1,1,7,14,73,6,26,...,No,Up,No,No,Ch,Yes,>30,genitourinary,genitourinary,respiratory


In [27]:
# Check data
diabetes_2_df.dtypes

race                        object
gender                      object
age                         object
admission_type_id            int64
discharge_disposition_id     int64
admission_source_id          int64
time_in_hospital             int64
num_lab_procedures           int64
num_procedures               int64
num_medications              int64
number_diagnoses             int64
A1Cresult                   object
metformin                   object
repaglinide                 object
nateglinide                 object
chlorpropamide              object
glimepiride                 object
glipizide                   object
glyburide                   object
tolbutamide                 object
pioglitazone                object
rosiglitazone               object
acarbose                    object
miglitol                    object
tolazamide                  object
insulin                     object
glyburide-metformin         object
glipizide-metformin         object
change              

In [28]:
# # Generate our categorical variable list
diabetes_2_df_cat = diabetes_2_df.dtypes[diabetes_2_df.dtypes == "object"].index.tolist()
diabetes_2_df_cat


['race',
 'gender',
 'age',
 'A1Cresult',
 'metformin',
 'repaglinide',
 'nateglinide',
 'chlorpropamide',
 'glimepiride',
 'glipizide',
 'glyburide',
 'tolbutamide',
 'pioglitazone',
 'rosiglitazone',
 'acarbose',
 'miglitol',
 'tolazamide',
 'insulin',
 'glyburide-metformin',
 'glipizide-metformin',
 'change',
 'diabetesMed',
 'readmitted',
 'diag_1c',
 'diag_2c',
 'diag_3c']

In [29]:
diabetes_2_df_cat

['race',
 'gender',
 'age',
 'A1Cresult',
 'metformin',
 'repaglinide',
 'nateglinide',
 'chlorpropamide',
 'glimepiride',
 'glipizide',
 'glyburide',
 'tolbutamide',
 'pioglitazone',
 'rosiglitazone',
 'acarbose',
 'miglitol',
 'tolazamide',
 'insulin',
 'glyburide-metformin',
 'glipizide-metformin',
 'change',
 'diabetesMed',
 'readmitted',
 'diag_1c',
 'diag_2c',
 'diag_3c']

In [30]:
# #create datframe without A1C Results
# db_2_df = diabetes_df.drop(columns = "A1Cresult")
# db_2_df

In [96]:
# db_2_df.dtypes 


In [97]:
# # Generate our categorical variable list
# db_df_cat = db_2_df.dtypes[db_2_df.dtypes == "object"].index.tolist()
# db_df_cat


In [31]:
# Error somewhere is this frame. Missing values 

#OneHot encode the categorical data. 
from sklearn.preprocessing import OneHotEncoder

# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(diabetes_2_df[diabetes_2_df_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names_out(diabetes_2_df_cat)
encode_df.head()

Unnamed: 0,race_AfricanAmerican,race_Asian,race_Caucasian,race_Hispanic,race_Other,gender_Female,gender_Male,age_[0-10),age_[10-20),age_[20-30),...,diag_2c_respiratory,diag_3c_circulatory,diag_3c_diabetes,diag_3c_digestive,diag_3c_genitourinary,diag_3c_injury,diag_3c_musculoskeletal,diag_3c_neoplasms,diag_3c_other,diag_3c_respiratory
0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
encode_df.info

<bound method DataFrame.info of        race_AfricanAmerican  race_Asian  race_Caucasian  race_Hispanic  \
0                       0.0         0.0             1.0            0.0   
1                       0.0         0.0             1.0            0.0   
2                       0.0         0.0             0.0            0.0   
3                       0.0         0.0             1.0            0.0   
4                       0.0         0.0             1.0            0.0   
...                     ...         ...             ...            ...   
16188                   0.0         0.0             1.0            0.0   
16189                   0.0         0.0             1.0            0.0   
16190                   0.0         0.0             1.0            0.0   
16191                   0.0         0.0             0.0            0.0   
16192                   1.0         0.0             0.0            0.0   

       race_Other  gender_Female  gender_Male  age_[0-10)  age_[10-20)  \
0    

In [33]:
encode_df.head(20)

Unnamed: 0,race_AfricanAmerican,race_Asian,race_Caucasian,race_Hispanic,race_Other,gender_Female,gender_Male,age_[0-10),age_[10-20),age_[20-30),...,diag_2c_respiratory,diag_3c_circulatory,diag_3c_diabetes,diag_3c_digestive,diag_3c_genitourinary,diag_3c_injury,diag_3c_musculoskeletal,diag_3c_neoplasms,diag_3c_other,diag_3c_respiratory
0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [34]:
for col_name in encode_df.columns: 
    print(col_name)

race_AfricanAmerican
race_Asian
race_Caucasian
race_Hispanic
race_Other
gender_Female
gender_Male
age_[0-10)
age_[10-20)
age_[20-30)
age_[30-40)
age_[40-50)
age_[50-60)
age_[60-70)
age_[70-80)
age_[80-90)
age_[90-100)
A1Cresult_high
A1Cresult_low
metformin_Down
metformin_No
metformin_Steady
metformin_Up
repaglinide_Down
repaglinide_No
repaglinide_Steady
repaglinide_Up
nateglinide_Down
nateglinide_No
nateglinide_Steady
nateglinide_Up
chlorpropamide_No
chlorpropamide_Steady
glimepiride_Down
glimepiride_No
glimepiride_Steady
glimepiride_Up
glipizide_Down
glipizide_No
glipizide_Steady
glipizide_Up
glyburide_Down
glyburide_No
glyburide_Steady
glyburide_Up
tolbutamide_No
tolbutamide_Steady
pioglitazone_Down
pioglitazone_No
pioglitazone_Steady
pioglitazone_Up
rosiglitazone_Down
rosiglitazone_No
rosiglitazone_Steady
rosiglitazone_Up
acarbose_Down
acarbose_No
acarbose_Steady
acarbose_Up
miglitol_Down
miglitol_No
miglitol_Steady
miglitol_Up
tolazamide_No
tolazamide_Steady
tolazamide_Up
insulin_D

In [35]:
# # Merge one-hot encoded features and drop the originals
diabetes_2_df = diabetes_2_df.merge(encode_df,left_index=True, right_index=True)
diabetes_2_df = diabetes_2_df.drop(diabetes_2_df_cat,1)
diabetes_2_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_diagnoses,race_AfricanAmerican,race_Asian,...,diag_2c_respiratory,diag_3c_circulatory,diag_3c_diabetes,diag_3c_digestive,diag_3c_genitourinary,diag_3c_injury,diag_3c_musculoskeletal,diag_3c_neoplasms,diag_3c_other,diag_3c_respiratory
0,1,3,7,6,64,3,18,7,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,3,7,5,34,0,17,7,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,1,7,2,53,0,6,3,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,2,7,1,59,0,12,7,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1,1,7,3,34,0,11,8,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
for col_name in diabetes_2_df.columns: 
    print(col_name)

admission_type_id
discharge_disposition_id
admission_source_id
time_in_hospital
num_lab_procedures
num_procedures
num_medications
number_diagnoses
race_AfricanAmerican
race_Asian
race_Caucasian
race_Hispanic
race_Other
gender_Female
gender_Male
age_[0-10)
age_[10-20)
age_[20-30)
age_[30-40)
age_[40-50)
age_[50-60)
age_[60-70)
age_[70-80)
age_[80-90)
age_[90-100)
A1Cresult_high
A1Cresult_low
metformin_Down
metformin_No
metformin_Steady
metformin_Up
repaglinide_Down
repaglinide_No
repaglinide_Steady
repaglinide_Up
nateglinide_Down
nateglinide_No
nateglinide_Steady
nateglinide_Up
chlorpropamide_No
chlorpropamide_Steady
glimepiride_Down
glimepiride_No
glimepiride_Steady
glimepiride_Up
glipizide_Down
glipizide_No
glipizide_Steady
glipizide_Up
glyburide_Down
glyburide_No
glyburide_Steady
glyburide_Up
tolbutamide_No
tolbutamide_Steady
pioglitazone_Down
pioglitazone_No
pioglitazone_Steady
pioglitazone_Up
rosiglitazone_Down
rosiglitazone_No
rosiglitazone_Steady
rosiglitazone_Up
acarbose_Down
ac

In [54]:
#Create new dataframe with top only 8 features
top_8_df = diabetes_2_df[['num_lab_procedures','num_medications','time_in_hospital','num_procedures','number_diagnoses','discharge_disposition_id','admission_type_id','admission_source_id']] 
top_8_df.head(20)

Unnamed: 0,num_lab_procedures,num_medications,time_in_hospital,num_procedures,number_diagnoses,discharge_disposition_id,admission_type_id,admission_source_id
0,64,18,6,3,7,3,1,7
1,34,17,5,0,7,3,1,7
2,53,6,2,0,3,1,1,7
3,59,12,1,0,7,2,1,7
4,34,11,3,0,8,1,1,7
5,78,19,14,1,8,3,1,7
6,65,19,4,2,7,2,1,7
7,56,24,10,2,9,25,6,1
8,88,16,11,1,9,25,6,7
9,62,21,8,0,9,25,6,7


In [55]:
#remove target from features data
y = diabetes_2_df['A1Cresult_high']
X = top_8_df
# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [56]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.661


In [57]:
importances = rf_model.feature_importances_

In [58]:
# List the features sorted in descending order by feature importance
importances = rf_model.feature_importances_
importances
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

[(0.31764033064813757, 'num_lab_procedures'),
 (0.24568759084847203, 'num_medications'),
 (0.15557628673490595, 'time_in_hospital'),
 (0.08200127908277177, 'num_procedures'),
 (0.0609788534619337, 'discharge_disposition_id'),
 (0.056156345651080915, 'number_diagnoses'),
 (0.04579760909909611, 'admission_type_id'),
 (0.036161704473602, 'admission_source_id')]

In [70]:
#Trying PCA on original data 
# //95% of variance
from sklearn.decomposition import PCA
pca = PCA(n_components = 0.95)
pca.fit(diabetes_2_df)
reduced = pca.transform(diabetes_2_df)

In [74]:
#Call into dataframe 
transform_pca = pd.DataFrame(reduced)
transform_pca

Unnamed: 0,0,1,2,3,4
0,9.572246,-0.558393,-0.547942,0.682432,0.757815
1,-19.961935,4.699503,-0.728852,1.933973,1.141393
2,-4.139627,-10.890984,-1.705775,0.564171,-0.603588
3,2.993975,-6.358675,-1.311252,1.211684,-2.899928
4,-21.394304,-1.462835,-2.242847,1.836474,0.363659
...,...,...,...,...,...
16188,-24.488951,18.129370,1.134875,-2.220189,-3.793193
16189,32.820330,43.780869,-1.345880,-2.921411,-2.064557
16190,-0.636428,17.126420,-3.888855,1.749095,1.904950
16191,20.720911,6.619875,-2.777139,-0.446661,7.031781


In [72]:
#Trying PCA on original data 
# //99% of variance
from sklearn.decomposition import PCA
pca = PCA(n_components = 0.99)
pca.fit(diabetes_2_df)
reduced_2 = pca.transform(diabetes_2_df)

In [73]:
#Call into dataframe 
pd.DataFrame(reduced_2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,9.572246,-0.558393,-0.547942,0.682432,0.757815,-0.872143,1.066559,-1.373482,-0.613279,-0.771447,0.166179,1.808657,-0.696128,-0.290813,0.139251,0.284583,0.074012
1,-19.961935,4.699503,-0.728852,1.933973,1.141393,0.097490,-1.634172,-0.336566,-0.510335,0.677065,-1.064613,0.161341,-0.072877,0.102641,-0.324577,-0.919340,1.130830
2,-4.139627,-10.890984,-1.705775,0.564171,-0.603588,-3.390295,-1.421705,-1.179877,-0.756079,0.648961,0.095003,0.104965,-0.393104,0.356378,0.417169,-0.916143,-0.719730
3,2.993975,-6.358675,-1.311252,1.211684,-2.899928,0.145125,-0.690519,-0.748218,0.719178,-0.923494,0.461505,0.496734,0.636635,1.253902,-0.571963,-0.027103,0.538903
4,-21.394304,-1.462835,-2.242847,1.836474,0.363659,1.350683,-0.422874,-0.294457,1.808951,0.596643,-0.825768,1.027890,0.528830,-0.004062,-0.067388,-0.555597,-0.037362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16188,-24.488951,18.129370,1.134875,-2.220189,-3.793193,0.943666,-0.905632,1.424822,-0.173442,0.641535,1.148047,0.975161,0.534606,0.252637,-0.427057,0.521642,0.401607
16189,32.820330,43.780869,-1.345880,-2.921411,-2.064557,4.623400,1.382733,1.223022,0.783380,-0.831292,0.951238,0.027477,0.339286,0.076690,0.349504,-0.484634,0.529494
16190,-0.636428,17.126420,-3.888855,1.749095,1.904950,0.675228,-1.257687,-0.487754,-0.186157,0.699669,-0.793051,-0.274232,-0.134696,0.342275,-0.632835,0.169929,0.003966
16191,20.720911,6.619875,-2.777139,-0.446661,7.031781,0.023051,2.949043,-1.480756,-0.923355,1.071970,-0.695601,-0.455375,0.026181,-0.400304,0.363831,-0.166115,-0.237620


In [75]:
#running Random Forest with the top 5 components from PCA
#remove target from features data
y = diabetes_2_df['A1Cresult_high']
X = transform_pca
# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [76]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.666


In [39]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-02-08 19:43:53.663443: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
127/127 - 0s - loss: 0.7099 - accuracy: 0.6631 - 162ms/epoch - 1ms/step
Loss: 0.7098603844642639, Accuracy: 0.6631267070770264


In [40]:
#
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [41]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every epoch
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch')

# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 2/100
Epoch 00002: saving model to checkpoints/weights.02.hdf5
Epoch 3/100
Epoch 00003: saving model to checkpoints/weights.03.hdf5
Epoch 4/100
Epoch 00004: saving model to checkpoints/weights.04.hdf5
Epoch 5/100
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/100
Epoch 00006: saving model to checkpoints/weights.06.hdf5
Epoch 7/100
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 8/100
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 9/100
Epoch 00009: saving model to checkpoints/weights.09.hdf5
Epoch 10/100
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/100
Epoch 00011: saving model to checkpoints/weights.11.hdf5
Epoch 12/100
Epoch 00012: saving model to checkpoints/weights.12.hdf5
Epoch 13/100
Epoch 00013: saving model to checkpoints/weights.13.hdf5
Epoch 14/100
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 15/100
Epoch 00015: sav

Epoch 00034: saving model to checkpoints/weights.34.hdf5
Epoch 35/100
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/100
Epoch 00036: saving model to checkpoints/weights.36.hdf5
Epoch 37/100
Epoch 00037: saving model to checkpoints/weights.37.hdf5
Epoch 38/100
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 39/100
Epoch 00039: saving model to checkpoints/weights.39.hdf5
Epoch 40/100
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/100
Epoch 00041: saving model to checkpoints/weights.41.hdf5
Epoch 42/100
Epoch 00042: saving model to checkpoints/weights.42.hdf5
Epoch 43/100
Epoch 00043: saving model to checkpoints/weights.43.hdf5
Epoch 44/100
Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 45/100
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 46/100
Epoch 00046: saving model to checkpoints/weights.46.hdf5
Epoch 47/100
Epoch 00047: saving model to checkpoints/weights.47.hdf5
Epoch 48/100
Epoch 00048: saving 

Epoch 00067: saving model to checkpoints/weights.67.hdf5
Epoch 68/100
Epoch 00068: saving model to checkpoints/weights.68.hdf5
Epoch 69/100
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 70/100
Epoch 00070: saving model to checkpoints/weights.70.hdf5
Epoch 71/100
Epoch 00071: saving model to checkpoints/weights.71.hdf5
Epoch 72/100
Epoch 00072: saving model to checkpoints/weights.72.hdf5
Epoch 73/100
Epoch 00073: saving model to checkpoints/weights.73.hdf5
Epoch 74/100
Epoch 00074: saving model to checkpoints/weights.74.hdf5
Epoch 75/100
Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 76/100
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 77/100
Epoch 00077: saving model to checkpoints/weights.77.hdf5
Epoch 78/100
Epoch 00078: saving model to checkpoints/weights.78.hdf5
Epoch 79/100
Epoch 00079: saving model to checkpoints/weights.79.hdf5
Epoch 80/100
Epoch 00080: saving model to checkpoints/weights.80.hdf5
Epoch 81/100
Epoch 00081: saving 

Epoch 00100: saving model to checkpoints/weights.100.hdf5
127/127 - 0s - loss: 0.8467 - accuracy: 0.6545 - 143ms/epoch - 1ms/step
Loss: 0.8466845154762268, Accuracy: 0.6544826030731201
