In [47]:
import numpy as np 
import pandas as pd 
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input

In [16]:
# Load our dataset
dataset = pd.read_csv('../data/heart_failure/heart.csv')
dataset.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [17]:
# Separate features from targets
features = dataset.drop('HeartDisease',axis=1)
targets = dataset['HeartDisease']

# Encode our targets using one-hot encoding
targets_onehot = pd.get_dummies(targets)
targets_onehot.head()

Unnamed: 0,0,1
0,True,False
1,False,True
2,True,False
3,False,True
4,True,False


In [18]:
# Check distribution of target values
targets.value_counts()

HeartDisease
1    508
0    410
Name: count, dtype: int64

In [19]:
features.shape

(918, 11)

In [20]:
# One-hot encode categorical features
features_onehot = pd.get_dummies(features)
features_onehot.shape

(918, 20)

In [21]:
# Convert to float (probably not needed, but neural networks use floats 
# internally, so this lets us specify the type explicitly)
features_onehot = features_onehot.astype('float32')
targets_onehot = targets_onehot.astype('float32')
features_onehot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    float32
 1   RestingBP          918 non-null    float32
 2   Cholesterol        918 non-null    float32
 3   FastingBS          918 non-null    float32
 4   MaxHR              918 non-null    float32
 5   Oldpeak            918 non-null    float32
 6   Sex_F              918 non-null    float32
 7   Sex_M              918 non-null    float32
 8   ChestPainType_ASY  918 non-null    float32
 9   ChestPainType_ATA  918 non-null    float32
 10  ChestPainType_NAP  918 non-null    float32
 11  ChestPainType_TA   918 non-null    float32
 12  RestingECG_LVH     918 non-null    float32
 13  RestingECG_Normal  918 non-null    float32
 14  RestingECG_ST      918 non-null    float32
 15  ExerciseAngina_N   918 non-null    float32
 16  ExerciseAngina_Y   918 non

In [22]:
# Split our data into 90/10
train_features, test_features, train_targets, test_targets = train_test_split(features_onehot, targets_onehot, test_size=0.1, stratify=targets)

In [23]:
train_features.shape

(826, 20)

In [24]:

# Convert the dataframes into tensors
train_features_tensor = tf.convert_to_tensor(train_features)
train_targets_tensor = tf.convert_to_tensor(train_targets)
test_features_tensor = tf.convert_to_tensor(test_features)
test_targets_tensor = tf.convert_to_tensor(test_targets)
test_targets_tensor.shape

TensorShape([92, 2])

In [25]:

# Create our neural network. 
model = Sequential([
    Input(shape=(20,)),
    Dense(20,activation='sigmoid'), 
    Dense(2,activation="sigmoid")])
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [26]:
model.fit(train_features_tensor, train_targets_tensor, epochs=20, batch_size=2)

Epoch 1/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5043 - loss: 0.6984
Epoch 2/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6831 - loss: 0.6032
Epoch 3/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6875 - loss: 0.5873
Epoch 4/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7030 - loss: 0.5664
Epoch 5/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7497 - loss: 0.5633
Epoch 6/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7319 - loss: 0.5468
Epoch 7/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 986us/step - accuracy: 0.7705 - loss: 0.5151
Epoch 8/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 784us/step - accuracy: 0.7749 - loss: 0.5051
Epoch 9/20
[1m413/413[0m [32m━━━━

<keras.src.callbacks.history.History at 0x717c4f8c8310>

In [27]:
model.evaluate(test_features, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.8178 - loss: 0.4404


[0.44781485199928284, 0.8152173757553101]

In [28]:
model_simple = Sequential([Input(shape=(20,)), Dense(10,activation='sigmoid'), Dense(2,activation="sigmoid")])
model_simple.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model_simple.fit(train_features, train_targets, epochs=20, batch_size=2)

Epoch 1/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5492 - loss: 0.7202
Epoch 2/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5533 - loss: 0.6633
Epoch 3/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5311 - loss: 0.6643
Epoch 4/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5478 - loss: 0.6458
Epoch 5/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5346 - loss: 0.6270
Epoch 6/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5989 - loss: 0.6332
Epoch 7/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6080 - loss: 0.6263
Epoch 8/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5493 - loss: 0.6330
Epoch 9/20
[1m413/413[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x717c119ec490>

In [29]:
model_simple.evaluate(test_features, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 114ms/step - accuracy: 0.7181 - loss: 0.5959


[0.5832069516181946, 0.717391312122345]

In [30]:
# Load and Pre-process the dataset.
# Load the dataset
dataset2 = pd.read_csv('../data/life_expectancy/Life Expectancy Data.csv')
dataset2.head()

# Drop all rows where we are missing the life expectancy. Had to put the key in a list 
# for this to work in this notebook!?
dataset2 = dataset2.dropna(subset=['Life expectancy '])

# Fill the remaining missing values with the means for each column
float_cols_with_nas = ['Alcohol','Hepatitis B',' BMI ','Polio','Total expenditure','Diphtheria ','GDP','Population',' thinness  1-19 years',' thinness 5-9 years','Income composition of resources','Schooling']
dataset2[float_cols_with_nas] = dataset2[float_cols_with_nas].fillna(dataset2[float_cols_with_nas].mean())

# Convert the categorical columns to appropriate types
dataset2[['Country', 'Status']] = dataset2[['Country', 'Status']].astype('category')

# Encode the categorical columns using one-hot encoding
dataset2 = pd.get_dummies(dataset2)

# Separate into features/targets
targets2 = dataset2['Life expectancy ']
features2 = dataset2.drop('Life expectancy ', axis=1)
features2 = features2.astype('float32')
targets2 = targets2.astype('float32')

# Split the data into 75/25 (as specified in the assignment)
train_targets2, test_targets2, train_features2, test_features2 = train_test_split(targets2,features2)
train_features2_tensor = tf.convert_to_tensor(train_features2)
train_targets2_tensor = tf.convert_to_tensor(train_targets2)
test_features2_tensor = tf.convert_to_tensor(test_features2)
test_targets2_tensor = tf.convert_to_tensor(test_targets2)

features2.head()

Unnamed: 0,Year,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,Polio,...,Country_Uruguay,Country_Uzbekistan,Country_Vanuatu,Country_Venezuela (Bolivarian Republic of),Country_Viet Nam,Country_Yemen,Country_Zambia,Country_Zimbabwe,Status_Developed,Status_Developing
0,2015.0,263.0,62.0,0.01,71.279625,65.0,1154.0,19.1,83.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2014.0,271.0,64.0,0.01,73.523582,62.0,492.0,18.6,86.0,58.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2013.0,268.0,66.0,0.01,73.219246,64.0,430.0,18.1,89.0,62.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2012.0,272.0,69.0,0.01,78.184212,67.0,2787.0,17.6,93.0,67.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2011.0,275.0,71.0,0.01,7.097109,68.0,3013.0,17.200001,97.0,68.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [32]:
model2 = Sequential([Input(shape=(204,)),Dense(units=204,activation='sigmoid'), Dense(units=1,activation="linear")])
model2.compile(optimizer='adam',loss='mean_squared_error',metrics=['mean_absolute_error'])
model2.fit(train_features2_tensor,train_targets2_tensor, epochs=20, batch_size=1)

Epoch 1/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 852us/step - loss: 1667.1926 - mean_absolute_error: 33.5780
Epoch 2/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 81.6285 - mean_absolute_error: 7.3045
Epoch 3/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 993us/step - loss: 85.8083 - mean_absolute_error: 7.6011
Epoch 4/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 84.5426 - mean_absolute_error: 7.3424
Epoch 5/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 85.1614 - mean_absolute_error: 7.4716
Epoch 6/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 978us/step - loss: 88.5175 - mean_absolute_error: 7.6259
Epoch 7/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 86.9504 - mean_absolute_error: 7.5816
Epoch 8/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x717bf8bba110>

In [34]:
model2.evaluate(test_features2_tensor, test_targets2_tensor)
# model2(test_features2[:20].to_numpy())

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 93.8465 - mean_absolute_error: 7.8993


[91.4810791015625, 7.782674312591553]

In [35]:
train_features2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2196 entries, 1512 to 2670
Columns: 204 entries, Year to Status_Developing
dtypes: float32(204)
memory usage: 1.7 MB


In [36]:
from tensorflow.keras.layers import Normalization

normalization = Normalization(axis=None)
normalization.adapt(train_features2_tensor)
model3 = Sequential([Input(shape=(204,)),normalization,Dense(units=40,activation='sigmoid'),Dense(units=1)])
model3.compile(optimizer='sgd',loss='mean_squared_error',metrics=['mean_absolute_error'])
model3.fit(train_features2_tensor, train_targets2_tensor, epochs=20, batch_size=24)


Epoch 1/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 580.6116 - mean_absolute_error: 15.4985
Epoch 2/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 878us/step - loss: 88.9308 - mean_absolute_error: 7.6655
Epoch 3/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 936us/step - loss: 89.6362 - mean_absolute_error: 7.6542
Epoch 4/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 779us/step - loss: 94.6690 - mean_absolute_error: 7.9964
Epoch 5/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 90.5715 - mean_absolute_error: 7.7694
Epoch 6/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 741us/step - loss: 92.9052 - mean_absolute_error: 7.8099
Epoch 7/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 91.9384 - mean_absolute_error: 7.8497
Epoch 8/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/s

<keras.src.callbacks.history.History at 0x717bb81663d0>

In [40]:
test_features2_tensor.shape

TensorShape([732, 204])

In [37]:
model3.evaluate(test_features2_tensor, test_targets2_tensor)
model3(test_features2_tensor[:40])

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 115.0385 - mean_absolute_error: 9.2799


<tf.Tensor: shape=(40, 1), dtype=float32, numpy=
array([[64.52629 ],
       [65.06633 ],
       [64.20512 ],
       [64.88066 ],
       [64.52354 ],
       [64.51358 ],
       [64.44554 ],
       [63.742573],
       [64.524155],
       [64.524506],
       [64.532745],
       [64.51591 ],
       [63.890507],
       [64.549065],
       [64.523766],
       [64.52504 ],
       [64.5249  ],
       [64.49045 ],
       [64.3634  ],
       [63.74258 ],
       [63.74258 ],
       [63.818043],
       [64.483   ],
       [64.46745 ],
       [64.52631 ],
       [64.488525],
       [63.742573],
       [63.7541  ],
       [63.76709 ],
       [63.840534],
       [63.74258 ],
       [64.52373 ],
       [64.52522 ],
       [64.51171 ],
       [64.193726],
       [63.742565],
       [63.741955],
       [64.501854],
       [64.52291 ],
       [63.95091 ]], dtype=float32)>

In [44]:
model4 = Sequential()
model4.add(Input(shape=(204,)))
model4.add(normalization)
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(1, activation='linear'))
model4.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model4.fit(train_features2,train_targets2, epochs=40, batch_size=20)

Epoch 1/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 4622.8516 - mean_absolute_error: 65.1608
Epoch 2/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2330.0024 - mean_absolute_error: 34.5717
Epoch 3/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 299.2180 - mean_absolute_error: 10.3747
Epoch 4/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 145.9911 - mean_absolute_error: 8.2925
Epoch 5/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 101.2559 - mean_absolute_error: 8.1002
Epoch 6/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 92.5777 - mean_absolute_error: 7.9006
Epoch 7/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 95.7826 - mean_absolute_error: 7.9348
Epoch 8/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x717b80395850>

In [48]:
inputs = Input(shape=(204,))
print(inputs.shape)
norm_inputs = normalization(inputs)
x = Dense(40, activation='relu')(norm_inputs)
x = Dense(40, activation='relu')(x)
x = Dense(40, activation='relu')(x) + Dense(40,activation='relu')(norm_inputs)
x = Dense(1, activation='linear')(x)
model5 = Model(inputs=inputs, outputs=x)
print(model5.summary())
model5.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model5.fit(train_features2,train_targets2, epochs=20, batch_size=20)


(None, 204)


None
Epoch 1/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 4625.2197 - mean_absolute_error: 65.2643
Epoch 2/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2351.8645 - mean_absolute_error: 42.3592
Epoch 3/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 863.6107 - mean_absolute_error: 19.5478
Epoch 4/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 895us/step - loss: 228.6996 - mean_absolute_error: 9.4675
Epoch 5/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 996us/step - loss: 96.9129 - mean_absolute_error: 7.7987
Epoch 6/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 88.5248 - mean_absolute_error: 7.6682
Epoch 7/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 93.3031 - mean_absolute_error: 7.8794
Epoch 8/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x717b581bdfd0>

In [None]:
from numba import cuda
device = cuda.get_current_device()
device.reset()