In [61]:
import numpy as np 
import pandas as pd 
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [62]:
# Load our dataset
dataset = pd.read_csv('../data/heart_failure/heart.csv')
dataset.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [63]:
# Separate features from targets
features = dataset.drop('HeartDisease',axis=1)
targets = dataset['HeartDisease']

# Encode our targets using one-hot encoding
targets_onehot = pd.get_dummies(targets)
targets_onehot.head()

Unnamed: 0,0,1
0,True,False
1,False,True
2,True,False
3,False,True
4,True,False


In [64]:
# Check distribution of target values
targets.value_counts()

HeartDisease
1    508
0    410
Name: count, dtype: int64

In [65]:
# One-hot encode categorical features
features_onehot = pd.get_dummies(features)
features_onehot.shape

(918, 20)

In [66]:
# Convert to float (probably not needed, but neural networks use floats 
# internally, so this lets us specify the type explicitly)
features_onehot = features_onehot.astype('float32')
targets_onehot = targets_onehot.astype('float32')
features_onehot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    float32
 1   RestingBP          918 non-null    float32
 2   Cholesterol        918 non-null    float32
 3   FastingBS          918 non-null    float32
 4   MaxHR              918 non-null    float32
 5   Oldpeak            918 non-null    float32
 6   Sex_F              918 non-null    float32
 7   Sex_M              918 non-null    float32
 8   ChestPainType_ASY  918 non-null    float32
 9   ChestPainType_ATA  918 non-null    float32
 10  ChestPainType_NAP  918 non-null    float32
 11  ChestPainType_TA   918 non-null    float32
 12  RestingECG_LVH     918 non-null    float32
 13  RestingECG_Normal  918 non-null    float32
 14  RestingECG_ST      918 non-null    float32
 15  ExerciseAngina_N   918 non-null    float32
 16  ExerciseAngina_Y   918 non

In [67]:
# Split our data into 90/10
train_features, test_features, train_targets, test_targets = train_test_split(features_onehot, targets_onehot, test_size=0.1, stratify=targets)

In [68]:

# Convert the dataframes into tensors
train_features_tensor = tf.convert_to_tensor(train_features)
train_targets_tensor = tf.convert_to_tensor(train_targets)
test_features_tensor = tf.convert_to_tensor(test_features)
test_targets_tensor = tf.convert_to_tensor(test_targets)
test_targets_tensor.shape

TensorShape([92, 2])

In [69]:

# Create our neural network. 
model = Sequential([Dense(20,activation='sigmoid'), Dense(2,activation="sigmoid")])
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [70]:
model.fit(train_features_tensor, train_targets_tensor, epochs=20, batch_size=2)

Epoch 1/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 681us/step - accuracy: 0.6129 - loss: 0.7126
Epoch 2/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 696us/step - accuracy: 0.7401 - loss: 0.5891
Epoch 3/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step - accuracy: 0.7416 - loss: 0.5752
Epoch 4/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 693us/step - accuracy: 0.7534 - loss: 0.5555
Epoch 5/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 696us/step - accuracy: 0.7503 - loss: 0.5544
Epoch 6/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 685us/step - accuracy: 0.7745 - loss: 0.5331
Epoch 7/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 744us/step - accuracy: 0.7581 - loss: 0.5197
Epoch 8/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 681us/step - accuracy: 0.7375 - loss: 0.5531
Epoch 9/20
[1m413/413[

<keras.src.callbacks.history.History at 0x76a9f8243c10>

In [71]:
model.evaluate(test_features, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.8395 - loss: 0.3766


[0.3732165992259979, 0.8586956262588501]

In [72]:
model_simple = Sequential([Dense(10,activation='sigmoid'), Dense(2,activation="sigmoid")])
model_simple.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model_simple.fit(train_features, train_targets, epochs=20, batch_size=2)

Epoch 1/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 728us/step - accuracy: 0.4196 - loss: 0.7326
Epoch 2/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 744us/step - accuracy: 0.5244 - loss: 0.6678
Epoch 3/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 772us/step - accuracy: 0.5948 - loss: 0.6176
Epoch 4/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 764us/step - accuracy: 0.7007 - loss: 0.6143
Epoch 5/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 771us/step - accuracy: 0.7270 - loss: 0.5952
Epoch 6/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 756us/step - accuracy: 0.7577 - loss: 0.5581
Epoch 7/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 706us/step - accuracy: 0.7696 - loss: 0.5495
Epoch 8/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 709us/step - accuracy: 0.7881 - loss: 0.5173
Epoch 9/20
[1m413/413[

<keras.src.callbacks.history.History at 0x76a9e2b11290>

In [73]:
model_simple.evaluate(test_features, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.9003 - loss: 0.3335


[0.3292577862739563, 0.9021739363670349]

In [74]:
# Load and Pre-process the dataset. This code has been copied from the workshop on Regression for Life expectancy

# Load the dataset
dataset2 = pd.read_csv('../data/life_expectancy/Life Expectancy Data.csv')
dataset2.head()

# Drop all rows where we are missing the life expectancy. Had to put the key in a list 
# for this to work in this notebook!?
dataset2 = dataset2.dropna(subset=['Life expectancy '])

# Fill the remaining missing values with the means for each column
float_cols_with_nas = ['Alcohol','Hepatitis B',' BMI ','Polio','Total expenditure','Diphtheria ','GDP','Population',' thinness  1-19 years',' thinness 5-9 years','Income composition of resources','Schooling']
dataset2[float_cols_with_nas] = dataset2[float_cols_with_nas].fillna(dataset2[float_cols_with_nas].mean())

# Convert the categorical columns to appropriate types
dataset2[['Country', 'Status']] = dataset2[['Country', 'Status']].astype('category')

# Encode the categorical columns using one-hot encoding
dataset2 = pd.get_dummies(dataset2)

# Separate into features/targets
targets2 = dataset2['Life expectancy ']
features2 = dataset2.drop('Life expectancy ', axis=1)
features2 = features2.astype('float32')
targets2 = targets2.astype('float32')

# Split the data into 75/25 (as specified in the assignment)
train_targets2, test_targets2, train_features2, test_features2 = train_test_split(targets2,features2)
train_features2_tensor = tf.convert_to_tensor(train_features2)
train_targets2_tensor = tf.convert_to_tensor(train_targets2)
test_features2_tensor = tf.convert_to_tensor(test_features2)
test_targets2_tensor = tf.convert_to_tensor(test_targets2)

features2.head()

Unnamed: 0,Year,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,Polio,...,Country_Uruguay,Country_Uzbekistan,Country_Vanuatu,Country_Venezuela (Bolivarian Republic of),Country_Viet Nam,Country_Yemen,Country_Zambia,Country_Zimbabwe,Status_Developed,Status_Developing
0,2015.0,263.0,62.0,0.01,71.279625,65.0,1154.0,19.1,83.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2014.0,271.0,64.0,0.01,73.523582,62.0,492.0,18.6,86.0,58.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2013.0,268.0,66.0,0.01,73.219246,64.0,430.0,18.1,89.0,62.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2012.0,272.0,69.0,0.01,78.184212,67.0,2787.0,17.6,93.0,67.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2011.0,275.0,71.0,0.01,7.097109,68.0,3013.0,17.200001,97.0,68.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [75]:
model2 = Sequential([Dense(units=204,activation='sigmoid'), Dense(units=1,activation="linear")])
model2.compile(optimizer='adam',loss='mean_squared_error',metrics=['mean_absolute_error'])
model2.fit(train_features2_tensor,train_targets2_tensor, epochs=20, batch_size=1)

Epoch 1/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 715us/step - loss: 1797.9438 - mean_absolute_error: 35.5513
Epoch 2/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 709us/step - loss: 91.4659 - mean_absolute_error: 7.7787
Epoch 3/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 671us/step - loss: 83.8769 - mean_absolute_error: 7.3848
Epoch 4/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 689us/step - loss: 89.4604 - mean_absolute_error: 7.7553
Epoch 5/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 688us/step - loss: 89.3733 - mean_absolute_error: 7.6869
Epoch 6/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 682us/step - loss: 88.1279 - mean_absolute_error: 7.6417
Epoch 7/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 675us/step - loss: 87.0590 - mean_absolute_error: 7.6340
Epoch 8/20
[1m2196/2196[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x76a9e2ad8ed0>

In [76]:
model2.evaluate(test_features2_tensor, test_targets2_tensor)
# model2(test_features2[:20].to_numpy())

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 88.1515 - mean_absolute_error: 7.5955


[88.42637634277344, 7.621264457702637]

In [77]:
from tensorflow.keras.layers import Normalization

normalization = Normalization(axis=None)
normalization.adapt(train_features2_tensor)
model3 = Sequential([normalization,Dense(units=40,activation='sigmoid'),Dense(units=1)])
model3.compile(optimizer='sgd',loss='mean_squared_error',metrics=['mean_absolute_error'])
model3.fit(train_features2_tensor, train_targets2_tensor, epochs=20, batch_size=24)


Epoch 1/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 558.4174 - mean_absolute_error: 15.3980
Epoch 2/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 678us/step - loss: 93.0874 - mean_absolute_error: 7.9675
Epoch 3/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 691us/step - loss: 92.6362 - mean_absolute_error: 7.8693
Epoch 4/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 812us/step - loss: 96.4228 - mean_absolute_error: 8.0994
Epoch 5/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 93.8776 - mean_absolute_error: 7.9618
Epoch 6/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 993us/step - loss: 89.9602 - mean_absolute_error: 7.7088
Epoch 7/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 810us/step - loss: 93.8531 - mean_absolute_error: 7.9535
Epoch 8/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 825

<keras.src.callbacks.history.History at 0x76a9fa5becd0>

In [78]:
model3.evaluate(test_features2_tensor, test_targets2_tensor)
model3(test_features2_tensor[:40])

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 91.5619 - mean_absolute_error: 7.5068


<tf.Tensor: shape=(40, 1), dtype=float32, numpy=
array([[70.29426 ],
       [71.194374],
       [71.05975 ],
       [69.78806 ],
       [71.15424 ],
       [71.16211 ],
       [71.18084 ],
       [71.004814],
       [69.76901 ],
       [71.15608 ],
       [70.63288 ],
       [71.18276 ],
       [70.96547 ],
       [71.15128 ],
       [70.5615  ],
       [71.154236],
       [70.7789  ],
       [71.15424 ],
       [71.15423 ],
       [71.01997 ],
       [70.791435],
       [70.18766 ],
       [71.154854],
       [71.15424 ],
       [69.86107 ],
       [71.15424 ],
       [71.009796],
       [69.69433 ],
       [71.19465 ],
       [71.19465 ],
       [71.04142 ],
       [71.15263 ],
       [71.15281 ],
       [69.966644],
       [69.82681 ],
       [70.394936],
       [71.15505 ],
       [69.72758 ],
       [69.70759 ],
       [70.26176 ]], dtype=float32)>

In [79]:
model4 = Sequential()
model4.add(normalization)
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(1, activation='linear'))
model4.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model4.fit(train_features2,train_targets2, epochs=40, batch_size=20)

Epoch 1/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 4482.3403 - mean_absolute_error: 65.0692
Epoch 2/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 921us/step - loss: 1353.0659 - mean_absolute_error: 27.9955
Epoch 3/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 879us/step - loss: 202.3459 - mean_absolute_error: 9.1328
Epoch 4/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 872us/step - loss: 116.6754 - mean_absolute_error: 8.1043
Epoch 5/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 918us/step - loss: 100.3284 - mean_absolute_error: 7.9260
Epoch 6/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 896us/step - loss: 90.6736 - mean_absolute_error: 7.7475
Epoch 7/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 904us/step - loss: 92.3341 - mean_absolute_error: 7.8446
Epoch 8/40
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x76a9fabdb2d0>

In [80]:
import keras
inputs = keras.Input(shape=(204,))
print(inputs.shape)
norm_inputs = normalization(inputs)
x = Dense(40, activation='relu')(norm_inputs)
x = Dense(40, activation='relu')(x)
x = Dense(40, activation='relu')(x) + Dense(40,activation='relu')(norm_inputs)
x = Dense(1, activation='linear')(x)
model5 = keras.Model(inputs=inputs, outputs=x)
print(model5.summary())
model5.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model5.fit(train_features2,train_targets2, epochs=20, batch_size=20)


(None, 204)


None
Epoch 1/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 4612.6694 - mean_absolute_error: 66.2944 
Epoch 2/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 932us/step - loss: 2467.4966 - mean_absolute_error: 44.4693
Epoch 3/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 854us/step - loss: 1210.0739 - mean_absolute_error: 27.6107
Epoch 4/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 833us/step - loss: 273.5031 - mean_absolute_error: 10.6942
Epoch 5/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 844us/step - loss: 103.4895 - mean_absolute_error: 8.1505
Epoch 6/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 813us/step - loss: 92.0931 - mean_absolute_error: 7.8510
Epoch 7/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 816us/step - loss: 91.1116 - mean_absolute_error: 7.8078
Epoch 8/20
[1m110/110[0m [32m━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x76a9fbdb2a90>

In [81]:
from numba import cuda
device = cuda.get_current_device()
device.reset()