In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [3]:
# Load our dataset
dataset = pd.read_csv('../data/heart_failure/heart.csv')
dataset.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [4]:
# Separate features from targets
features = dataset.drop('HeartDisease',axis=1)
targets = dataset['HeartDisease']

# Encode our targets using one-hot encoding
targets_onehot = pd.get_dummies(targets)
targets_onehot.head()

Unnamed: 0,0,1
0,True,False
1,False,True
2,True,False
3,False,True
4,True,False


In [5]:
# Check distribution of target values
targets.value_counts()

HeartDisease
1    508
0    410
Name: count, dtype: int64

In [6]:
# One-hot encode categorical features
features_onehot = pd.get_dummies(features)
features_onehot.shape

(918, 20)

In [7]:
# Convert to float (probably not needed, but neural networks use floats 
# internally, so this lets us specify the type explicitly)
features_onehot = features_onehot.astype('float32')
targets_onehot = targets_onehot.astype('float32')
features_onehot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    float32
 1   RestingBP          918 non-null    float32
 2   Cholesterol        918 non-null    float32
 3   FastingBS          918 non-null    float32
 4   MaxHR              918 non-null    float32
 5   Oldpeak            918 non-null    float32
 6   Sex_F              918 non-null    float32
 7   Sex_M              918 non-null    float32
 8   ChestPainType_ASY  918 non-null    float32
 9   ChestPainType_ATA  918 non-null    float32
 10  ChestPainType_NAP  918 non-null    float32
 11  ChestPainType_TA   918 non-null    float32
 12  RestingECG_LVH     918 non-null    float32
 13  RestingECG_Normal  918 non-null    float32
 14  RestingECG_ST      918 non-null    float32
 15  ExerciseAngina_N   918 non-null    float32
 16  ExerciseAngina_Y   918 non

In [8]:
# Split our data into 90/10
from sklearn.model_selection import train_test_split
train_features, test_features, train_targets, test_targets = train_test_split(features_onehot, targets_onehot, test_size=0.1, stratify=targets)

In [9]:
import tensorflow as tf

# Convert the dataframes into tensors
train_features_tensor = tf.convert_to_tensor(train_features)
train_targets_tensor = tf.convert_to_tensor(train_targets)
test_features_tensor = tf.convert_to_tensor(test_features)
test_targets_tensor = tf.convert_to_tensor(test_targets)

test_targets.shape

2024-10-30 11:28:39.162332: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-30 11:28:39.164821: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-30 11:28:39.172711: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730284119.186590   54528 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730284119.190569   54528 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-30 11:28:39.205255: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

(92, 2)

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Create our neural network. 
model = Sequential([Dense(20,activation='sigmoid'), Dense(2,activation="sigmoid")])
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [11]:
model.fit(train_features, train_targets, epochs=20, batch_size=2)

Epoch 1/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 558us/step - accuracy: 0.5667 - loss: 0.6735
Epoch 2/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 578us/step - accuracy: 0.7330 - loss: 0.5589
Epoch 3/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 529us/step - accuracy: 0.7368 - loss: 0.5469
Epoch 4/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 669us/step - accuracy: 0.7990 - loss: 0.5057
Epoch 5/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 549us/step - accuracy: 0.8036 - loss: 0.4640
Epoch 6/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 553us/step - accuracy: 0.8270 - loss: 0.4410
Epoch 7/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 548us/step - accuracy: 0.8492 - loss: 0.3980
Epoch 8/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 545us/step - accuracy: 0.8367 - loss: 0.4083
Epoch 9/20
[1m413/413[

<keras.src.callbacks.history.History at 0x7dc76611bf50>

In [12]:
model.evaluate(test_features, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8825 - loss: 0.3568  


[0.40352463722229004, 0.8586956262588501]

In [13]:
model_simple = Sequential([Dense(10,activation='sigmoid'), Dense(2,activation="sigmoid")])
model_simple.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model_simple.fit(train_features, train_targets, epochs=20, batch_size=2)

Epoch 1/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 557us/step - accuracy: 0.4632 - loss: 0.7226
Epoch 2/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 556us/step - accuracy: 0.6863 - loss: 0.6245
Epoch 3/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 568us/step - accuracy: 0.7229 - loss: 0.5891
Epoch 4/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 543us/step - accuracy: 0.7344 - loss: 0.5617
Epoch 5/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 577us/step - accuracy: 0.7183 - loss: 0.5659
Epoch 6/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step - accuracy: 0.7510 - loss: 0.5479
Epoch 7/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 565us/step - accuracy: 0.7036 - loss: 0.5414
Epoch 8/20
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567us/step - accuracy: 0.7290 - loss: 0.5308
Epoch 9/20
[1m413/413[

<keras.src.callbacks.history.History at 0x7dc75c5fd9d0>

In [14]:
model_simple.evaluate(test_features, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7485 - loss: 0.5232


[0.554521918296814, 0.739130437374115]

In [17]:
# Load and Pre-process the dataset. This code has been copied from the workshop on Regression for Life expectancy

# Load the dataset
dataset2 = pd.read_csv('../data/life_expectancy/Life Expectancy Data.csv')
dataset2.head()

# Drop all rows where we are missing the life expectancy. Had to put the key in a list 
# for this to work in this notebook!?
dataset2 = dataset2.dropna(subset=['Life expectancy '])

# Fill the remaining missing values with the means for each column
float_cols_with_nas = ['Alcohol','Hepatitis B',' BMI ','Polio','Total expenditure','Diphtheria ','GDP','Population',' thinness  1-19 years',' thinness 5-9 years','Income composition of resources','Schooling']
dataset2[float_cols_with_nas] = dataset2[float_cols_with_nas].fillna(dataset2[float_cols_with_nas].mean())

# Convert the categorical columns to appropriate types
dataset2[['Country', 'Status']] = dataset2[['Country', 'Status']].astype('category')

# Encode the categorical columns using one-hot encoding
dataset2 = pd.get_dummies(dataset2)

# Separate into features/targets
targets2 = dataset2['Life expectancy ']
features2 = dataset2.drop('Life expectancy ', axis=1)

from sklearn.model_selection import train_test_split
# Split the data into 75/25 (as specified in the assignment)
train_targets2, test_targets2, train_features2, test_features2 = train_test_split(targets2,features2)

features2.head()

Unnamed: 0,Year,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,Polio,...,Country_Uruguay,Country_Uzbekistan,Country_Vanuatu,Country_Venezuela (Bolivarian Republic of),Country_Viet Nam,Country_Yemen,Country_Zambia,Country_Zimbabwe,Status_Developed,Status_Developing
0,2015,263.0,62,0.01,71.279624,65.0,1154,19.1,83,6.0,...,False,False,False,False,False,False,False,False,False,True
1,2014,271.0,64,0.01,73.523582,62.0,492,18.6,86,58.0,...,False,False,False,False,False,False,False,False,False,True
2,2013,268.0,66,0.01,73.219243,64.0,430,18.1,89,62.0,...,False,False,False,False,False,False,False,False,False,True
3,2012,272.0,69,0.01,78.184215,67.0,2787,17.6,93,67.0,...,False,False,False,False,False,False,False,False,False,True
4,2011,275.0,71,0.01,7.097109,68.0,3013,17.2,97,68.0,...,False,False,False,False,False,False,False,False,False,True


In [18]:
model2 = Sequential([Dense(units=204,activation='sigmoid'), Dense(units=1,activation="linear")])
model2.compile(optimizer='adam',loss='mean_squared_error',metrics=['mean_absolute_error'])
model2.fit(train_features2, train_targets2, epochs=20, batch_size=1)

Epoch 1/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 734us/step - loss: 1758.6888 - mean_absolute_error: 34.7015
Epoch 2/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 684us/step - loss: 91.6458 - mean_absolute_error: 7.7981
Epoch 3/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 722us/step - loss: 89.3239 - mean_absolute_error: 7.7755
Epoch 4/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 844us/step - loss: 86.5736 - mean_absolute_error: 7.5719
Epoch 5/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 916us/step - loss: 87.7805 - mean_absolute_error: 7.7112
Epoch 6/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 88.3293 - mean_absolute_error: 7.7153
Epoch 7/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 929us/step - loss: 87.1603 - mean_absolute_error: 7.6399
Epoch 8/20
[1m2196/2196[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7dc737b71790>

In [20]:
model2.evaluate(test_features2, test_targets2)
# model2(test_features2[:20].to_numpy())

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 864us/step - loss: 89.1320 - mean_absolute_error: 7.6359


[86.3584213256836, 7.547824382781982]

In [22]:
from tensorflow.keras.layers import Normalization

normalization = Normalization(axis=None)
normalization.adapt(train_features2)
model3 = Sequential([normalization,Dense(units=40,activation='sigmoid'),Dense(units=1)])
model3.compile(optimizer='sgd',loss='mean_squared_error',metrics=['mean_absolute_error'])
model3.fit(train_features2, train_targets2, epochs=20, batch_size=24)


UnboundLocalError: cannot access local variable 'input_shape' where it is not associated with a value

In [None]:
model3.evaluate(test_features2, test_targets2)
model3(test_features2[:40])

In [None]:
model4 = Sequential()
model4.add(normalization)
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(1, activation='linear'))
print(model4.summary())
model4.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model4.fit(train_features2,train_targets2, epochs=40, batch_size=20)

In [None]:
import keras
inputs = keras.Input(shape=(204,))
print(inputs.shape)
norm_inputs = normalization(inputs)
x = Dense(40, activation='relu')(norm_inputs)
x = Dense(40, activation='relu')(x)
x = Dense(40, activation='relu')(x)
x = Dense(40, activation='relu')(x)
x = Dense(40, activation='relu')(x) + Dense(40,activation='relu')(norm_inputs)
x = Dense(1, activation='linear')(x)
model5 = keras.Model(inputs=inputs, outputs=x)
print(model5.summary())
model5.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model5.fit(train_features2,train_targets2, epochs=20, batch_size=20)
