In [55]:
import numpy as np 
import pandas as pd 
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

In [37]:
# Load our dataset
dataset = pd.read_csv('../data/heart_failure/heart.csv')
dataset.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [38]:
# Separate features from targets
features = dataset.drop('HeartDisease',axis=1)
targets = dataset['HeartDisease']

# Encode our targets using one-hot encoding
targets_onehot = pd.get_dummies(targets)
targets_onehot.head()

Unnamed: 0,0,1
0,True,False
1,False,True
2,True,False
3,False,True
4,True,False


In [39]:
# Check distribution of target values
targets.value_counts()

HeartDisease
1    508
0    410
Name: count, dtype: int64

In [59]:
features.shape

(918, 11)

In [40]:
# One-hot encode categorical features
features_onehot = pd.get_dummies(features)
features_onehot.shape

(918, 20)

In [41]:
# Convert to float (probably not needed, but neural networks use floats 
# internally, so this lets us specify the type explicitly)
features_onehot = features_onehot.astype('float32')
targets_onehot = targets_onehot.astype('float32')
features_onehot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    float32
 1   RestingBP          918 non-null    float32
 2   Cholesterol        918 non-null    float32
 3   FastingBS          918 non-null    float32
 4   MaxHR              918 non-null    float32
 5   Oldpeak            918 non-null    float32
 6   Sex_F              918 non-null    float32
 7   Sex_M              918 non-null    float32
 8   ChestPainType_ASY  918 non-null    float32
 9   ChestPainType_ATA  918 non-null    float32
 10  ChestPainType_NAP  918 non-null    float32
 11  ChestPainType_TA   918 non-null    float32
 12  RestingECG_LVH     918 non-null    float32
 13  RestingECG_Normal  918 non-null    float32
 14  RestingECG_ST      918 non-null    float32
 15  ExerciseAngina_N   918 non-null    float32
 16  ExerciseAngina_Y   918 non

In [42]:
# Split our data into 90/10
train_features, test_features, train_targets, test_targets = train_test_split(features_onehot, targets_onehot, test_size=0.1, stratify=targets)

In [65]:
train_features.shape

(826, 20)

In [43]:

# Convert the dataframes into tensors
train_features_tensor = tf.convert_to_tensor(train_features)
train_targets_tensor = tf.convert_to_tensor(train_targets)
test_features_tensor = tf.convert_to_tensor(test_features)
test_targets_tensor = tf.convert_to_tensor(test_targets)
test_targets_tensor.shape

TensorShape([92, 2])

In [63]:

# Create our neural network. 
model = Sequential([
    Input(shape=(20,)),
    Dense(20,activation='sigmoid'), 
    Dense(2,activation="sigmoid")])
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [64]:
model.fit(train_features_tensor, train_targets_tensor, epochs=20, batch_size=2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7b6d4dae9390>

In [46]:
model.evaluate(test_features, test_targets)



[0.42792776226997375, 0.8152173757553101]

In [47]:
model_simple = Sequential([Dense(10,activation='sigmoid'), Dense(2,activation="sigmoid")])
model_simple.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model_simple.fit(train_features, train_targets, epochs=20, batch_size=2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7b6d4d4d12d0>

In [48]:
model_simple.evaluate(test_features, test_targets)



[0.427807092666626, 0.8260869383811951]

In [49]:
# Load and Pre-process the dataset. This code has been copied from the workshop on Regression for Life expectancy

# Load the dataset
dataset2 = pd.read_csv('../data/life_expectancy/Life Expectancy Data.csv')
dataset2.head()

# Drop all rows where we are missing the life expectancy. Had to put the key in a list 
# for this to work in this notebook!?
dataset2 = dataset2.dropna(subset=['Life expectancy '])

# Fill the remaining missing values with the means for each column
float_cols_with_nas = ['Alcohol','Hepatitis B',' BMI ','Polio','Total expenditure','Diphtheria ','GDP','Population',' thinness  1-19 years',' thinness 5-9 years','Income composition of resources','Schooling']
dataset2[float_cols_with_nas] = dataset2[float_cols_with_nas].fillna(dataset2[float_cols_with_nas].mean())

# Convert the categorical columns to appropriate types
dataset2[['Country', 'Status']] = dataset2[['Country', 'Status']].astype('category')

# Encode the categorical columns using one-hot encoding
dataset2 = pd.get_dummies(dataset2)

# Separate into features/targets
targets2 = dataset2['Life expectancy ']
features2 = dataset2.drop('Life expectancy ', axis=1)
features2 = features2.astype('float32')
targets2 = targets2.astype('float32')

# Split the data into 75/25 (as specified in the assignment)
train_targets2, test_targets2, train_features2, test_features2 = train_test_split(targets2,features2)
train_features2_tensor = tf.convert_to_tensor(train_features2)
train_targets2_tensor = tf.convert_to_tensor(train_targets2)
test_features2_tensor = tf.convert_to_tensor(test_features2)
test_targets2_tensor = tf.convert_to_tensor(test_targets2)

features2.head()

Unnamed: 0,Year,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,Polio,...,Country_Uruguay,Country_Uzbekistan,Country_Vanuatu,Country_Venezuela (Bolivarian Republic of),Country_Viet Nam,Country_Yemen,Country_Zambia,Country_Zimbabwe,Status_Developed,Status_Developing
0,2015.0,263.0,62.0,0.01,71.279625,65.0,1154.0,19.1,83.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2014.0,271.0,64.0,0.01,73.523582,62.0,492.0,18.6,86.0,58.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2013.0,268.0,66.0,0.01,73.219246,64.0,430.0,18.1,89.0,62.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2012.0,272.0,69.0,0.01,78.184212,67.0,2787.0,17.6,93.0,67.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2011.0,275.0,71.0,0.01,7.097109,68.0,3013.0,17.200001,97.0,68.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [50]:
model2 = Sequential([Dense(units=204,activation='sigmoid'), Dense(units=1,activation="linear")])
model2.compile(optimizer='adam',loss='mean_squared_error',metrics=['mean_absolute_error'])
model2.fit(train_features2_tensor,train_targets2_tensor, epochs=20, batch_size=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7b6d4d39b210>

In [51]:
model2.evaluate(test_features2_tensor, test_targets2_tensor)
# model2(test_features2[:20].to_numpy())



[83.83763122558594, 7.527059555053711]

In [53]:
train_features2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2196 entries, 2579 to 18
Columns: 204 entries, Year to Status_Developing
dtypes: float32(204)
memory usage: 1.7 MB


In [56]:
from tensorflow.keras.layers import Normalization

normalization = Normalization(axis=None)
normalization.adapt(train_features2_tensor)
model3 = Sequential([Input(shape=(204,)),normalization,Dense(units=40,activation='sigmoid'),Dense(units=1)])
model3.compile(optimizer='sgd',loss='mean_squared_error',metrics=['mean_absolute_error'])
model3.fit(train_features2_tensor, train_targets2_tensor, epochs=20, batch_size=24)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7b6d4e534b50>

In [57]:
model3.evaluate(test_features2_tensor, test_targets2_tensor)
model3(test_features2_tensor[:40])



<tf.Tensor: shape=(40, 1), dtype=float32, numpy=
array([[70.81227 ],
       [71.51927 ],
       [72.02298 ],
       [70.76141 ],
       [70.68551 ],
       [72.611084],
       [71.51704 ],
       [70.696846],
       [72.700584],
       [70.81761 ],
       [71.523254],
       [71.324646],
       [70.6902  ],
       [71.51704 ],
       [70.6819  ],
       [70.69294 ],
       [71.71704 ],
       [72.58748 ],
       [70.73986 ],
       [70.68664 ],
       [72.69853 ],
       [72.69855 ],
       [70.68418 ],
       [70.743996],
       [72.68628 ],
       [70.693375],
       [72.69841 ],
       [72.698555],
       [71.836815],
       [72.69855 ],
       [72.38141 ],
       [70.68715 ],
       [72.70244 ],
       [72.70639 ],
       [70.730804],
       [70.738525],
       [70.69736 ],
       [70.68322 ],
       [71.18584 ],
       [72.14155 ]], dtype=float32)>

In [58]:
model4 = Sequential()
model4.add(normalization)
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(40, activation='relu'))
model4.add(Dense(1, activation='linear'))
model4.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model4.fit(train_features2,train_targets2, epochs=40, batch_size=20)

ValueError: The last dimension of the inputs to a Dense layer should be defined. Found None. Full input shape received: (None, None)

In [None]:
import keras
inputs = keras.Input(shape=(204,))
print(inputs.shape)
norm_inputs = normalization(inputs)
x = Dense(40, activation='relu')(norm_inputs)
x = Dense(40, activation='relu')(x)
x = Dense(40, activation='relu')(x) + Dense(40,activation='relu')(norm_inputs)
x = Dense(1, activation='linear')(x)
model5 = keras.Model(inputs=inputs, outputs=x)
print(model5.summary())
model5.compile('adam',loss='mean_squared_error',metrics=['mean_absolute_error'])

model5.fit(train_features2,train_targets2, epochs=20, batch_size=20)


(None, 204)


None
Epoch 1/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 4612.6694 - mean_absolute_error: 66.2944 
Epoch 2/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 932us/step - loss: 2467.4966 - mean_absolute_error: 44.4693
Epoch 3/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 854us/step - loss: 1210.0739 - mean_absolute_error: 27.6107
Epoch 4/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 833us/step - loss: 273.5031 - mean_absolute_error: 10.6942
Epoch 5/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 844us/step - loss: 103.4895 - mean_absolute_error: 8.1505
Epoch 6/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 813us/step - loss: 92.0931 - mean_absolute_error: 7.8510
Epoch 7/20
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 816us/step - loss: 91.1116 - mean_absolute_error: 7.8078
Epoch 8/20
[1m110/110[0m [32m━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x76a9fbdb2a90>

In [None]:
from numba import cuda
device = cuda.get_current_device()
device.reset()