# Life Expectancy Analysis Using TensorFlow

In [66]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the raw life-expectancy table from the CSV in the working directory.
dataset = pd.read_csv("life_expectancy.csv")
# A bare expression in the middle of a cell is silently discarded, so the shape
# is printed explicitly to make it visible.
print(dataset.shape)
# Last expression of the cell: displayed as the summary-statistics table.
dataset.describe()

In [None]:
# Display the column names of the raw table.
dataset.columns

In [None]:
# Remove the "Country" column by name; all other columns are kept.
# columns=[...] is the keyword form of drop([...], axis=1); axis=0 would address rows instead.
df = dataset.drop(columns=["Country"])

In [None]:
# Display the column names that remain after dropping "Country".
df.columns

In [None]:
#splitting data into labels and features
#the last column (life expectancy, per the original note) is the label
labels = df.iloc[:, -1]
#every column except the last one is a feature; slicing with :-1 instead of a
#hard-coded 0:20 keeps features and label complementary whatever the column count
#(assumes the label is the only non-feature column left in df - TODO confirm)
features = df.iloc[:, :-1]
features

##### Data Preprocessing

In [None]:
#one-hot encoding of the categorical features
#each category becomes its own indicator column (the original note mentions
#the development status column as the one being encoded)
features = pd.get_dummies(data=features)
features

In [11]:
#hold out 20% of the rows as a test set; random_state pins the shuffle so the split is repeatable
features_train, features_test, labels_train, labels_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=0,
)

In [13]:
# Dimensions of the held-out split as (rows, feature columns). The expression is live
# again so that re-running the cell reproduces its recorded output.
# features_train.shape  # uncomment (and make it the last line) to inspect the training split instead
features_test.shape

(588, 21)

In [38]:
#pick out the numerical columns (float64 / int64 dtypes); these are the ones
#handed to the normalisation step, since their value ranges differ from column to column
numerical_features = features.select_dtypes(
    include=['float64', 'int64'],
)
#only the column labels are needed by the column transformer
numerical_columns = numerical_features.columns


In [39]:
#column transformer: Normalizer is applied to the numerical columns,
#every other column is passed through unchanged
#NOTE(review): sklearn's Normalizer rescales each sample (row) to unit norm; it does not
#bring each feature onto a common range - StandardScaler (already imported) may be what
#was intended, confirm before relying on the scaled values
ct = ColumnTransformer(
    transformers=[('normalize', Normalizer(), numerical_columns)],
    remainder='passthrough',
)

In [52]:
#normalising the training set: fit the column transformer on the training features and transform them
#the result is an array rather than a DataFrame (per the original note)
features_train_scaled = ct.fit_transform(features_train)
#optional: convert back to a pd dataframe for inspection
# features_train_df = pd.DataFrame(features_train_scaled, columns=features_train.columns)
# features_train_df.head(5)

In [47]:
#normalising the test set
#use transform(), not fit(): fit() only (re)fits the transformer - on test data -
#and returns the ColumnTransformer object itself, so no scaled test array was produced
features_test_scaled = ct.transform(features_test)

#### Building The Model

In [54]:
#empty sequential model; the following cells append its layers one by one
my_model = Sequential()

Metal device set to: Apple M1


2022-08-08 16:21:21.003846: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-08-08 16:21:21.006348: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [55]:
#creating an input layer using keras library
#shape of input layer is the same as the number of feature columns
#the layer is passed straight to add() instead of being stored in a variable
#called `input`, which would shadow Python's builtin input() for the rest of the session
my_model.add(InputLayer(input_shape=(features.shape[1],)))

In [60]:
#hidden layer: 64 neurons with ReLU activation
#note: every execution of this cell appends one more layer to my_model
my_model.add(
    Dense(units=64, activation="relu")
)

In [61]:
#output layer: a single neuron with no activation, producing one value per sample
#note: every execution of this cell appends one more layer to my_model
my_model.add(
    Dense(units=1)
)


In [63]:
#print the layer-by-layer summary of the model
#NOTE(review): the recorded output lists four Dense layers although only two are added
#above - the layer cells look to have been executed twice; confirm on a fresh kernel
my_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1408      
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
 dense_2 (Dense)             (None, 64)                128       
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 1,666
Trainable params: 1,666
Non-trainable params: 0
_________________________________________________________________


#### Optimizer & Compile

In [67]:
#Adam optimizer with a learning rate of 0.01; handed to compile() in the next cell
opt = Adam(learning_rate=0.01)

In [76]:
#loss: mse = mean squared error
#metric: mae = mean absolute error, reported alongside the loss
my_model.compile(
    optimizer=opt,
    loss="mse",
    metrics=["mae"],
)

#### Fit & Evaluate

In [77]:
#epochs is number of times the you go through over the dataset
#batch_size is the number of samples, for instance first 10 samples, 
#seen before updating parameters

my_model.fit(features_train, labels_train, epochs=40, batch_size=1, verbose=1)

Epoch 1/40


2022-08-08 16:53:56.358471: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x16c2e0970>

In [78]:
#testing to the test data set
final_mse, final_mae = my_model.evaluate(features_test, labels_test, verbose=0)

2022-08-08 17:01:15.657727: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [83]:
#loss value returned by evaluate(): mean squared error on the test set
final_mse

91.38189697265625

In [84]:
#metric value returned by evaluate(): mean absolute error on the test set
final_mae

7.906745433807373