# PART 1: DEPENDENCIES AND LOADING DATA

In [31]:
# Import our dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf

In [6]:
# Reading CSV file from the Resources folder into a pandas DataFrame

body_performance_df = pd.read_csv('/content/bodyPerformance.csv')
body_performance_df.head()

Unnamed: 0,age,gender,height_cm,weight_kg,body fat_%,diastolic,systolic,gripForce,sit and bend forward_cm,sit-ups counts,broad jump_cm,class
0,27.0,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,C
1,25.0,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A
2,31.0,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,C
3,32.0,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,B
4,28.0,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,B


# PART 2: CLEANING DATA

In [8]:
# Datatypes
body_performance_df.dtypes

Unnamed: 0,0
age,float64
gender,object
height_cm,float64
weight_kg,float64
body fat_%,float64
diastolic,float64
systolic,float64
gripForce,float64
sit and bend forward_cm,float64
sit-ups counts,float64


In [9]:
# Unique values
body_performance_df.nunique()

Unnamed: 0,0
age,44
gender,2
height_cm,467
weight_kg,1398
body fat_%,527
diastolic,89
systolic,102
gripForce,550
sit and bend forward_cm,528
sit-ups counts,81


In [10]:
# Renaming columns
body_performance_df.rename(columns={
    'age': 'Age',
    'gender': 'Gender',
    'height_cm': 'Height (cm)',
    'weight_kg': 'Weight (Kg)',
    'body fat_%': 'Body Fat %',
    'diastolic': 'Diastolic BP',
    'systolic': 'Systolic BP',
    'gripFroce': 'Grip Froce',
    'sit and bend forward_cm': 'Sit and Bend Froward (cm)',
    'sit-ups counts': 'Sit-Ups Counts',
    'broad jump_cm': 'Broad Jump (cm)',
    'class': 'Classification'
}, inplace=True)

body_performance_df.head()

Unnamed: 0,Age,Gender,Height (cm),Weight (Kg),Body Fat %,Diastolic BP,Systolic BP,gripForce,Sit and Bend Froward (cm),Sit-Ups Counts,Broad Jump (cm),Classification
0,27.0,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,C
1,25.0,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A
2,31.0,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,C
3,32.0,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,B
4,28.0,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,B


In [11]:
#
body_performance_df['Classification'] = body_performance_df['Classification'].replace({'B':'A', 'C':'D'})
body_performance_df.head()

Unnamed: 0,Age,Gender,Height (cm),Weight (Kg),Body Fat %,Diastolic BP,Systolic BP,gripForce,Sit and Bend Froward (cm),Sit-Ups Counts,Broad Jump (cm),Classification
0,27.0,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,D
1,25.0,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A
2,31.0,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,D
3,32.0,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,A
4,28.0,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,A


# PART 3" CREATE LABELS SET (y) AND FEATURES (X)

In [19]:
# Creating new DataFrame for transformed non-numeric data to numeric, Boolean data.

body_performance_numeric_df = pd.get_dummies(body_performance_df, columns=['Gender'], dtype=int)

# Displaying the first few rows
body_performance_numeric_df.head()

Unnamed: 0,Age,Height (cm),Weight (Kg),Body Fat %,Diastolic BP,Systolic BP,gripForce,Sit and Bend Froward (cm),Sit-Ups Counts,Broad Jump (cm),Classification,Gender_F,Gender_M
0,27.0,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,D,0,1
1,25.0,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A,0,1
2,31.0,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,D,0,1
3,32.0,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,A,0,1
4,28.0,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,A,0,1


In [26]:
# Initiating the LabelEncoder

le = LabelEncoder()

# Applying LabelEncoder to the classification column (the labels)

body_performance_numeric_df['Classification'] = le.fit_transform(body_performance_numeric_df['Classification'].values)

In [27]:
# Separating data into labels and features

y = body_performance_numeric_df['Classification']

X = body_performance_numeric_df.drop(columns=['Classification'])

In [22]:
# Classification column contains: A, B, C, D
# A=Great_Health B=Good_Health C=Average_Health D=Need_more_training
y.head()

Unnamed: 0,Classification
0,1
1,0
2,1
3,0
4,0


In [23]:
# value counts for label (y) data
y.value_counts()

Unnamed: 0_level_0,count
Classification,Unnamed: 1_level_1
1,6698
0,6695


In [24]:
X.head()

Unnamed: 0,Age,Height (cm),Weight (Kg),Body Fat %,Diastolic BP,Systolic BP,gripForce,Sit and Bend Froward (cm),Sit-Ups Counts,Broad Jump (cm),Gender_F,Gender_M
0,27.0,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,0,1
1,25.0,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,0,1
2,31.0,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,0,1
3,32.0,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,0,1
4,28.0,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,0,1


In [25]:
# value counts for features (X) data
X.value_counts()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,count
Age,Height (cm),Weight (Kg),Body Fat %,Diastolic BP,Systolic BP,gripForce,Sit and Bend Froward (cm),Sit-Ups Counts,Broad Jump (cm),Gender_F,Gender_M,Unnamed: 12_level_1
27.0,157.0,49.10,30.7,70.0,86.0,27.7,19.7,51.0,167.0,1,0,2
21.0,148.0,42.00,22.8,62.0,104.0,22.4,23.8,26.0,155.0,1,0,1
42.0,165.1,54.40,14.5,82.0,146.0,36.5,16.2,47.0,211.0,0,1,1
42.0,162.8,60.10,27.4,82.0,129.0,28.2,19.5,38.0,156.0,1,0,1
42.0,163.0,57.60,24.1,98.0,148.0,30.6,16.6,33.0,148.0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
27.0,166.4,67.70,22.2,70.0,134.0,38.2,16.6,50.0,211.0,0,1,1
27.0,166.5,58.50,19.7,79.0,125.0,31.4,22.0,32.0,150.0,1,0,1
27.0,166.5,71.92,37.7,78.0,133.0,28.1,26.6,40.0,161.0,1,0,1
27.0,166.6,66.40,37.8,85.0,122.0,18.7,18.5,15.0,132.0,1,0,1


# PART 4: SPLIT DATA INTO TRAINING AND TESTING SETS

In [28]:
# Splitting data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

X_train.shape

(10044, 12)

# PART 5: FIT AND TRANSFORM DATA

In [29]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# PART 6: KERAS SEQUENTIAL MODEL CREATION

In [39]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
#  YOUR CODE GOES HERE

neural_network_model = tf.keras.models.Sequential()

# First hidden layer
#  YOUR CODE GOES HERE

neural_network_model.add(tf.keras.layers.Dense(units=80, activation="relu", input_dim=12))

# Second hidden layer
#  YOUR CODE GOES HERE

neural_network_model.add(tf.keras.layers.Dense(units=30, activation="sigmoid"))

# Output layer
#  YOUR CODE GOES HERE

neural_network_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
neural_network_model.summary()

# PART 7: COMPILE KERAS SEQUENTIAL MODEL AND TRAIN DATASET

In [40]:
# Compile the model
#  YOUR CODE GOES HERE

neural_network_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [41]:
# Train the model
#  YOUR CODE GOES HERE

fit_model = neural_network_model.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7404 - loss: 0.5363
Epoch 2/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8312 - loss: 0.3738
Epoch 3/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8456 - loss: 0.3517
Epoch 4/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8507 - loss: 0.3365
Epoch 5/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8644 - loss: 0.3177
Epoch 6/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8635 - loss: 0.3142
Epoch 7/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8742 - loss: 0.3030
Epoch 8/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8768 - loss: 0.2937
Epoch 9/100
[1m314/314[0m [32

In [42]:
# Evaluate the model using the test data
model_loss, model_accuracy = neural_network_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

105/105 - 0s - 2ms/step - accuracy: 0.8794 - loss: 0.2742
Loss: 0.2742401957511902, Accuracy: 0.8793669939041138
