In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the BMI dataset
data = pd.read_csv("data.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,bmi,gender,is_training,name
0,0,34.207396,Male,1,img_0.bmp
1,1,26.45372,Male,1,img_1.bmp
2,2,34.967561,Female,1,img_2.bmp
3,3,22.044766,Female,1,img_3.bmp
4,4,37.758789,Female,1,img_4.bmp


In [3]:
print(len(data))

4206


In [4]:
print(len(data[data['is_training']==1]))
print(len(data[data['is_training']==0]))

3368
838


In [5]:
data.info()
image_paths='C:/Users/kisho/Desktop/UChicago Academics/Images'

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4206 entries, 0 to 4205
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   4206 non-null   int64  
 1   bmi          4206 non-null   float64
 2   gender       4206 non-null   object 
 3   is_training  4206 non-null   int64  
 4   name         4206 non-null   object 
dtypes: float64(1), int64(2), object(2)
memory usage: 164.4+ KB


In [6]:
data.value_counts()

Unnamed: 0  bmi        gender  is_training  name        
0           34.207396  Male    1            img_0.bmp       1
2793        28.662354  Female  1            img_2793.bmp    1
2795        26.289704  Female  1            img_2795.bmp    1
2796        33.792661  Male    1            img_2796.bmp    1
2797        28.160551  Male    1            img_2797.bmp    1
                                                           ..
1407        46.511695  Female  1            img_1407.bmp    1
1408        26.622856  Male    1            img_1408.bmp    1
1409        24.900200  Male    1            img_1409.bmp    1
1410        50.029844  Male    1            img_1410.bmp    1
4205        34.618844  Male    0            img_4205.bmp    1
Length: 4206, dtype: int64

In [7]:
# Preprocess the image paths and labels
image_paths = data["name"].values

valid_image_paths = []
import os
missing_images=0
missing_list = []
for image_path in image_paths:
    if os.path.isfile(image_path):
        valid_image_paths.append(image_path)
    else:
        missing_images+=1
        missing_list.append(image_path)
        #print(f"Image file not found: {image_path}")
        
print(missing_images)
#print(missing_list)
#image_paths = valid_image_paths
#labels = data["bmi"].values

244


In [8]:
data_filtered = data[~data['name'].isin(missing_list)]

In [9]:
data = data_filtered
len(data)

3962

In [10]:
print(len(data[data['is_training']==1]))
print(len(data[data['is_training']==0]))

3210
752


In [11]:
train_data = data[data["is_training"] == 1]
test_data = data[data["is_training"] == 0]

In [12]:
train_paths = train_data["name"].tolist()
train_labels = train_data["bmi"].tolist()

test_paths = test_data["name"].tolist()
test_labels = test_data["bmi"].tolist()

#from sklearn.model_selection import train_test_split

#train_paths, val_paths, train_labels, val_labels = train_test_split(
#    train_paths, train_labels, test_size=0.2, random_state=42
#)

In [13]:
print(len(train_labels))
print(len(val_labels))

3210


NameError: name 'val_labels' is not defined

#### Adding Custom Layers: The code adds custom layers on top of the VGG16 model. It adds a global average pooling layer to reduce the spatial dimensions, followed by a fully connected (dense) layer with ReLU activation function, and an output layer with a single neuron for BMI prediction.

In [None]:
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully connected layer with 64 neurons
x = Dense(64, activation="relu")(x)

# Add an output layer with 1 neuron for BMI prediction
predictions = Dense(1)(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

In [None]:
%%time 
model.compile(optimizer="adam", loss="mean_squared_error")

# Define the image data generator
datagen = image.ImageDataGenerator(rescale=1.0 / 255.0)

# Create training and validation data generators
train_generator = datagen.flow_from_dataframe(
    pd.DataFrame({"path": train_paths, "BMI": train_labels}),
    x_col="path",
    y_col="BMI",
    target_size=(224, 224),
    batch_size=32,
    class_mode="raw",
)
val_generator = datagen.flow_from_dataframe(
    pd.DataFrame({"path": val_paths, "BMI": val_labels}),
    x_col="path",
    y_col="BMI",
    target_size=(224, 224),
    batch_size=32,
    class_mode="raw",
)

# Train the model
history = model.fit(train_generator, validation_data=val_generator, epochs=10)

In [None]:
%%time

import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Calculate predictions for the validation set
val_predictions = model.predict(val_generator)
val_predictions = val_predictions.flatten()

# Create a dataframe for actual and predicted BMI values
val_results = pd.DataFrame({'Actual BMI': val_labels, 'Predicted BMI': val_predictions})

# Add the image file names column
val_results['Image File'] = val_paths

# Define the BMI categories
bmi_categories = ["Underweight", "Normal Weight", "Overweight"]

# Convert actual BMI values to BMI categories
val_results['Actual BMI Category'] = pd.cut(val_results['Actual BMI'], bins=[0, 18.5, 25, np.inf], labels=bmi_categories)

# Convert predicted BMI values to BMI categories
val_results['Predicted BMI Category'] = pd.cut(val_results['Predicted BMI'], bins=[0, 18.5, 25, np.inf], labels=bmi_categories)

# Calculate accuracy
val_accuracy = accuracy_score(val_results['Actual BMI Category'], val_results['Predicted BMI Category'])

print("Validation Accuracy:", val_accuracy)
print(val_results)

In [None]:
val_results.head()

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Calculate predictions for the validation set
val_predictions = model.predict(val_generator)
val_predictions = val_predictions.flatten()

# Calculate RMSE
val_rmse = np.sqrt(mean_squared_error(val_labels, val_predictions))

# Calculate MAE
val_mae = mean_absolute_error(val_labels, val_predictions)

print("Validation RMSE:", val_rmse)
print("Validation MAE:", val_mae)

In [None]:
from sklearn.metrics import roc_auc_score

# Convert actual BMI values to binary labels (0 or 1)
actual_labels = pd.cut(val_results['Actual BMI'], bins=[0, 24.9, np.inf], labels=[0, 1])

# Calculate AUC
auc = roc_auc_score(actual_labels, val_results['Predicted BMI'])

# Print AUC
print("AUC:", auc)

In [None]:
from sklearn.metrics import accuracy_score, r2_score

# Calculate R2 score
val_r2 = r2_score(val_results['Actual BMI'], val_results['Predicted BMI'])
print("R2 Score:",val_r2)