In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Label

In [None]:
# Load the label table; later cells treat the first two columns as keys
# and every column from position 2 onward as a label.
label_train = pd.read_csv('label_train.csv')
label_train

In [None]:
# Count missing values in each column of the label table.
label_train.isna().sum()

In [None]:
# Histogram of every label column (all columns after the first two).
y_train = label_train.iloc[:, 2:].values
name = label_train.columns[2:].values

fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(15, 10))
ax = ax.ravel()

# One panel per label column, titled with the column name.
for i, title in enumerate(name):
    ax[i].hist(y_train[:, i], bins=20, color='b', edgecolor='k')
    ax[i].set_title(title)

plt.tight_layout()

In [None]:
def label_data_preprocessing(df):
    """Return a copy of ``df`` with each label column divided by its column mean.

    The 'Subject ID' and 'Day' key columns are left untouched; every other
    column is scaled so that its mean becomes 1.

    Args:
        df (pd.DataFrame): Label table. It is not modified.

    Returns:
        pd.DataFrame: New frame with the same columns and index as ``df``.
    """
    # List of columns to exclude from normalization
    exclude_columns = ['Subject ID', 'Day']
    columns_to_scale = [col for col in df.columns if col not in exclude_columns]

    # Work on a copy so the caller's frame keeps its raw values; otherwise
    # re-running the cell would normalize already-normalized data.
    df = df.copy()
    df[columns_to_scale] = df[columns_to_scale].div(df[columns_to_scale].mean())  # normalize by mean
    return df
# Mean-normalized labels; this frame is merged with the features further below.
preprocessed_label = label_data_preprocessing(label_train)
preprocessed_label.head()

Every label column is normalized by its column mean. Lunch calories, which we select as the prediction target when we load the data into the DataLoader later, is therefore expressed relative to its mean value.

## Viome Data

In [None]:
# Load the per-subject table; its last column ('Viome') holds a comma-separated vector string.
demo_viome_train = pd.read_csv('demo_viome_train.csv')
demo_viome_train.head()

In [None]:
# Count missing values in each column of the demographic/Viome table.
demo_viome_train.isna().sum()

In [None]:
# Look for rows whose Viome entry is the literal string '[]' (an empty vector).
rows_with_empty_values = demo_viome_train[demo_viome_train['Viome'] == '[]']
print(rows_with_empty_values['Viome'])

No empty value in Viome data

In [None]:
# Histograms of the raw per-subject features: every column except the first two and the Viome vector.
x_train = demo_viome_train.iloc[:, 2:-1].values
name = demo_viome_train.columns[2:-1].values

fig, ax = plt.subplots(nrows=5, ncols=4, figsize=(15, 10))
ax = ax.ravel()

# One panel per feature column, titled with the column name.
for i, title in enumerate(name):
    ax[i].hist(x_train[:, i], bins=20, color='b', edgecolor='k')
    ax[i].set_title(title)

# The 5x4 grid has three more panels than are plotted here; remove the trailing ones.
for spare_axis in ax[-3:]:
    fig.delaxes(spare_axis)
plt.tight_layout()

In [None]:
# Per-row histograms of the raw Viome vectors
x_train = demo_viome_train.iloc[:, -1].values
name = demo_viome_train.columns[-1]

# Each entry is a comma-separated string of numbers; parse them into a float array
extracted_values = np.array(
    [[float(token) for token in row.split(',')] for row in x_train]
)

fig, ax = plt.subplots(nrows=6, ncols=6, figsize=(15, 10))
ax = ax.ravel()

# One panel per row of the table (the 6x6 grid holds at most 36 rows)
for i, row_values in enumerate(extracted_values):
    ax[i].hist(row_values, bins=20, color='b', edgecolor='k')

plt.tight_layout()

### Data preprocessing of demographic data and Viome

- For categorical features, we applied one-hot encoding
- For numerical features, we applied Min-Max scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

def demo_viome_data_preprocessing(df):
    """Scale the numeric columns, one-hot encode the categoricals and parse/scale Viome.

    Steps:
      1. Min-max scale every column except 'Subject ID', 'Gender', 'Race',
         'Diabetes Status' and 'Viome'.
      2. One-hot encode 'Race' and 'Diabetes Status' (first level dropped).
      3. Parse each 'Viome' string into a float vector and min-max scale each
         vector position across rows.

    Args:
        df (pd.DataFrame): Raw demographic/Viome table. It is not modified.

    Returns:
        pd.DataFrame: Processed copy; the 'Viome' column holds one scaled
        numpy vector per row.

    Note:
        The scalers are fitted on ``df`` itself and are not returned, so the
        same scaling cannot be re-applied to another frame from here.
    """
    # Work on a copy: the original assigned the scaled columns straight into
    # the caller's frame, silently overwriting the raw values.
    df = df.copy()

    # List of columns to exclude from scaling
    exclude_columns = ['Subject ID', 'Gender', 'Race', 'Diabetes Status', 'Viome']
    columns_to_scale = [col for col in df.columns if col not in exclude_columns]
    # MinMaxScaler to columns_to_scale
    scaler = MinMaxScaler()
    df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

    # One-Hot Encoding for 'Race' and 'Diabetes Status' columns
    df = pd.get_dummies(df, columns=['Race', 'Diabetes Status'], drop_first=True)

    # Parse the comma-separated Viome strings into float vectors
    df['Viome'] = df['Viome'].apply(lambda x: np.array([float(num) for num in x.split(',')]))

    # Stack vectors to create a 2D array (rows x vector length) for scaling
    viome_array = np.stack(df['Viome'].values)

    # Re-fit the scaler on the Viome matrix (each vector position is scaled independently)
    viome_scaled = scaler.fit_transform(viome_array)
    # Replace original column with the scaled values
    df['Viome'] = list(viome_scaled)

    return df
    
# Processed per-subject features; merged with the other tables on 'Subject ID' below.
demo_viome_train_processed = demo_viome_data_preprocessing(demo_viome_train)

In [None]:
# Preview the scaled and one-hot-encoded table.
demo_viome_train_processed.head()

In [None]:
# Column names and float feature matrix of the processed table (Viome vectors excluded)
name = demo_viome_train_processed.columns.difference(['Viome'], sort=False).tolist()
x_train = demo_viome_train_processed.drop(columns=['Viome']).values
x_train = x_train.astype(float)

# Grid size and number of panels actually needed
rows, cols = 5, 5
num_features = x_train.shape[1]

fig, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(15, 10))
ax = ax.ravel()

# One histogram per processed feature
for i, title in enumerate(name):
    ax[i].hist(x_train[:, i], bins=20, color='b', edgecolor='k')
    ax[i].set_title(title)

# Remove the grid panels that were not used
for unused_axis in ax[num_features:]:
    fig.delaxes(unused_axis)

plt.tight_layout()
plt.show()

In [None]:
# Per-row histograms of the min-max scaled Viome vectors
x_train = demo_viome_train_processed['Viome'].values

fig, ax = plt.subplots(nrows=6, ncols=6, figsize=(15, 10))
ax = ax.ravel()

# One panel per row of the table (the 6x6 grid holds at most 36 rows)
for i, scaled_vector in enumerate(x_train):
    ax[i].hist(scaled_vector, bins=20, color='b', edgecolor='k')

plt.tight_layout()

## CGM Data 

In [None]:
# Load the CGM table; it is merged with the image table on ('Subject ID', 'Day') below.
cgm_train = pd.read_csv('cgm_train.csv')
cgm_train

In [None]:
# Count missing values per column (empty recordings stored as '[]' are handled after the merge).
cgm_train.isnull().sum()

We will preprocess CGM data after merging all dataframes. 

## Image data

In [None]:
# Load the image table; the image columns hold nested-list literals stored as strings.
img_train = pd.read_csv('img_train.csv')
img_train

In [None]:
# Count missing values in each column of the image table.
img_train.isna().sum()

In [None]:
# Histograms of the two columns at positions 2 and 3 of the image table (the fiber data)
x_train = img_train.iloc[:, 2:4].values
name = img_train.columns[2:4].values

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax = ax.ravel()

# One panel per column, titled with the column name
for i, title in enumerate(name):
    ax[i].hist(x_train[:, i], bins=20, color='b', edgecolor='k')
    ax[i].set_title(title)

plt.tight_layout()

In [None]:
# Impute missing values in 'Breakfast Fiber' with the column median
# (computed over the non-missing entries of this same table).
img_train['Breakfast Fiber'] = img_train['Breakfast Fiber'].fillna(img_train['Breakfast Fiber'].median())

In [None]:
# Re-check missing values after the imputation above.
img_train.isna().sum()

In [None]:
import ast
# Decode the first "Image Before Breakfast" entry: the cell stores a nested
# list literal as a string, which literal_eval turns back into Python lists.
rgb_image_str = img_train["Image Before Breakfast"][0]
rgb_image = ast.literal_eval(rgb_image_str)
rgb_array = np.array(rgb_image)
# Plotting the image
plt.imshow(rgb_array)
plt.axis('off')  # Turn off axis labels
plt.show()

In [None]:
# Decode and display the first "Image Before Lunch" entry (a nested list
# literal stored as a string).
rgb_image_str = img_train["Image Before Lunch"][0]
rgb_image = ast.literal_eval(rgb_image_str)
rgb_array = np.array(rgb_image)
# Plotting the image
plt.imshow(rgb_array)
plt.axis('off')  # Turn off axis labels
plt.show()

# The loop that used to follow only re-assigned rgb_image_str for every row
# (its shape print was commented out), so it was removed as dead code.

## Merge the three dataframes and labels

In [None]:
# Merge the CGM and image tables on ('Subject ID', 'Day').
# pd.merge defaults to an inner join, so only (subject, day) pairs present in
# both tables are kept. The unused df1/df2 aliases were removed.
merged_df_train = pd.merge(cgm_train, img_train, on=['Subject ID', 'Day'])

In [None]:
# Attach the per-subject features; they are keyed by 'Subject ID' only,
# so each subject's row is repeated for every day of that subject.
merged_df_train_wdemo_wviome = merged_df_train.merge(
    demo_viome_train_processed,
    on=['Subject ID'])

In [None]:
# Attach the mean-normalized labels for each (subject, day) pair.
merged_df_wlabels = pd.merge(merged_df_train_wdemo_wviome, preprocessed_label, on=['Subject ID', 'Day'])
merged_df_wlabels

## Check for missing data and remove them

In [None]:
# Rows whose lunch image is the literal string '[]' (no image recorded).
rows_with_empty_values = merged_df_wlabels[merged_df_wlabels['Image Before Lunch'] == '[]']
print(rows_with_empty_values['Image Before Lunch'])

In [None]:
# Drop those rows by index label; the remaining index keeps its gaps (it is not reset).
merged_df_wlabels = merged_df_wlabels.drop(rows_with_empty_values.index)

In [None]:
# Rows whose breakfast image is the literal string '[]' (no image recorded).
rows_with_empty_values = merged_df_wlabels[merged_df_wlabels['Image Before Breakfast'] == '[]']
print(rows_with_empty_values['Image Before Breakfast'])

In [None]:
# Drop the rows found by the breakfast-image check above.
merged_df_wlabels = merged_df_wlabels.drop(rows_with_empty_values.index)


In [None]:
# Rows whose CGM recording is the literal string '[]' (no readings).
rows_with_empty_values = merged_df_wlabels[merged_df_wlabels['CGM Data'] == '[]']
print(rows_with_empty_values['CGM Data'])

In [None]:
# Drop the rows found by the CGM check above, so the interpolation step never sees an empty recording.
merged_df_wlabels = merged_df_wlabels.drop(rows_with_empty_values.index)

In [None]:
# Preview the merged table after removing rows with empty images or CGM data.
merged_df_wlabels.head()

## Interpolate CGM data to per minute and Normalize by Baseline Fasting Glucose 

We interpolated the CGM data to per-minute intervals and normalized each series by its baseline fasting glucose (the first reading of the series).

In [None]:
from datetime import datetime
from scipy.interpolate import interp1d  # For interpolation


def parse_cgm_data(cgm_entry):
    """Turn a CGM cell into a Python object.

    String entries are evaluated as Python literals; anything that is not a
    string is returned unchanged.
    """
    return ast.literal_eval(cgm_entry) if isinstance(cgm_entry, str) else cgm_entry

# Resample one CGM recording to per-minute values
def interpolate_cgm_data(cgm_data):
    """
    Linearly resample a CGM recording onto a one-minute grid and divide it by its first reading.

    Args:
        cgm_data (list): (timestamp, glucose level) pairs. A timestamp is either a
            ``datetime`` or a "%Y-%m-%d %H:%M:%S" string.

    Returns:
        list: (timestamp string, glucose level / first glucose level) tuples, one per
        minute from the first to the last timestamp of the recording.
    """
    def _as_datetime(stamp):
        # Accept both already-parsed datetimes and formatted strings
        if isinstance(stamp, datetime):
            return stamp
        return datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")

    timestamps = [_as_datetime(entry[0]) for entry in cgm_data]
    glucose_levels = [entry[1] for entry in cgm_data]

    # The first reading of the recording serves as the normalization baseline
    baseline_glucose = glucose_levels[0]
    start = timestamps[0]

    def _seconds_from_start(moment):
        return (moment - start).total_seconds()

    # Linear interpolant over elapsed seconds
    interp_func = interp1d(
        [_seconds_from_start(moment) for moment in timestamps],
        glucose_levels,
        kind="linear",
    )

    # Evaluate the interpolant on a one-minute grid spanning the recording
    minute_grid = pd.date_range(start, timestamps[-1], freq="min")
    per_minute_levels = interp_func([_seconds_from_start(moment) for moment in minute_grid])

    return [
        (moment.strftime("%Y-%m-%d %H:%M:%S"), float(level / baseline_glucose))
        for moment, level in zip(minute_grid, per_minute_levels)
    ]

# Apply preprocessing and interpolation
merged_df_wlabels["CGM Data"] = merged_df_wlabels["CGM Data"].apply(parse_cgm_data)  # Parse CGM Data
merged_df_wlabels["CGM Data Per Minute"] = merged_df_wlabels["CGM Data"].apply(interpolate_cgm_data)

In [None]:
# Compare the raw and per-minute sample counts of one recording.
# .iloc is positional: the earlier drop() calls leave gaps in the index, so a
# label lookup with [2] raises KeyError whenever the row labelled 2 was removed.
len(merged_df_wlabels["CGM Data"].iloc[2]), len(merged_df_wlabels["CGM Data Per Minute"].iloc[2])

In [None]:
# Inspect the A1C column of the merged table (one of the demo features used by the dataset below).
merged_df_wlabels['A1C']

The CGM series have variable length, so in the next step the `collate_fn` function pads every series to a fixed length of `max_length = 750`.

## Custom PyTorch data loader

In [None]:
import torch
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
import ast
import numpy as np


class MultiModalDataset(Dataset):
    """Dataset yielding (image, CGM, Viome, demo, label) tensors for one table row.

    Args:
        dataframe (pd.DataFrame): Merged table with one row per sample.
        label_columns (list[str]): Label column names; only the first is used.
        image_columns (list[str]): Image column names; only the first is used.
            The cell must be a string holding an H x W x C nested-list literal.
        cgm_columns (str): Column holding a sequence of (timestamp, value) pairs.
        viome_columns (list[str]): Viome column names; only the first is used.
        demo_columns (list[str]): Numeric columns stacked into the demo vector.
        transform (callable, optional): Applied to the image tensor.
        target_transform (callable, optional): Applied to the label tensor.
    """

    def __init__(self, dataframe, label_columns, image_columns, cgm_columns, viome_columns, demo_columns, transform=None, target_transform=None):
        self.df = dataframe
        self.label_columns = label_columns
        self.image_columns = image_columns
        self.cgm_columns = cgm_columns
        self.viome_columns = viome_columns
        self.demo_columns = demo_columns
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, cgm, viome, demo, label)`` float32 tensors for row ``idx`` (positional)."""
        # Build the row once instead of once per modality.
        row = self.df.iloc[idx]

        # Image: parse the nested-list literal and scale pixel values to [0, 1]
        image_array = np.array(ast.literal_eval(row[self.image_columns[0]])) / 255
        image_tensor = torch.tensor(image_array, dtype=torch.float32).permute(2, 0, 1)  # Shape: (C, H, W)

        # CGM time series: keep the values, ignore the timestamps
        cgm_values = [entry[1] for entry in row[self.cgm_columns]]
        cgm_tensor = torch.tensor(cgm_values, dtype=torch.float32)  # Shape: (time_series_length,)

        # Viome vector
        viome_tensor = torch.tensor(row[self.viome_columns[0]], dtype=torch.float32)

        # Demo features: a row of a mixed-type frame is an object-dtype Series, so
        # convert it to a numeric array explicitly rather than relying on
        # torch.tensor() iterating over a pandas Series.
        demo_values = row[self.demo_columns].to_numpy(dtype=np.float32)
        demo_tensor = torch.tensor(demo_values)

        # Label: first label column only
        label = float(row[self.label_columns].to_numpy()[0])
        label_tensor = torch.tensor(label, dtype=torch.float32)

        if self.transform:
            image_tensor = self.transform(image_tensor)
        if self.target_transform:
            label_tensor = self.target_transform(label_tensor)
        return image_tensor, cgm_tensor, viome_tensor, demo_tensor, label_tensor

- Lunch calories are selected as our label.
- Image before lunch, Viome, CGM data per minute and anthropometric data are selected as features.
- For anthropometric data, we excluded some features which are highly correlated with ones in the current list to avoid multicollinearity.

In [None]:
# Column selections passed to MultiModalDataset. The dataset reads only the
# first entry of the label, image and Viome lists; cgm_columns is a plain
# column name (string), not a list.
label_columns = ['Lunch Calories']
image_columns = ['Image Before Lunch']
viome_columns = ['Viome']
demo_columns = ['A1C', 'Insulin', 'Triglycerides', 'Cholesterol','HDL', 'Non-HDL', 'VLDL', 'HOMA-IR', 'BMI']
cgm_columns = 'CGM Data Per Minute'

In [None]:
# Initialize MultiModalDataset on the merged, cleaned table.
# No transform/target_transform is passed, so tensors are returned as built.
dataset = MultiModalDataset(
    dataframe=merged_df_wlabels,
    label_columns=label_columns,
    image_columns=image_columns,
    cgm_columns=cgm_columns,
    viome_columns=viome_columns,
    demo_columns = demo_columns
)

In [None]:
from torch.utils.data import DataLoader, random_split

# Custom collate function for handling variable-length sequences (since CGM data have variable length)
def collate_fn(batch):
    """Collate dataset samples into batch tensors, padding CGM series to 750 steps.

    Args:
        batch: Sequence of ``(image, cgm, viome, demo, label)`` tensor tuples.

    Returns:
        tuple: ``(images, cgm_tensors, viomes, demos, labels)`` with shapes
        ``(B, C, H, W)``, ``(B, 750, 1)``, ``(B, n_viome, 1)``,
        ``(B, n_demo, 1)`` and ``(B,)``.
    """
    # Imported here so the function does not depend on a later notebook cell
    # having executed `import torch.nn.functional as F` first.
    import torch.nn.functional as F

    images, cgm_tensors, viomes, demos, labels = zip(*batch)

    # Stack image tensors and labels
    images = torch.stack(images)  # Shape: (batch_size, C, H, W)
    labels = torch.stack(labels)  # Shape: (batch_size,)
    viomes = torch.stack(viomes)
    demos = torch.stack(demos)

    # Right-pad every CGM series with zeros to max_length. Once padded they all
    # share one length, so a plain stack replaces the former pad_sequence call.
    # NOTE(review): zero padding is indistinguishable from data downstream; the
    # MultiheadAttention encoder in this notebook masks only +inf entries.
    max_length = 750
    cgm_tensors = torch.stack(
        [F.pad(tensor, (0, max_length - len(tensor)), "constant", 0) for tensor in cgm_tensors]
    )  # Shape: (batch_size, max_length)

    # Add a trailing feature dimension of size 1 to each sequence-like input
    cgm_tensors = cgm_tensors.unsqueeze(-1)  # Shape: (batch_size, max_length, 1)
    viomes = viomes.unsqueeze(-1)
    demos = demos.unsqueeze(-1)
    return images, cgm_tensors, viomes, demos, labels

# Split dataset into train and validation sets
train_size = int(0.8 * len(dataset))  # 80% for training
val_size = len(dataset) - train_size  # 20% for validation
# Seed the split so the same rows land in train/validation on every run;
# an unseeded random_split makes the reported validation scores irreproducible.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Create DataLoaders for train and validation sets
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=10, shuffle=False, collate_fn=collate_fn)

In [None]:
import torch.nn.functional as F
# Print the tensor shapes of every training batch (shapes follow collate_fn).
for batch_idx, (images, cgm_tensors, viomes, demos, labels) in enumerate(train_loader):
    print(f"Training Batch {batch_idx + 1}:")
    print(f"Image batch shape: {images.shape}")  # (batch_size, C, H, W)
    print(f"CGM batch shape: {cgm_tensors.shape}")  # (batch_size, 750, 1)
    print(f"Viome batch shape: {viomes.shape}")  # (batch_size, viome_length, 1)
    print(f"Demo batch shape: {demos.shape}")  # (batch_size, num_demo_features, 1)
    print(f"Labels shape: {labels.shape}")  # (batch_size,)
    print(demos[0])

In [None]:
import torch.nn.functional as F

# Function to plot an image, CGM and Viome data
def plot_image_with_label(image_tensor, label, cgm_tensor, viomes, batch_index):
    """Show one sample as three panels: image (titled with its label), CGM series, Viome vector.

    Args:
        image_tensor (torch.Tensor): Image of shape (C, H, W), scaled to [0, 1].
        label (torch.Tensor): Scalar label shown in the image title.
        cgm_tensor (torch.Tensor): CGM series plotted against its index.
        viomes (torch.Tensor): Viome vector plotted against its index.
        batch_index (int): Zero-based batch number; displayed as ``batch_index + 1``.
    """
    # Convert tensor from C x H x W to H x W x C for plotting
    image = image_tensor.permute(1, 2, 0).numpy()
    # Round before casting: a plain astype(np.uint8) truncates, so a float32
    # value such as 254.99998 would be displayed as 254 instead of 255.
    image = np.clip(np.rint(image * 255), 0, 255).astype(np.uint8)
    # Plot the image
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.imshow(image)
    plt.title(f'Batch {batch_index + 1} - Label: {label.item()}')
    plt.axis('off')

    # Plot the CGM data (zero padding added by the collate function is included)
    cgm_values = cgm_tensor.numpy()

    plt.subplot(1, 3, 2)
    plt.plot(cgm_values)
    plt.title('CGM Time-Series Data')
    plt.xlabel('Time Points')
    plt.ylabel('CGM Value')

    # Plot the viome data
    plt.subplot(1, 3, 3)
    plt.plot(viomes)
    plt.title('Viome Data')

    plt.tight_layout()
    plt.show()

# Plot the first sample of the first training batch (the loop breaks after one batch)
for batch_idx, (images, cgm_tensors, viomes,demos, labels) in enumerate(train_loader):
    print(f"Training Batch {batch_idx + 1}:")
    print(f"Image batch shape: {images.shape}")  # (batch_size, C, H, W)
    print(f"CGM batch shape: {cgm_tensors.shape}")  # (batch_size, 750, 1)
    print(f"Viome batch shape: {viomes.shape}")  # (batch_size, viome_length, 1)
    print(f"Labels shape: {labels.shape}")  # (batch_size,)

    # Take the first sample of the batch for each modality
    first_image = images[0] # Image of the first sample
    first_cgm = cgm_tensors[0]  # First CGM time-series data
    first_label = labels[0]
    first_viome = viomes[0] # First Viome data

    plot_image_with_label(first_image, first_label, first_cgm, first_viome, batch_idx)
    break  # Stop after first batch for visualization

# Same for the first validation batch
for batch_idx, (images, cgm_tensors, viomes, demos,labels) in enumerate(val_loader):
    print(f"Validation Batch {batch_idx + 1}:")
    print(f"Image batch shape: {images.shape}")  # (batch_size, C, H, W)
    print(f"CGM batch shape: {cgm_tensors.shape}")  # (batch_size, 750, 1)
    print(f"Viome batch shape: {viomes.shape}")  # (batch_size, viome_length, 1)
    print(f"Labels shape: {labels.shape}")  # (batch_size,)

    first_image = images[0]
    first_cgm = cgm_tensors[0]
    first_label = labels[0]
    first_viome = viomes[0]

    plot_image_with_label(first_image, first_label, first_cgm, first_viome, batch_idx)
    break  # Stop after first batch for visualization

## Modelling

- We started with a relatively simple model: a CNN for the image data and an LSTM for the CGM data, each used to extract an embedding.
- CNN: two convolutional layers (32 and 64 filters of size 3x3), each followed by ReLU activation and 2x2 max pooling, and a final fully connected layer that produces a 64-dimensional embedding of the image.
- LSTM: a two-layer LSTM with 64 hidden units, followed by a fully connected layer that produces a 64-dimensional embedding.

In [None]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    """LSTM encoder that turns a (batch, time, features) sequence into a fixed-size embedding."""

    def __init__(self, input_size=1, hidden_size=64, num_layers=2, embedding_size=64):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, embedding_size)

    def forward(self, x):
        """Project the top-layer output at the final time step to the embedding size."""
        sequence_out, _final_state = self.lstm(x)
        # Only the last time step feeds the projection
        return self.fc(sequence_out[:, -1])


class CNN(nn.Module):
    """Two-block convolutional encoder mapping an image batch to a fixed-size embedding.

    The fully connected layer expects 64 * 16 * 16 flattened features, i.e. a
    64x64 input after two 2x2 poolings.
    """

    def __init__(self, input_channels=3, embedding_size=64):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 max pooling
        self.fc = nn.Linear(64 * 16 * 16, embedding_size)  # 64x64 input -> 16x16 after two poolings

    def forward(self, x):
        """Apply conv -> ReLU -> pool twice, flatten, and project to the embedding."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(nn.functional.relu(conv(x)))
        flat = x.view(x.size(0), -1)  # (batch_size, 64*16*16)
        return self.fc(flat)

- The `MultiModalModel` class runs each modality through its own encoder; the resulting embeddings are concatenated into a joint embedding and fed into the `Regressor`, the final neural network that predicts lunch calories.
- References for this part of the code: https://github.com/stmilab/joint_embedding_calorie_prediction/tree/main?tab=readme-ov-file
- The final network contains two fully connected layers with ReLU activation and dropout, followed by a final fully connected layer that outputs the prediction.

In [None]:
class MultiModalModel(nn.Module):
    """Container that applies one encoder per modality.

    ``n_classes`` is accepted but not used.
    """

    def __init__(self, models, n_classes=1):
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, xs):
        """Return ``[models[0](xs[0]), models[1](xs[1]), ...]`` as a list."""
        embeddings = []
        for encoder, modality_input in zip(self.models, xs):
            embeddings.append(encoder(modality_input))
        return embeddings

class Regressor(nn.Module):
    """MLP head: two (Linear -> ReLU -> Dropout) stages followed by a linear output layer."""

    def __init__(self, input_size=64, hidden=128, output_size=1, dropout=0.2):
        super().__init__()
        stages = []
        # Two hidden stages: input_size -> hidden, then hidden -> hidden
        for in_width in (input_size, hidden):
            stages.extend([nn.Linear(in_width, hidden), nn.ReLU(), nn.Dropout(p=dropout)])
        stages.append(nn.Linear(hidden, output_size))
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the layer stack."""
        return self.layers(x)

### Image (CNN) + CGM (LSTM)

In [None]:
cgm_model = LSTM(input_size=1, hidden_size=64, num_layers=2, embedding_size=64)
img_model = CNN(embedding_size=64)

# Check if CUDA (GPU support) is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Combining the multimodal model
multimodal_model = MultiModalModel([img_model, cgm_model,]).to(device)

# Prediction model: consumes the concatenated image + CGM embeddings
expected_input_size = 64 + 64
nutrient_predictor = Regressor(expected_input_size, hidden=128, output_size=1, dropout=0.2).to(device)

import torch.optim as optim
optimizer = optim.Adam(
        list(nutrient_predictor.parameters())
        + list(img_model.parameters())
        + list(cgm_model.parameters()),
        lr=1e-4,
        weight_decay=0.2,
    )

# Initialize lists to store epoch losses
m1_train_losses = []
m1_val_losses = []

# Training Loop
min_loss = float("inf")
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from multimodal_model,
    # so its mode must be switched explicitly as well.
    multimodal_model.train()
    nutrient_predictor.train()
    for batch_idx, (images, cgm_tensors, viomes, demos, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        images, cgm_tensors, demos, labels = images.to(device), cgm_tensors.to(device), demos.to(device), labels.to(device).unsqueeze(-1)

        # Forward pass
        img_embedding, cgm_embedding = multimodal_model([images, cgm_tensors])
        combined_embedding = torch.cat((img_embedding, cgm_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Mean squared relative error; the square root is taken per epoch below
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    # RMSRE over the epoch (square root of the mean of the per-batch MSREs)
    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m1_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase. Without nutrient_predictor.eval() the regressor's
    # dropout layers stayed active and added noise to the validation predictions.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images, cgm_tensors, demos, labels = images.to(device), cgm_tensors.to(device), demos.to(device), labels.to(device).unsqueeze(-1)

            img_embedding, cgm_embedding = multimodal_model([images, cgm_tensors])
            combined_embedding = torch.cat((img_embedding, cgm_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

        avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
        m1_val_losses.append(avg_val_rmsre)  # Store validation loss

        # Track minimum loss (the value only; no checkpoint is saved)
        if avg_val_rmsre < min_loss:
            min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot Train and Validation Loss
plt.figure(figsize=(5, 5))
plt.plot(range(epochs), m1_train_losses, label='Train', marker='o', color='b')
plt.plot(range(epochs), m1_val_losses, label='Validation', marker='s',color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
plt.show()

# Mean and standard deviation of the RMSRE from epoch index 14 onward.
# The train line previously reported the std of the *validation* losses.
print(np.mean(m1_train_losses[14:]),np.std(m1_train_losses[14:]))
print(np.mean(m1_val_losses[14:]),np.std(m1_val_losses[14:]))

- Next, we turn to more complex models.
- We use vision transformers for Image data and transformer encoder for CGM data as in Zhang et al.

### Image + CGM (transformer)

In [None]:
## Transformer

class MultiheadAttention(nn.Module):
    """Transformer encoder that maps a (batch, seq_len, n_features) sequence to a vector.

    A 1x1 convolution lifts each time step to ``embed_dim``, a stack of
    transformer encoder layers processes the sequence, and the mean over the
    sequence dimension is projected to ``num_classes`` outputs.

    Time steps whose first feature equals ``+inf`` are treated as padding.

    Args:
        n_features: Number of input features per time step.
        embed_dim: Transformer model width.
        num_heads: Attention heads per encoder layer.
        num_classes: Size of the returned vector.
        dropout: Dropout used inside the encoder layers.
        num_layers: Number of encoder layers.
        use_pos_emb: If True, add a sinusoidal encoding of the times ``t``.
        activation: Activation passed to the encoder layers.
    """

    def __init__(
        self,
        n_features,
        embed_dim,
        num_heads,
        num_classes,
        dropout=0,
        num_layers=6,
        use_pos_emb=False,
        activation=nn.GELU(),
    ):
        super().__init__()

        self.use_pos_emb = use_pos_emb

        self.conv = nn.Conv1d(n_features, embed_dim, 1, 1)
        self.attn = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                embed_dim,
                num_heads,
                batch_first=True,
                dropout=dropout,
                activation=activation,
            ),
            num_layers,
        )

        # Sinusoid denominators 10000^(2*(i//2)/embed_dim). Written with `**`
        # so the class no longer needs `math`, which this notebook imports only
        # in a later cell.
        self.register_buffer(
            "position_vec",
            torch.tensor(
                [
                    10000.0 ** (2.0 * (i // 2) / embed_dim)
                    for i in range(embed_dim)
                ],
            ),
        )

        self.linear = nn.Linear(embed_dim, num_classes)
        self.sig = nn.Sigmoid()  # not used in forward; kept so the module's attributes are unchanged

    def temporal_enc(self, time, non_pad_mask):
        """
        Sinusoidal encoding of time stamps.

        Input: time and non_pad_mask, both batch*seq_len (mask is True at real steps).
        Output: batch*seq_len*d_model, zero at padded steps.
        """
        # (batch, seq_len, 1) / (d_model,) -> (batch, seq_len, d_model).
        # The previous layout put time on the last axis, so the sin/cos split
        # alternated over time steps instead of embedding dimensions.
        result = time.unsqueeze(-1) / self.position_vec
        result[:, :, 0::2] = torch.sin(result[:, :, 0::2])
        result[:, :, 1::2] = torch.cos(result[:, :, 1::2])
        return result * non_pad_mask.unsqueeze(-1)

    def forward(self, x, lens=0, t=0):
        """Encode ``x`` of shape (batch, seq_len, n_features) into (batch, num_classes).

        ``lens`` is accepted but not used. ``t`` (batch, seq_len) is read only
        when ``use_pos_emb`` is True.
        """
        # True where a time step is padding (+inf in the first feature)
        pad_mask = (x == float("inf"))[:, :, 0]
        # Zero the padded steps without writing into the caller's tensor
        # (the former `x[mask] = 0` modified the input batch in place).
        x = x.masked_fill(pad_mask.unsqueeze(-1), 0)

        z = self.conv(x.permute(0, 2, 1))  # (batch, embed_dim, seq_len)
        z = z.permute(0, 2, 1).float()  # (batch, seq_len, embed_dim)

        if self.use_pos_emb:
            # The encoding must be kept at real steps, hence the inverted mask
            # (the padding mask itself used to be passed here).
            z = z + self.temporal_enc(t, ~pad_mask)

        z = self.attn(z, src_key_padding_mask=pad_mask)

        # Mean over the sequence dimension, then project
        return self.linear(z.mean(1))

In [None]:
import torch.nn.functional as F
import pdb
import math


def img_to_patch(x, patch_size, flatten_channels=True):
    """
    Cut an image batch into non-overlapping square patches.

    Inputs:
        x - torch.Tensor representing the image of shape [B, C, H, W]
        patch_size - Number of pixels per dimension of the patches (integer)
        flatten_channels - If True, each patch is returned as a flat feature
                           vector [B, H'*W', C*p_H*p_W]; otherwise as an image
                           grid [B, H'*W', C, p_H, p_W].
    """
    batch, channels, height, width = x.shape
    grid_h, grid_w = height // patch_size, width // patch_size

    patches = (
        x.reshape(batch, channels, grid_h, patch_size, grid_w, patch_size)
        .permute(0, 2, 4, 1, 3, 5)  # [B, H', W', C, p_H, p_W]
        .flatten(1, 2)  # [B, H'*W', C, p_H, p_W]
    )
    if not flatten_channels:
        return patches
    return patches.flatten(2, 4)  # [B, H'*W', C*p_H*p_W]


class AttentionBlock(nn.Module):
    """Pre-LayerNorm transformer block: self-attention and a feed-forward network, each with a residual connection."""

    def __init__(self, embed_dim, hidden_dim, num_heads, dropout=0.0):
        """
        Inputs:
            embed_dim - Dimensionality of input and attention feature vectors
            hidden_dim - Dimensionality of hidden layer in feed-forward network
                         (usually 2-4x larger than embed_dim)
            num_heads - Number of heads to use in the Multi-Head Attention block
            dropout - Dropout rate used in the attention module and the
                      feed-forward network
        """
        super().__init__()

        self.layer_norm_1 = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.layer_norm_2 = nn.LayerNorm(embed_dim)
        feed_forward = [
            nn.Linear(embed_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, embed_dim),
            nn.Dropout(dropout),
        ]
        self.linear = nn.Sequential(*feed_forward)

    def forward(self, x):
        """Apply attention and feed-forward sub-layers; the output has the same shape as ``x``."""
        normed = self.layer_norm_1(x)
        attended, _weights = self.attn(normed, normed, normed)
        x = x + attended
        return x + self.linear(self.layer_norm_2(x))


class VisionTransformer(nn.Module):
    """Vision Transformer: patch embedding, CLS token, learned positions, attention blocks and an MLP head."""

    def __init__(
        self,
        embed_dim,
        hidden_dim,
        num_channels,
        num_heads,
        num_layers,
        num_classes,
        patch_size,
        num_patches,
        dropout=0.0,
    ):
        """
        Inputs:
            embed_dim - Dimensionality of the input feature vectors to the Transformer
            hidden_dim - Dimensionality of the hidden layer in the feed-forward networks
                         within the Transformer
            num_channels - Number of channels of the input (3 for RGB)
            num_heads - Number of heads to use in the Multi-Head Attention block
            num_layers - Number of layers to use in the Transformer
            num_classes - Size of the output vector produced by the MLP head
            patch_size - Number of pixels that the patches have per dimension
            num_patches - Maximum number of patches an image can have
            dropout - Amount of dropout to apply in the feed-forward network and
                      on the input encoding
        """
        super().__init__()

        self.patch_size = patch_size

        # Layers/Networks
        patch_features = num_channels * (patch_size**2)
        self.input_layer = nn.Linear(patch_features, embed_dim)
        blocks = []
        for _ in range(num_layers):
            blocks.append(AttentionBlock(embed_dim, hidden_dim, num_heads, dropout=dropout))
        self.transformer = nn.Sequential(*blocks)
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(embed_dim), nn.Linear(embed_dim, num_classes)
        )
        self.dropout = nn.Dropout(dropout)

        # Parameters/Embeddings
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
        self.pos_embedding = nn.Parameter(torch.randn(1, 1 + num_patches, embed_dim))

    def forward(self, x):
        """Map an image batch [B, C, H, W] to [B, num_classes] using the CLS token output."""
        # Split into flattened patches and embed each one
        patches = img_to_patch(x, self.patch_size)
        batch_size, num_tokens, _ = patches.shape
        tokens = self.input_layer(patches)

        # Prepend the CLS token and add the positional encoding
        cls_token = self.cls_token.repeat(batch_size, 1, 1)
        tokens = torch.cat([cls_token, tokens], dim=1)
        tokens = tokens + self.pos_embedding[:, : num_tokens + 1]

        # Apply Transformer (the attention blocks take sequence-first input)
        tokens = self.dropout(tokens)
        tokens = self.transformer(tokens.transpose(0, 1))

        # The output is read from the CLS token at sequence position 0
        return self.mlp_head(tokens[0])

- We also applied the transformer encoder used for the CGM data to the Viome and anthropometric data.
- We compared the performance of the transformer encoder against a simple MLP (a class called FC) for the Viome and anthropometric data.
- Next, we assessed the performance of different combinations of joint modalities.

In [None]:
# Use the GPU when available (re-defined here so this section can run without the CNN + LSTM cell above).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Define parameters for CGM model
# (num_classes is the size of the embedding returned by the encoder)
cgm_model_kwargs = {
    'n_features': 1,
    'embed_dim': 96,
    'num_heads': 4,
    'num_classes': 64,
    'dropout': 0.2,
    'num_layers': 6
}

cgm_model = MultiheadAttention(**cgm_model_kwargs).to(device)

# Define parameters for Vision Transformer model
# (patch_size 4 with num_patches 256 corresponds to a 16x16 grid of patches)
img_model_kwargs = {
    'embed_dim': 64,
    'hidden_dim': 128,
    'num_channels': 3,
    'num_heads': 2,
    'num_layers': 6,
    'num_classes': 64,
    'patch_size': 4,
    'num_patches': 256,
    'dropout': 0.2
}

img_model = VisionTransformer(**img_model_kwargs).to(device)

# Define parameters for viome model (8-dimensional embedding)
viome_model_kwargs = {
    'n_features': 1,
    'embed_dim': 96,
    'num_heads': 4,
    'num_classes': 8,
    'dropout': 0.2,
    'num_layers': 6
}

viome_model = MultiheadAttention(**viome_model_kwargs).to(device)

# Define parameters for demo model (8-dimensional embedding)
demo_model_kwargs = {
    'n_features': 1,
    'embed_dim': 96,
    'num_heads': 4,
    'num_classes': 8,
    'dropout': 0.2,
    'num_layers': 6
}

demo_model = MultiheadAttention(**demo_model_kwargs).to(device)


# Combining the multimodal model
# Only the image and CGM encoders are combined here; viome_model and
# demo_model are created above but are not part of this model.
multimodal_model = MultiModalModel([img_model, cgm_model],).to(device)

In [None]:
# Image + CGM experiment: regression head, optimizer and training loop.
# Fills m2_train_losses / m2_val_losses with one RMSRE value per epoch.

# The regressor consumes the concatenated image and CGM embeddings
# (both encoders are configured with num_classes=64).
expected_input_size = 64 + 64
nutrient_predictor = Regressor(expected_input_size).to(device)

# A single optimizer updates the regression head and both encoders jointly.
optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(cgm_model.parameters()),
    lr=1e-4,
    weight_decay=0.2,
)

# Initialize lists to store epoch losses
m2_train_losses = []
m2_val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from
    # multimodal_model, so its mode must be switched explicitly too.
    multimodal_model.train()
    nutrient_predictor.train()
    for images, cgm_tensors, viomes, demos, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        cgm_tensors = cgm_tensors.to(device)
        labels = labels.to(device).unsqueeze(-1)

        # Forward pass: encode each modality, concatenate, regress.
        img_embedding, cgm_embedding = multimodal_model([images, cgm_tensors])
        combined_embedding = torch.cat((img_embedding, cgm_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Batch loss is the mean squared relative error (MSRE); the square
        # root is taken once per epoch below to report RMSRE.
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m2_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase: put BOTH modules in eval mode so that mode-dependent
    # layers in the regressor (e.g. dropout) are inactive while scoring.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            labels = labels.to(device).unsqueeze(-1)

            img_embedding, cgm_embedding = multimodal_model([images, cgm_tensors])
            combined_embedding = torch.cat((img_embedding, cgm_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

    avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
    m2_val_losses.append(avg_val_rmsre)  # Store validation loss

    # Track minimum loss
    if avg_val_rmsre < min_loss:
        min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot train and validation RMSRE per epoch for the Image + CGM model.
# The x axis is derived from the recorded histories instead of the global
# `epochs`, which later cells rebind to 15; the cell therefore stays
# re-runnable after the hyperparameter section has been executed.
plt.figure(figsize=(5, 5))
plt.plot(range(len(m2_train_losses)), m2_train_losses, label='Train', marker='o', color='b')
plt.plot(range(len(m2_val_losses)), m2_val_losses, label='Validation', marker='s', color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
plt.show()

In [None]:
# Mean and standard deviation of the RMSRE from epoch index 14 onward
# (train history first, then validation history).
for _history in (m2_train_losses, m2_val_losses):
    _tail = _history[14:]
    print(np.mean(_tail), np.std(_tail))

### Image+Demo (transformer)

In [None]:
# Image + demographics: wrap the two encoders in one multimodal model.
# NOTE(review): img_model is the same object the Image + CGM cell already
# optimized, so it enters this experiment with those weights - confirm the
# warm start is intended.
multimodal_model = MultiModalModel([img_model, demo_model]).to(device)

In [None]:
# Image + Demo experiment: regression head, optimizer and training loop.
# Fills m3_train_losses / m3_val_losses with one RMSRE value per epoch.

# The regressor consumes the concatenated image (num_classes=64) and
# demographics (num_classes=8) embeddings.
expected_input_size = 64 + 8
nutrient_predictor = Regressor(expected_input_size).to(device)

# A single optimizer updates the regression head and both encoders jointly.
optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(demo_model.parameters()),
    lr=1e-4,
    weight_decay=0.2,
)

# Initialize lists to store epoch losses
m3_train_losses = []
m3_val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from
    # multimodal_model, so its mode must be switched explicitly too.
    multimodal_model.train()
    nutrient_predictor.train()
    for images, cgm_tensors, viomes, demos, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        demos = demos.to(device)
        labels = labels.to(device).unsqueeze(-1)

        # Forward pass: encode each modality, concatenate, regress.
        img_embedding, demo_embedding = multimodal_model([images, demos])
        combined_embedding = torch.cat((img_embedding, demo_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Batch loss is the mean squared relative error (MSRE); the square
        # root is taken once per epoch below to report RMSRE.
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m3_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase: put BOTH modules in eval mode so that mode-dependent
    # layers in the regressor (e.g. dropout) are inactive while scoring.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images = images.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            img_embedding, demo_embedding = multimodal_model([images, demos])
            combined_embedding = torch.cat((img_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

    avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
    m3_val_losses.append(avg_val_rmsre)  # Store validation loss

    # Track minimum loss
    if avg_val_rmsre < min_loss:
        min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot train and validation RMSRE per epoch for the Image + Demo model.
# The x axis is derived from the recorded histories instead of the global
# `epochs`, which later cells rebind to 15; the cell therefore stays
# re-runnable after the hyperparameter section has been executed.
plt.figure(figsize=(5, 5))
plt.plot(range(len(m3_train_losses)), m3_train_losses, label='Train', marker='o', color='b')
plt.plot(range(len(m3_val_losses)), m3_val_losses, label='Validation', marker='s', color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
plt.show()

In [None]:
# Mean and standard deviation of the RMSRE from epoch index 14 onward
# (train history first, then validation history).
for _history in (m3_train_losses, m3_val_losses):
    _tail = _history[14:]
    print(np.mean(_tail), np.std(_tail))

### Image+Viome (transformer)

In [None]:
# Image + Viome: wrap the two encoders in one multimodal model.
# NOTE(review): img_model is the same object earlier experiment cells already
# optimized, so it enters this experiment with those weights - confirm the
# warm start is intended.
multimodal_model = MultiModalModel([img_model, viome_model]).to(device)

In [None]:
# Image + Viome experiment: regression head, optimizer and training loop.
# Fills m4_train_losses / m4_val_losses with one RMSRE value per epoch.

# The regressor consumes the concatenated image (num_classes=64) and
# Viome (num_classes=8) embeddings.
expected_input_size = 64 + 8
nutrient_predictor = Regressor(expected_input_size).to(device)

# A single optimizer updates the regression head and both encoders jointly.
optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(viome_model.parameters()),
    lr=1e-4,
    weight_decay=0.2,
)

# Initialize lists to store epoch losses
m4_train_losses = []
m4_val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from
    # multimodal_model, so its mode must be switched explicitly too.
    multimodal_model.train()
    nutrient_predictor.train()
    for images, cgm_tensors, viomes, demos, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        viomes = viomes.to(device)
        labels = labels.to(device).unsqueeze(-1)

        # Forward pass: encode each modality, concatenate, regress.
        img_embedding, viome_embedding = multimodal_model([images, viomes])
        combined_embedding = torch.cat((img_embedding, viome_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Batch loss is the mean squared relative error (MSRE); the square
        # root is taken once per epoch below to report RMSRE.
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m4_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase: put BOTH modules in eval mode so that mode-dependent
    # layers in the regressor (e.g. dropout) are inactive while scoring.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images = images.to(device)
            viomes = viomes.to(device)
            labels = labels.to(device).unsqueeze(-1)

            img_embedding, viome_embedding = multimodal_model([images, viomes])
            combined_embedding = torch.cat((img_embedding, viome_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

    avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
    m4_val_losses.append(avg_val_rmsre)  # Store validation loss

    # Track minimum loss
    if avg_val_rmsre < min_loss:
        min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot train and validation RMSRE per epoch for the Image + Viome model.
# The x axis is derived from the recorded histories instead of the global
# `epochs`, which later cells rebind to 15; the cell therefore stays
# re-runnable after the hyperparameter section has been executed.
plt.figure(figsize=(5, 5))
plt.plot(range(len(m4_train_losses)), m4_train_losses, label='Train', marker='o', color='b')
plt.plot(range(len(m4_val_losses)), m4_val_losses, label='Validation', marker='s', color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
plt.show()

In [None]:
# Mean and standard deviation of the RMSRE from epoch index 14 onward
# (train history first, then validation history).
for _history in (m4_train_losses, m4_val_losses):
    _tail = _history[14:]
    print(np.mean(_tail), np.std(_tail))

### Image + CGM + Demo (transformer)

In [None]:
# Image + CGM + Demo experiment: joint model, regression head, optimizer and
# training loop. Fills m5_train_losses / m5_val_losses with one RMSRE value
# per epoch.
#
# The encoders are the ones created in the model-definition cell; they are not
# re-created here.
# NOTE(review): img_model, cgm_model and demo_model were already optimized by
# earlier experiment cells, so this run starts from those weights - confirm
# the warm start is intended.

# Combining the multimodal model
multimodal_model = MultiModalModel([img_model, cgm_model, demo_model]).to(device)

# Prediction model: input is the concatenation of the image (64), CGM (64)
# and demographics (8) embeddings.
expected_input_size = 64 + 64 + 8
nutrient_predictor = Regressor(expected_input_size, hidden=128, output_size=1, dropout=0.2).to(device)

import torch.optim as optim

# A single optimizer updates the regression head and all three encoders.
optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(cgm_model.parameters())
    + list(demo_model.parameters()),
    lr=1e-4,
    weight_decay=0.2,
)

# Initialize lists to store epoch losses
m5_train_losses = []
m5_val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from
    # multimodal_model, so its mode must be switched explicitly too.
    multimodal_model.train()
    nutrient_predictor.train()
    for images, cgm_tensors, viomes, demos, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        cgm_tensors = cgm_tensors.to(device)
        demos = demos.to(device)
        labels = labels.to(device).unsqueeze(-1)

        # Forward pass: encode each modality, concatenate, regress.
        img_embedding, cgm_embedding, demo_embedding = multimodal_model([images, cgm_tensors, demos])
        combined_embedding = torch.cat((img_embedding, cgm_embedding, demo_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Batch loss is the mean squared relative error (MSRE); the square
        # root is taken once per epoch below to report RMSRE.
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m5_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase: put BOTH modules in eval mode so the regressor's
    # dropout (dropout=0.2 above) is inactive while scoring.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            img_embedding, cgm_embedding, demo_embedding = multimodal_model([images, cgm_tensors, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

    avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
    m5_val_losses.append(avg_val_rmsre)  # Store validation loss

    # Track minimum loss
    if avg_val_rmsre < min_loss:
        min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot train and validation RMSRE per epoch for the Image + CGM + Demo model,
# then print mean/std of each history from epoch index 14 onward.
# The x axis is derived from the recorded histories instead of the global
# `epochs`, which later cells rebind to 15; the cell therefore stays
# re-runnable after the hyperparameter section has been executed.
plt.figure(figsize=(5, 5))
plt.plot(range(len(m5_train_losses)), m5_train_losses, label='Train', marker='o', color='b')
plt.plot(range(len(m5_val_losses)), m5_val_losses, label='Validation', marker='s', color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
plt.show()
print(np.mean(m5_train_losses[14:]), np.std(m5_train_losses[14:]))
print(np.mean(m5_val_losses[14:]), np.std(m5_val_losses[14:]))

### Image+CGM+Viome (transformer)

In [None]:
class FC(nn.Module):
    """Small fully connected encoder.

    Flattens the input per sample, projects it to 128 hidden units with a
    ReLU, and maps the result to an embedding of size ``embedding_size``.

    Args:
        input_size: number of features per sample after flattening.
        embedding_size: size of the returned embedding.
    """

    def __init__(self, input_size=27, embedding_size=27):
        super().__init__()
        hidden_width = 128
        layers = [
            nn.Flatten(),
            nn.Linear(input_size, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, embedding_size),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the embedding of ``x`` produced by the MLP."""
        return self.fc(x)

In [None]:
# Image + CGM + Viome experiment: joint model, regression head, optimizer and
# training loop. Fills m6_train_losses / m6_val_losses with one RMSRE value
# per epoch.
#
# The encoders are the ones created in the model-definition cell; they are not
# re-created here.
# NOTE(review): img_model, cgm_model and viome_model were already optimized by
# earlier experiment cells, so this run starts from those weights - confirm
# the warm start is intended.

# Combining the multimodal model
multimodal_model = MultiModalModel([img_model, cgm_model, viome_model]).to(device)

# Prediction model: input is the concatenation of the image (64), CGM (64)
# and Viome (8) embeddings.
expected_input_size = 64 + 64 + 8
nutrient_predictor = Regressor(expected_input_size, hidden=128, output_size=1, dropout=0.2).to(device)

# A single optimizer updates the regression head and all three encoders.
optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(cgm_model.parameters())
    + list(viome_model.parameters()),
    lr=1e-4,
    weight_decay=0.2,
)

# Initialize lists to store epoch losses
m6_train_losses = []
m6_val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from
    # multimodal_model, so its mode must be switched explicitly too.
    multimodal_model.train()
    nutrient_predictor.train()
    for images, cgm_tensors, viomes, demos, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        cgm_tensors = cgm_tensors.to(device)
        viomes = viomes.to(device)
        labels = labels.to(device).unsqueeze(-1)

        # Forward pass: encode each modality, concatenate, regress.
        img_embedding, cgm_embedding, viome_embedding = multimodal_model([images, cgm_tensors, viomes])
        combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Batch loss is the mean squared relative error (MSRE); the square
        # root is taken once per epoch below to report RMSRE.
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m6_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase: put BOTH modules in eval mode so the regressor's
    # dropout (dropout=0.2 above) is inactive while scoring.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            viomes = viomes.to(device)
            labels = labels.to(device).unsqueeze(-1)

            img_embedding, cgm_embedding, viome_embedding = multimodal_model([images, cgm_tensors, viomes])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

    avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
    m6_val_losses.append(avg_val_rmsre)  # Store validation loss

    # Track minimum loss
    if avg_val_rmsre < min_loss:
        min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot train and validation RMSRE per epoch for the Image + CGM + Viome model
# and save the figure.
# The x axis is derived from the recorded histories instead of the global
# `epochs`, which later cells rebind to 15, so the cell stays re-runnable.
plt.figure(figsize=(5, 5))
plt.plot(range(len(m6_train_losses)), m6_train_losses, label='Train', marker='o', color='b')
plt.plot(range(len(m6_val_losses)), m6_val_losses, label='Validation', marker='s', color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
# Apply the layout BEFORE saving so the PNG matches what is displayed.
plt.tight_layout()
# NOTE(review): the all-modalities plot cell further down writes the same
# file name, so this image is overwritten when the notebook runs end to end.
plt.savefig('jointmodel.png', dpi=300)
plt.show()



In [None]:
# Mean and standard deviation of the RMSRE from epoch index 14 onward
# (train history first, then validation history).
for _history in (m6_train_losses, m6_val_losses):
    _tail = _history[14:]
    print(np.mean(_tail), np.std(_tail))

# Model adding all modalities

In [None]:
# All-modalities experiment (image + CGM + Viome + demographics): joint model,
# regression head, optimizer and training loop. Fills m7_train_losses /
# m7_val_losses with one RMSRE value per epoch.
# NOTE(review): all four encoders were already optimized by earlier experiment
# cells, so this run starts from those weights - confirm the warm start is
# intended.

# Combining the multimodal model
multimodal_model = MultiModalModel([img_model, cgm_model, viome_model, demo_model]).to(device)

# Prediction model: input is the concatenation of the image (64), CGM (64),
# Viome (8) and demographics (8) embeddings.
expected_input_size = 64 + 64 + 8 + 8
nutrient_predictor = Regressor(expected_input_size, hidden=128, output_size=1, dropout=0.2).to(device)

# A single optimizer updates the regression head and all four encoders.
optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(cgm_model.parameters())
    + list(viome_model.parameters())
    + list(demo_model.parameters()),
    lr=1e-4,
    weight_decay=0.2,
)

# Initialize lists to store epoch losses
m7_train_losses = []
m7_val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 30

for epoch in range(epochs):
    epoch_train_loss = []

    # Training phase. The regressor is a separate module from
    # multimodal_model, so its mode must be switched explicitly too.
    multimodal_model.train()
    nutrient_predictor.train()
    for images, cgm_tensors, viomes, demos, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        cgm_tensors = cgm_tensors.to(device)
        viomes = viomes.to(device)
        demos = demos.to(device)
        labels = labels.to(device).unsqueeze(-1)

        # Forward pass: encode each modality, concatenate, regress.
        img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
        combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Batch loss is the mean squared relative error (MSRE); the square
        # root is taken once per epoch below to report RMSRE.
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    m7_train_losses.append(avg_train_loss)  # Store train loss

    # Validation phase: put BOTH modules in eval mode so the regressor's
    # dropout (dropout=0.2 above) is inactive while scoring.
    epoch_val_loss = []
    multimodal_model.eval()
    nutrient_predictor.eval()
    with torch.no_grad():
        for images, cgm_tensors, viomes, demos, labels in val_loader:
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            viomes = viomes.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())

    avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
    m7_val_losses.append(avg_val_rmsre)  # Store validation loss

    # Track minimum loss
    if avg_val_rmsre < min_loss:
        min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

In [None]:
# Plot train and validation RMSRE per epoch for the all-modalities model,
# save the figure, then print mean/std of each history excluding epoch 0.
# The x axis is derived from the recorded histories instead of the global
# `epochs`, which later cells rebind to 15, so the cell stays re-runnable.
plt.figure(figsize=(5, 5))
plt.plot(range(len(m7_train_losses)), m7_train_losses, label='Train', marker='o', color='b')
plt.plot(range(len(m7_val_losses)), m7_val_losses, label='Validation', marker='s', color='r')
plt.xlabel('Epochs')
plt.ylabel('RMSRE')
plt.legend()
# Apply the layout BEFORE saving so the PNG matches what is displayed.
plt.tight_layout()
plt.savefig('jointmodel.png', dpi=300)
plt.show()

print(np.mean(m7_train_losses[1:]), np.std(m7_train_losses[1:]))
print(np.mean(m7_val_losses[1:]), np.std(m7_val_losses[1:]))

In [None]:
# Mean and standard deviation of the RMSRE from epoch index 14 onward
# (train history first, then validation history).
for _history in (m7_train_losses, m7_val_losses):
    _tail = _history[14:]
    print(np.mean(_tail), np.std(_tail))

## Hyperparameter tuning with all modalities

In [None]:
# Number of epochs used by each trial of the hyperparameter sweeps below
# (the modality experiments above used 30).
epochs = 15  # author's note: the error has plateaued by this point

In [None]:
# Hyperparameters to tune:
# - lr and weight_decay of the optimizer
# - dropout rate and number of hidden units of the Regressor model
# - batch_size

In [None]:
# Learning-rate sweep for the all-modalities model.
# Each trial trains for `epochs` epochs and is scored by the mean validation
# RMSRE of its last 10 epochs; the winner is stored in `best_lr`.
import random

# Set the seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Learning rates to test
learning_rates = [1e-5, 1e-4, 1e-3, 1e-2]

# Initialize the best loss and model state
best_val_loss = float("inf")
best_model_params = None
best_lr = None

# Concatenated embedding size: image (64) + CGM (64) + Viome (8) + demo (8)
expected_input_size = 64 + 64 + 8 + 8

# Loop through each learning rate
for lr in learning_rates:
    print(f"\nTesting learning rate: {lr}")

    # Re-create the encoders for every trial. Reusing the same encoder objects
    # would let each trial continue from the weights left by the previous one
    # (and by the experiments above), so the trials would not be comparable.
    img_model = VisionTransformer(**img_model_kwargs).to(device)
    cgm_model = MultiheadAttention(**cgm_model_kwargs).to(device)
    viome_model = MultiheadAttention(**viome_model_kwargs).to(device)
    demo_model = MultiheadAttention(**demo_model_kwargs).to(device)

    multimodal_model = MultiModalModel([img_model, cgm_model, viome_model, demo_model]).to(device)
    nutrient_predictor = Regressor(expected_input_size, hidden=128, output_size=1, dropout=0.2).to(device)
    optimizer = optim.Adam(
        list(nutrient_predictor.parameters())
        + list(img_model.parameters())
        + list(cgm_model.parameters())
        + list(viome_model.parameters())
        + list(demo_model.parameters()),
        lr=lr,  # hyperparameter under test
        weight_decay=0.2,
    )

    # Initialize lists to store epoch losses
    train_losses = []
    val_losses = []

    # Training Loop
    for epoch in range(epochs):
        epoch_train_loss = []

        # Training phase. The regressor is a separate module from
        # multimodal_model, so its mode must be switched explicitly too.
        multimodal_model.train()
        nutrient_predictor.train()
        for images, cgm_tensors, viomes, demos, labels in train_loader:
            optimizer.zero_grad()
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            viomes = viomes.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            # Forward pass
            img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            # Batch loss is the mean squared relative error (MSRE); the
            # square root is taken once per epoch below to report RMSRE.
            msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
            msre.backward()
            optimizer.step()

            epoch_train_loss.append(msre.item())

        avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
        train_losses.append(avg_train_loss)

        # Validation phase: both modules in eval mode so the regressor's
        # dropout is inactive while scoring.
        epoch_val_loss = []
        multimodal_model.eval()
        nutrient_predictor.eval()
        with torch.no_grad():
            for images, cgm_tensors, viomes, demos, labels in val_loader:
                images = images.to(device)
                cgm_tensors = cgm_tensors.to(device)
                viomes = viomes.to(device)
                demos = demos.to(device)
                labels = labels.to(device).unsqueeze(-1)

                img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
                combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
                predictions = nutrient_predictor(combined_embedding)

                val_loss = torch.mean(((labels - predictions) / labels) ** 2)
                epoch_val_loss.append(val_loss.item())

        # Compute average validation loss across the entire epoch
        avg_val_loss = np.sqrt(np.mean(epoch_val_loss))
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f}")

    # select the best model based on average validation loss
    average_val_loss = np.mean(val_losses[-10:])
    if average_val_loss < best_val_loss:
        best_val_loss = average_val_loss
        best_lr = lr

print(f"\nBest Learning Rate: {best_lr}, Best Validation Loss: {best_val_loss:.3f}")

In [None]:
# Weight-decay sweep for the all-modalities model, using `best_lr` from the
# learning-rate sweep. Each trial trains for `epochs` epochs and is scored by
# the mean validation RMSRE of its last 10 epochs; the winner is stored in
# `best_decay`.

# Weight decays to test
weight_decays = [0, 0.1, 0.2, 0.3]

# Initialize the best loss and model state
best_val_loss = float("inf")
best_model_params = None
best_decay = None

# Loop through each weight decay
for weight_decay in weight_decays:
    print(f"\nTesting weight decay: {weight_decay}")

    # Re-create the encoders for every trial. Reusing the same encoder objects
    # would let each trial continue from the weights left by the previous one,
    # so the trials would not be comparable.
    img_model = VisionTransformer(**img_model_kwargs).to(device)
    cgm_model = MultiheadAttention(**cgm_model_kwargs).to(device)
    viome_model = MultiheadAttention(**viome_model_kwargs).to(device)
    demo_model = MultiheadAttention(**demo_model_kwargs).to(device)

    multimodal_model = MultiModalModel([img_model, cgm_model, viome_model, demo_model]).to(device)
    nutrient_predictor = Regressor(expected_input_size, hidden=128, output_size=1, dropout=0.2).to(device)
    optimizer = optim.Adam(
        list(nutrient_predictor.parameters())
        + list(img_model.parameters())
        + list(cgm_model.parameters())
        + list(viome_model.parameters())
        + list(demo_model.parameters()),
        lr=best_lr,
        weight_decay=weight_decay,  # hyperparameter under test
    )

    # Initialize lists to store epoch losses
    train_losses = []
    val_losses = []

    # Training Loop
    for epoch in range(epochs):
        epoch_train_loss = []

        # Training phase. The regressor is a separate module from
        # multimodal_model, so its mode must be switched explicitly too.
        multimodal_model.train()
        nutrient_predictor.train()
        for images, cgm_tensors, viomes, demos, labels in train_loader:
            optimizer.zero_grad()
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            viomes = viomes.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            # Forward pass
            img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            # Batch loss is the mean squared relative error (MSRE); the
            # square root is taken once per epoch below to report RMSRE.
            msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
            msre.backward()
            optimizer.step()

            epoch_train_loss.append(msre.item())

        avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
        train_losses.append(avg_train_loss)

        # Validation phase: both modules in eval mode so the regressor's
        # dropout is inactive while scoring.
        epoch_val_loss = []
        multimodal_model.eval()
        nutrient_predictor.eval()
        with torch.no_grad():
            for images, cgm_tensors, viomes, demos, labels in val_loader:
                images = images.to(device)
                cgm_tensors = cgm_tensors.to(device)
                viomes = viomes.to(device)
                demos = demos.to(device)
                labels = labels.to(device).unsqueeze(-1)

                img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
                combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
                predictions = nutrient_predictor(combined_embedding)

                val_loss = torch.mean(((labels - predictions) / labels) ** 2)
                epoch_val_loss.append(val_loss.item())

        # Compute average validation loss across the entire epoch.
        # Appended exactly once per epoch so that val_losses[-10:] below
        # really spans the last 10 epochs.
        avg_val_loss = np.sqrt(np.mean(epoch_val_loss))
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f}")

    # select the best model based on average validation loss
    average_val_loss = np.mean(val_losses[-10:])
    if average_val_loss < best_val_loss:
        best_val_loss = average_val_loss
        best_decay = weight_decay

print(f"\nBest Weight Decay: {best_decay}, Best Validation Loss: {best_val_loss:.3f}")

In [None]:
# Hidden-units sweep for the Regressor head, using `best_lr` and `best_decay`
# from the previous sweeps. Each trial trains for `epochs` epochs and is
# scored by the mean validation RMSRE of its last 10 epochs; the winner is
# stored in `best_hidden`.

# List of hidden units to test
hidden_units = [32, 64, 128, 256]

# Initialize the best loss and model state
best_val_loss = float("inf")
best_model_params = None
best_hidden = None

# Loop through each number of hidden units
for hidden in hidden_units:
    print(f"\nTesting # hidden units: {hidden}")

    # Re-create the encoders for every trial. Reusing the same encoder objects
    # would let each trial continue from the weights left by the previous one,
    # so the trials would not be comparable.
    img_model = VisionTransformer(**img_model_kwargs).to(device)
    cgm_model = MultiheadAttention(**cgm_model_kwargs).to(device)
    viome_model = MultiheadAttention(**viome_model_kwargs).to(device)
    demo_model = MultiheadAttention(**demo_model_kwargs).to(device)

    multimodal_model = MultiModalModel([img_model, cgm_model, viome_model, demo_model]).to(device)
    # `hidden` is the hyperparameter under test
    nutrient_predictor = Regressor(expected_input_size, hidden=hidden, output_size=1, dropout=0.2).to(device)
    optimizer = optim.Adam(
        list(nutrient_predictor.parameters())
        + list(img_model.parameters())
        + list(cgm_model.parameters())
        + list(viome_model.parameters())
        + list(demo_model.parameters()),
        lr=best_lr,
        weight_decay=best_decay,
    )

    # Initialize lists to store epoch losses
    train_losses = []
    val_losses = []

    # Training Loop
    for epoch in range(epochs):
        epoch_train_loss = []

        # Training phase. The regressor is a separate module from
        # multimodal_model, so its mode must be switched explicitly too.
        multimodal_model.train()
        nutrient_predictor.train()
        for images, cgm_tensors, viomes, demos, labels in train_loader:
            optimizer.zero_grad()
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            viomes = viomes.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            # Forward pass
            img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            # Batch loss is the mean squared relative error (MSRE); the
            # square root is taken once per epoch below to report RMSRE.
            msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
            msre.backward()
            optimizer.step()

            epoch_train_loss.append(msre.item())

        avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
        train_losses.append(avg_train_loss)

        # Validation phase: both modules in eval mode so the regressor's
        # dropout is inactive while scoring.
        epoch_val_loss = []
        multimodal_model.eval()
        nutrient_predictor.eval()
        with torch.no_grad():
            for images, cgm_tensors, viomes, demos, labels in val_loader:
                images = images.to(device)
                cgm_tensors = cgm_tensors.to(device)
                viomes = viomes.to(device)
                demos = demos.to(device)
                labels = labels.to(device).unsqueeze(-1)

                img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
                combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
                predictions = nutrient_predictor(combined_embedding)

                val_loss = torch.mean(((labels - predictions) / labels) ** 2)
                epoch_val_loss.append(val_loss.item())

        # Compute average validation loss across the entire epoch
        avg_val_loss = np.sqrt(np.mean(epoch_val_loss))
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f}")

    # select the best model based on average validation loss
    average_val_loss = np.mean(val_losses[-10:])
    if average_val_loss < best_val_loss:
        best_val_loss = average_val_loss
        best_hidden = hidden

print(f"\nBest # hidden units: {best_hidden}, Best Validation Loss: {best_val_loss:.3f}")

In [None]:
# Dropout sweep for the Regressor head, using `best_lr`, `best_decay` and
# `best_hidden` from the previous sweeps. Each trial trains for `epochs`
# epochs and is scored by the mean validation RMSRE of its last 10 epochs;
# the winner is stored in `best_drop`.

# Dropout rates to test
dropout_rates = [0, 0.1, 0.2, 0.3]

# Initialize the best loss and model state
best_val_loss = float("inf")
best_model_params = None
best_drop = None

# Loop through each dropout rate
for drop in dropout_rates:
    print(f"\nTesting dropout rate: {drop}")

    # Re-create the encoders for every trial. Reusing the same encoder objects
    # would let each trial continue from the weights left by the previous one,
    # so the trials would not be comparable.
    img_model = VisionTransformer(**img_model_kwargs).to(device)
    cgm_model = MultiheadAttention(**cgm_model_kwargs).to(device)
    viome_model = MultiheadAttention(**viome_model_kwargs).to(device)
    demo_model = MultiheadAttention(**demo_model_kwargs).to(device)

    multimodal_model = MultiModalModel([img_model, cgm_model, viome_model, demo_model]).to(device)
    # Use the hidden size selected by the hidden-units sweep. The bare name
    # `hidden` is only that sweep's leftover loop variable (its last
    # candidate), not the selected value.
    nutrient_predictor = Regressor(expected_input_size, hidden=best_hidden, output_size=1, dropout=drop).to(device)
    optimizer = optim.Adam(
        list(nutrient_predictor.parameters())
        + list(img_model.parameters())
        + list(cgm_model.parameters())
        + list(viome_model.parameters())
        + list(demo_model.parameters()),
        lr=best_lr,
        weight_decay=best_decay,
    )

    # Initialize lists to store losses
    train_losses = []
    val_losses = []

    # Training Loop
    for epoch in range(epochs):
        epoch_train_loss = []

        # Training phase. The regressor is a separate module from
        # multimodal_model, so its mode must be switched explicitly too.
        multimodal_model.train()
        nutrient_predictor.train()
        for images, cgm_tensors, viomes, demos, labels in train_loader:
            optimizer.zero_grad()
            images = images.to(device)
            cgm_tensors = cgm_tensors.to(device)
            viomes = viomes.to(device)
            demos = demos.to(device)
            labels = labels.to(device).unsqueeze(-1)

            # Forward pass
            img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            # Batch loss is the mean squared relative error (MSRE); the
            # square root is taken once per epoch below to report RMSRE.
            msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
            msre.backward()
            optimizer.step()

            epoch_train_loss.append(msre.item())

        avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
        train_losses.append(avg_train_loss)

        # Validation phase: both modules in eval mode. This matters most in
        # this sweep: with the regressor left in train mode, the dropout rate
        # under test would also perturb the validation predictions.
        epoch_val_loss = []
        multimodal_model.eval()
        nutrient_predictor.eval()
        with torch.no_grad():
            for images, cgm_tensors, viomes, demos, labels in val_loader:
                images = images.to(device)
                cgm_tensors = cgm_tensors.to(device)
                viomes = viomes.to(device)
                demos = demos.to(device)
                labels = labels.to(device).unsqueeze(-1)

                img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
                combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
                predictions = nutrient_predictor(combined_embedding)

                val_loss = torch.mean(((labels - predictions) / labels) ** 2)
                epoch_val_loss.append(val_loss.item())

        # Compute average validation loss (RMSRE) over ALL validation batches,
        # as the other sweeps do; skipping the first batch would bias the
        # score and give NaN for a single-batch loader.
        avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
        val_losses.append(avg_val_rmsre)

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

    # select the best model based on average validation loss
    average_val_loss = np.mean(val_losses[-10:])
    if average_val_loss < best_val_loss:
        best_val_loss = average_val_loss
        best_drop = drop

print(f"\nBest dropout rate: {best_drop}, Best Validation RMSRE: {best_val_loss:.3f}")

In [None]:
# Report the value selected by each hyperparameter sweep.
for _label, _value in (
    ('Best learning rate:', best_lr),
    ('Best Weight decay:', best_decay),
    ('Best number of hidden layer:', best_hidden),
    ('Best dropout rate:', best_drop),
):
    print(_label, _value)

In [None]:
# Final all-modalities model, configured with the hyperparameters selected by
# the sweeps (best_lr, best_decay, best_hidden, best_drop).
# NOTE(review): the encoders are reused as they currently are, so this run
# starts from whatever weights earlier cells left in them - confirm that is
# intended.

# Combining the multimodal model
multimodal_model = MultiModalModel([img_model, cgm_model, viome_model, demo_model]).to(device)

# Prediction model: input is the concatenation of the image (64), CGM (64),
# Viome (8) and demographics (8) embeddings. The hidden size comes from the
# sweep result, like the other three hyperparameters, instead of a literal.
expected_input_size = 64 + 64 + 8 + 8
nutrient_predictor = Regressor(expected_input_size, hidden=best_hidden, output_size=1, dropout=best_drop).to(device)

optimizer = optim.Adam(
    list(nutrient_predictor.parameters())
    + list(img_model.parameters())
    + list(cgm_model.parameters())
    + list(viome_model.parameters())
    + list(demo_model.parameters()),
    lr=best_lr,
    weight_decay=best_decay,
)


# Initialize lists to store epoch losses
train_losses = []
val_losses = []

# Training Loop
min_loss = float("inf")  # lowest validation RMSRE seen so far
epochs = 15

for epoch in range(epochs):
    epoch_train_loss = []
    
    # Training phase
    multimodal_model.train()
    for batch_idx, (images, cgm_tensors, viomes, demos, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        images, cgm_tensors, viomes, demos, labels = images.to(device), cgm_tensors.to(device), viomes.to(device), demos.to(device), labels.to(device).unsqueeze(-1)

        # Forward pass
        img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
        combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Calculate RMSRE Loss
        msre = torch.mean((predictions - labels) ** 2 / (labels ** 2))
        msre.backward()
        optimizer.step()

        epoch_train_loss.append(msre.item())

    avg_train_loss = np.sqrt(np.mean(epoch_train_loss))
    train_losses.append(avg_train_loss)

    # Validation phase
    epoch_val_loss = []
    multimodal_model.eval()
    with torch.no_grad():
        for batch_idx, (images, cgm_tensors, viomes, demos, labels) in enumerate(val_loader):
            images, cgm_tensors, viomes, demos, labels = images.to(device), cgm_tensors.to(device), viomes.to(device), demos.to(device), labels.to(device).unsqueeze(-1)

            img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
            combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
            predictions = nutrient_predictor(combined_embedding)

            val_loss = torch.mean(((labels - predictions) / labels) ** 2)
            epoch_val_loss.append(val_loss.item())
        
        avg_val_rmsre = np.sqrt(np.mean(epoch_val_loss))
        val_losses.append(avg_val_rmsre)  # Store validation loss
        
        # Track minimum loss
        if avg_val_rmsre < min_loss:
            min_loss = avg_val_rmsre

    print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.3f}, Val RMSRE: {avg_val_rmsre:.3f}")

## Processing test data

### Viome data

In [None]:
# Load the demographics + Viome table for the test split and preview the first rows
demo_viome_test = pd.read_csv('demo_viome_test.csv')
demo_viome_test.head(5)

In [None]:
# Missing-value count per column of the test demographics/Viome table
demo_viome_test.isnull().sum()

In [None]:
# Run the test table through the same demo/Viome preprocessing function, then preview the result
demo_viome_test_processed = demo_viome_data_preprocessing(demo_viome_test)
demo_viome_test_processed.head(5)

### CGM data

In [None]:
# Load the CGM table for the test split
cgm_test = pd.read_csv('cgm_test.csv')
# Preview only the first rows (as done for the Viome table) instead of displaying the whole frame
cgm_test.head()

In [None]:
# Missing-value count per column of the test CGM table
cgm_test.isna().sum()

### Image data

In [None]:
# Load the image/meal table for the test split
img_test = pd.read_csv('img_test.csv')
# Preview only the first rows (as done for the Viome table) instead of displaying the whole frame
img_test.head()

In [None]:
# Missing-value count per column of the test image table (before imputation)
img_test.isnull().sum()

In [None]:
# Fill missing 'Breakfast Fiber' entries with the median of that same column.
# NOTE(review): the median comes from the test split itself — confirm this matches how the training data was imputed
fiber_median = img_test['Breakfast Fiber'].median()
img_test['Breakfast Fiber'] = img_test['Breakfast Fiber'].fillna(fiber_median)

In [None]:
# Re-count missing values per column to confirm the imputation above left no gaps
img_test.isnull().sum()

### Merge the three dataframes

In [None]:
# Join the test CGM and image tables on the shared ('Subject ID', 'Day') key (default inner join)
merged_df_test = cgm_test.merge(img_test, on=['Subject ID', 'Day'])

In [None]:
# Attach the per-subject demographics/Viome features to every (subject, day) row.
# A left join on 'Subject ID' keeps all rows of merged_df_test.
merged_df_test_wdemo_wviome = pd.merge(
    merged_df_test,
    demo_viome_test_processed,
    how='left',
    on='Subject ID',
)

In [None]:
# Rows whose 'Image Before Lunch' cell is the literal string '[]'
empty_mask = merged_df_test_wdemo_wviome['Image Before Lunch'] == '[]'
rows_with_empty_values = merged_df_test_wdemo_wviome.loc[empty_mask]
print(rows_with_empty_values['Image Before Lunch'])

In [None]:
# Rows whose 'Image Before Breakfast' cell is the literal string '[]'
empty_mask = merged_df_test_wdemo_wviome['Image Before Breakfast'] == '[]'
rows_with_empty_values = merged_df_test_wdemo_wviome.loc[empty_mask]
print(rows_with_empty_values['Image Before Breakfast'])

In [None]:
# Rows whose 'CGM Data' cell is the literal string '[]'
empty_mask = merged_df_test_wdemo_wviome['CGM Data'] == '[]'
rows_with_empty_values = merged_df_test_wdemo_wviome.loc[empty_mask]
print(rows_with_empty_values['CGM Data'])

In [None]:
# Step 1: replace the raw "CGM Data" column with the output of parse_cgm_data.
# Step 2: derive "CGM Data Per Minute" by applying interpolate_cgm_data to the parsed values.
parsed_cgm = merged_df_test_wdemo_wviome["CGM Data"].apply(parse_cgm_data)
merged_df_test_wdemo_wviome["CGM Data"] = parsed_cgm
merged_df_test_wdemo_wviome["CGM Data Per Minute"] = merged_df_test_wdemo_wviome["CGM Data"].apply(interpolate_cgm_data)

In [None]:
class MultiModalDataset_test(Dataset):
    """Dataset for the unlabeled test split.

    Each item is the tuple ``(image_tensor, cgm_tensor, viome_tensor, demo_tensor)``
    built from one row of ``dataframe``; no label is returned.

    Args:
        dataframe: Table holding one sample per row.
        image_columns: Sequence of image column names; only the first entry is read.
            The cell is a string that ``ast.literal_eval`` turns into nested lists.
        cgm_columns: Column selector for the CGM series. Every entry of the selected
            value is indexed with ``[1]`` (the timestamp at ``[0]`` is discarded).
        viome_columns: Sequence of Viome column names; only the first entry is read.
        demo_columns: Column selector for the demographic features.
        transform: Optional callable applied to the image tensor.
        target_transform: Stored but never used (this dataset returns no target).
    """

    def __init__(self, dataframe, image_columns, cgm_columns, viome_columns, demo_columns, transform=None, target_transform=None):
        self.df = dataframe
        self.image_columns = image_columns
        self.cgm_columns = cgm_columns
        self.viome_columns = viome_columns
        self.demo_columns = demo_columns
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows (samples) in the wrapped dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Build the four modality tensors for the row at position ``idx``."""
        # Materialize the row once instead of re-slicing the frame for every modality
        row = self.df.iloc[idx]

        # Image: parse the serialized pixels, scale to [0, 1], and reorder axes to (C, H, W)
        pixels = np.array(ast.literal_eval(row[self.image_columns[0]]))
        image_tensor = torch.tensor(pixels / 255, dtype=torch.float32).permute(2, 0, 1)

        # CGM time series: keep only the value at index 1 of every entry (ignore timestamps)
        cgm_values = [entry[1] for entry in row[self.cgm_columns]]
        cgm_tensor = torch.tensor(cgm_values, dtype=torch.float32)  # Shape: (time_series_length,)

        # Viome and demographic features
        viome_tensor = torch.tensor(row[self.viome_columns[0]], dtype=torch.float32)
        demo_tensor = torch.tensor(row[self.demo_columns], dtype=torch.float32)

        if self.transform:
            image_tensor = self.transform(image_tensor)

        return image_tensor, cgm_tensor, viome_tensor, demo_tensor

def collate_test_fn(batch):
    """Collate unlabeled samples into batched tensors.

    Args:
        batch: Iterable of ``(image, cgm, viome, demo)`` tensor tuples.

    Returns:
        Tuple ``(images, cgm_tensors, viomes, demos)`` where images are stacked
        along a new batch axis, every CGM series is brought to 750 steps with a
        trailing feature axis, and the Viome/demo tensors are stacked and given
        a trailing singleton axis.
    """
    image_list, cgm_list, viome_list, demo_list = zip(*batch)

    # Right-pad each CGM series with zeros up to the fixed length, then batch them
    max_length = 750
    fixed_length_cgm = []
    for series in cgm_list:
        missing_steps = max_length - len(series)
        fixed_length_cgm.append(F.pad(series, (0, missing_steps), "constant", 0))
    # Trailing axis is the per-step feature dimension -> (batch_size, max_seq_len, 1)
    cgm_tensors = pad_sequence(fixed_length_cgm, batch_first=True).unsqueeze(-1)

    images = torch.stack(image_list)  # Shape: (batch_size, C, H, W)
    viomes = torch.stack(viome_list).unsqueeze(-1)
    demos = torch.stack(demo_list).unsqueeze(-1)

    return images, cgm_tensors, viomes, demos



In [None]:
# Wrap the merged test table in the unlabeled dataset; the column selectors
# are defined outside this cell.
dataset_test = MultiModalDataset_test(
    merged_df_test_wdemo_wviome,
    image_columns,
    cgm_columns,
    viome_columns,
    demo_columns,
)

In [None]:
# Keep the dataset order: predictions are later written out with sequential
# row ids, so shuffling here would pair each prediction with the wrong row.
test_loader = DataLoader(dataset_test, batch_size=10, shuffle=False, collate_fn=collate_test_fn)

In [None]:
# Test phase: run the trained encoders + regressor over the test loader

# List to store predictions
y_test_pred = []

# Put BOTH modules in eval mode. The regressor is a separate module, so
# multimodal_model.eval() alone would leave its dropout active at inference.
multimodal_model.eval()
nutrient_predictor.eval()

with torch.no_grad():
    for batch_idx, (images, cgm_tensors, viomes, demos) in enumerate(test_loader):
        images, cgm_tensors, viomes, demos = images.to(device), cgm_tensors.to(device), viomes.to(device), demos.to(device)

        img_embedding, cgm_embedding, viome_embedding, demo_embedding = multimodal_model([images, cgm_tensors, viomes, demos])
        combined_embedding = torch.cat((img_embedding, cgm_embedding, viome_embedding, demo_embedding), dim=-1)
        predictions = nutrient_predictor(combined_embedding)

        # Append this batch's predictions (one array per sample) to the list
        y_test_pred.extend(predictions.cpu().numpy())

In [None]:
# Convert predictions to a numpy array
y_test_pred = np.array(y_test_pred)
# Flatten to 1D. reshape(-1) (rather than squeeze) keeps a length-1 array when
# there is a single prediction, so len() on the result still works downstream.
y_test_pred_flattened = y_test_pred.reshape(-1)

In [None]:
# Reload the raw training labels: the frame loaded at the top of the notebook was
# divided by its column means in place, so the original mean must come from disk.
label_train = pd.read_csv('label_train.csv')
cal_train_mean = label_train.loc[:, 'Lunch Calories'].mean()
# Undo the mean-normalization to express predictions in the original label scale
cal_pred = cal_train_mean * y_test_pred_flattened

In [None]:
# Assemble the submission table: sequential row ids paired with the predicted values
submission_columns = {"row_id": range(len(cal_pred)), "label": cal_pred}
df = pd.DataFrame(submission_columns)

# Write the table to disk without the DataFrame index, then display it
df.to_csv("test_preds.csv", index=False)
df