In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from PIL import Image,UnidentifiedImageError
import os
import glob
import torchvision.transforms as transforms
import torch
# Load the 24 hourly images recorded for one date from disk (None for a missing or unreadable hour)
def load_images_for_date(date, image_folder):
    """Collect one image per hour of ``date`` from ``image_folder``.

    Files are looked up in ``<image_folder>/<YYYYMMDD>/`` using the glob
    pattern ``???-<YYYYMMDD><HH>00.jpg`` (any three-character prefix).

    Args:
        date: object with ``strftime`` (e.g. ``datetime.date``).
        image_folder: root directory that contains one sub-folder per day.

    Returns:
        A list of 24 entries, index = hour. Each entry is an in-memory copy
        of the first file returned by ``glob`` for that hour, or ``None``
        when no file matches or the file cannot be opened.
    """
    day = date.strftime('%Y%m%d')
    day_dir = os.path.join(image_folder, day)
    hourly = []
    for hh in range(24):
        candidates = glob.glob(os.path.join(day_dir, f"???-{day}{hh:02d}00.jpg"))
        if not candidates:
            hourly.append(None)
            continue
        try:
            # Copy so the pixel data survives closing the file handle.
            with Image.open(candidates[0]) as handle:
                frame = handle.copy()
        except (OSError, UnidentifiedImageError):
            frame = None
        hourly.append(frame)
    return hourly

def resize_image(images):
    """Turn a sequence of PIL images (or ``None``) into normalized tensors.

    Every image is converted to RGB, resized to 224x224, converted to a
    tensor and normalized with mean ``[0.485, 0.456, 0.406]`` and std
    ``[0.229, 0.224, 0.225]``. A ``None`` entry becomes an all-zero tensor
    so the output always has exactly one entry per input, in the same order.

    Args:
        images: iterable whose items are PIL images or ``None``.

    Returns:
        list of ``torch.Tensor`` with shape ``(3, 224, 224)``.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize images to 224x224 directly
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    features = []
    for img in images:
        if img is None:
            # Placeholder for a missing hour; same shape as a real image.
            features.append(torch.zeros(3, 224, 224))
            continue
        # Grayscale / CMYK / RGBA files would otherwise produce a tensor
        # whose channel count does not match the 3-channel Normalize (and the
        # fixed 3x224x224 layout the rest of the notebook relies on).
        features.append(transform(img.convert("RGB")))
    return features

def load_data_for_segment(station_name, start_date, end_date):
    """Build per-hour feature and target rows for one station and date window.

    Reads ``./dataset/<station_name>.csv`` (columns ``Station``, ``date``,
    ``measurement`` and one column per hour whose name is castable to int),
    keeps rows with ``start_date <= date < end_date``, loads and transforms
    the matching images from ``./dataset/<station_name>/`` and reshapes the
    table to one row per (date, hour) with one column per measurement.

    Args:
        station_name: station identifier used for both the CSV and image folder.
        start_date: inclusive lower bound compared against the parsed ``date``.
        end_date: exclusive upper bound compared against the parsed ``date``.

    Returns:
        ``(combined_features, targets)`` as NumPy arrays. Each feature row is
        ``[hour, <measurement columns>, month, <flattened image tensor>]``;
        each target row holds the measurement columns only.
        NOTE(review): the measurement values therefore appear in both the
        features and the targets.
    """
    print(f'Loading data for {station_name} from {start_date} to {end_date}')
    station_dir = f'./dataset/{station_name}'

    # --- numerical table: parse timestamps and cut to the requested window ---
    readings = pd.read_csv(f'./dataset/{station_name}.csv').drop(columns=['Station'])
    readings['date'] = pd.to_datetime(readings['date'], format="%d-%m-%Y %H:%M", dayfirst=True)
    in_segment = (readings['date'] >= start_date) & (readings['date'] < end_date)
    readings = readings[in_segment]
    print("Loaded num data!!!")

    # --- images: load every day of the window first, then transform them ---
    days = readings['date'].dt.date.unique()
    raw_images = {day: load_images_for_date(day, station_dir) for day in days}
    print("Loaded image data!!!")
    image_features = {day: resize_image(frames) for day, frames in raw_images.items()}
    print("Resized image data!!!")
    del raw_images

    # --- wide (one column per hour) -> one row per (date, hour) ---
    long_form = readings.melt(id_vars=['date', 'measurement'], var_name='hour', value_name='value')
    long_form['hour'] = long_form['hour'].astype(int)
    hourly = long_form.pivot(index=['date', 'hour'], columns='measurement', values='value').reset_index()
    hourly['month'] = hourly['date'].dt.month
    print("Processed num data!!!")

    # --- pair every hourly record with the image tensor of the same hour ---
    feature_rows = []
    target_rows = []
    for _, record in hourly.iterrows():
        day = record['date'].date()
        hour_idx = record['hour']
        numeric_part = record.drop(['date']).values
        image_part = image_features[day][hour_idx]
        # axis=None flattens both pieces into a single 1-D row.
        feature_rows.append(np.concatenate((numeric_part, image_part), axis=None))
        target_rows.append(record.drop(['date', 'hour', 'month']).values)
    print("Generated combined features and targets!!!")
    return np.array(feature_rows), np.array(target_rows)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.models as models
import numpy as np

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Define the multimodal neural network
class MultimodalNet(nn.Module):
    """Regression network that fuses a numerical sequence with an image.

    The numerical branch is a single-layer GRU followed by two linear layers
    (64 -> 128 -> 64). The image branch is the MobileNetV3-small feature
    extractor followed by global average pooling and a linear projection
    (576 -> 128). Both branches are concatenated (64 + 128 = 192) and mapped
    to the outputs through two linear layers.

    Args:
        num_features: size of each numerical input vector (GRU input size).
        num_targets: number of regression outputs.
        pretrained: forwarded to ``models.mobilenet_v3_small``; pass ``False``
            to skip loading pre-trained weights (e.g. before restoring a
            saved ``state_dict``).
    """

    def __init__(self, num_features=20, num_targets=18, pretrained=True):
        super().__init__()

        # NOTE: submodules are created in a fixed order; changing it would
        # change random initialisation and the state_dict key order.

        # GRU over the numerical features; inputs are (batch, seq, feature).
        self.gru_num = nn.GRU(num_features, 64, 1, batch_first=True)

        # MobileNetV3-small convolutional trunk used as image feature extractor.
        # NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favour
        # of `weights=`; kept because the installed version is not visible here.
        mobilenet = models.mobilenet_v3_small(pretrained=pretrained)
        self.mobilenet_features = mobilenet.features
        # Pool the trunk output to one vector per image and project it to 128-d.
        self.mobilenet_classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(576, 128)
        )

        # Linear layers applied to the GRU output.
        self.fc1_num = nn.Linear(64, 128)
        self.fc2_num = nn.Linear(128, 64)

        # Fusion head: 64 (numerical) + 128 (image) = 192 inputs.
        self.fc1_combined = nn.Linear(192, 64)
        self.fc2_combined = nn.Linear(64, num_targets)

    def forward(self, x_num, x_img):
        """Compute the regression outputs for one batch.

        Args:
            x_num: tensor of shape ``(batch, seq_len, num_features)``.
            x_img: image batch accepted by the MobileNetV3 trunk, i.e.
                ``(batch, 3, H, W)``.

        Returns:
            Tensor of shape ``(batch, num_targets)``.
        """
        # Numerical branch: keep only the GRU output of the last time step.
        x_num, _ = self.gru_num(x_num)
        x_num = x_num[:, -1, :]

        # Image branch: convolutional trunk, then pooled 128-d projection.
        x_img = self.mobilenet_features(x_img)
        x_img = self.mobilenet_classifier(x_img)

        x_num = torch.relu(self.fc1_num(x_num))
        x_num = torch.relu(self.fc2_num(x_num))

        # Fuse both branches and regress; the last layer has no activation.
        x_combined = torch.cat((x_num, x_img), dim=1)
        x_combined = torch.relu(self.fc1_combined(x_combined))
        x_combined = self.fc2_combined(x_combined)

        return x_combined

# Build the network on the selected device, together with its loss and optimizer.
model = MultimodalNet()
model = model.to(device)
criterion = nn.MSELoss()

# Adam with one parameter group per submodule. The two MobileNetV3 groups use
# a 10x smaller learning rate (1e-4); every other group falls back to the
# default given at the end (1e-3).
optimizer = optim.Adam(
    [
        dict(params=model.gru_num.parameters()),
        dict(params=model.mobilenet_features.parameters(), lr=1e-4),
        dict(params=model.mobilenet_classifier.parameters(), lr=1e-4),
        dict(params=model.fc1_num.parameters()),
        dict(params=model.fc2_num.parameters()),
        dict(params=model.fc1_combined.parameters()),
        dict(params=model.fc2_combined.parameters()),
    ],
    lr=1e-3,
)


Using device: cuda




In [4]:

def to_float_with_nan(x):
    """Convert ``x`` to ``float``, mapping unconvertible values to ``-1.0``.

    Despite the name, values that cannot be parsed (e.g. non-numeric strings
    or ``None``) are replaced by the sentinel ``-1.0``, not by NaN. A value
    that already is NaN is returned unchanged.

    Args:
        x: any scalar.

    Returns:
        ``float(x)`` when the conversion succeeds, otherwise ``-1.0``.
    """
    try:
        return float(x)
    except (ValueError, TypeError):
        # TypeError covers objects such as None, which float() rejects with a
        # different exception type than an unparsable string.
        return -1.0

# Element-wise version for NumPy arrays. `otypes` fixes the result dtype to
# float64 and lets the call succeed on size-0 input, which np.vectorize
# otherwise rejects because it cannot infer the output type.
vectorized_to_float_with_nan = np.vectorize(to_float_with_nan, otypes=[float])

In [9]:
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.models as models
import numpy as np
class MultimodalDataset(Dataset):
    """Index-aligned view over numerical features, image features and targets.

    Sample ``i`` is the triple ``(num_features[i], img_features[i], targets[i])``.
    The dataset length is taken from ``num_features`` alone.
    """

    def __init__(self, num_features, img_features, targets):
        # The three containers are stored as given; no copy, no validation.
        self.num_features, self.img_features, self.targets = (
            num_features,
            img_features,
            targets,
        )

    def __len__(self):
        n_samples = len(self.num_features)
        return n_samples

    def __getitem__(self, idx):
        columns = (self.num_features, self.img_features, self.targets)
        return tuple(column[idx] for column in columns)

stations=['Keelung','Banqiao','Tucheng','Chiayi']
month_interval=2
# Number of leading numerical columns in every combined feature row
# (hour, the measurement columns and month); everything after them is the
# flattened 3x224x224 image.
NUM_NUMERIC_FEATURES=20
for epoch in range(20):
    print(f"Epoch {epoch+1}")
    for station_name in stations:
        # Walk through 2022 in windows of `month_interval` months; each window
        # is (re)loaded from disk and trained on before moving to the next.
        for start_month in range(1,13,month_interval):
            start_date=pd.to_datetime(f'2022-{start_month:02d}-01')
            if start_month+month_interval>12:
                end_date=pd.to_datetime('2023-01-01')
            else:
                end_date=pd.to_datetime(f'2022-{start_month+month_interval:02d}-01')
            combined_features,targets=load_data_for_segment(station_name,start_date,end_date)
            print("Loaded data for",station_name,start_date,end_date)
            if len(combined_features)==0:
                # An empty window yields an empty array; the column slicing
                # and scaling below would raise on it, so skip it instead.
                print("No data for",station_name,start_date,end_date,"- skipping segment")
                continue
            # Coerce every entry to float (unparsable entries become -1.0)
            combined_features=vectorized_to_float_with_nan(combined_features)
            targets=vectorized_to_float_with_nan(targets)
            # Standardize the numerical columns; the scaler is fitted on this segment only
            combined_features[:,:NUM_NUMERIC_FEATURES]=StandardScaler().fit_transform(combined_features[:,:NUM_NUMERIC_FEATURES])
            # Convert to tensors
            combined_features = torch.tensor(combined_features, dtype=torch.float32)
            targets = torch.tensor(targets, dtype=torch.float32)
            # Replace all remaining NaN values with the sentinel -1
            combined_features[torch.isnan(combined_features)]=-1
            targets[torch.isnan(targets)]=-1
            # (N, F) -> (N, 1, F): each sample is fed to the GRU as a length-1 sequence
            num_features=combined_features[:,:NUM_NUMERIC_FEATURES]
            num_features=num_features.unsqueeze(1)
            img_features=combined_features[:,NUM_NUMERIC_FEATURES:]
            img_features=img_features.reshape(-1,3,224,224)
            # Move the whole segment to the training device
            num_features,img_features,targets=num_features.to(device),img_features.to(device),targets.to(device)
            dataset = MultimodalDataset(num_features, img_features, targets)
            dataloader = DataLoader(dataset, batch_size=32)
            running_loss = 0.0
            batches_done = 0
            print("Starting training!!!")
            for num_inputs, img_inputs, targets_ in dataloader:
                # Overwrite each numerical input with -1 (the missing-value
                # sentinel) with probability 0.1
                mask = torch.rand_like(num_inputs) < 0.1
                num_inputs[mask] = -1

                optimizer.zero_grad()
                outputs = model(num_inputs, img_inputs)
                loss = criterion(outputs, targets_)
                if torch.isnan(loss):
                    # Dump the offending batch and abandon this segment.
                    print("NaN loss detected")
                    print("Numerical Inputs: ", num_inputs)
                    print("Image Inputs: ", img_inputs)
                    print("Outputs: ", outputs)
                    print("Targets: ", targets_)
                    break
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                running_loss += loss.item()
                batches_done += 1
            # Average over the batches that were actually optimized, so an
            # early NaN break does not deflate the figure and an empty loader
            # cannot divide by zero.
            print(f'Epoch {epoch+1}, Loss: {running_loss/max(batches_done,1)}')
            print(f"Features: {combined_features.shape}, Targets: {targets.shape}")
            
            # print(features)
            # print(targets)
            # print("Loaded data for",station_name,start_date,end_date)
            

Epoch 1
Loading data for Keelung from 2022-01-01 00:00:00 to 2022-03-01 00:00:00
Loaded num data!!!
Loaded image data!!!
Resized image data!!!
Processed num data!!!
Generated combined features and targets!!!
Loaded data for Keelung 2022-01-01 00:00:00 2022-03-01 00:00:00
Starting training!!!
Epoch 1, Loss: 367.9713550143772
Features: torch.Size([1416, 150548]), Targets: torch.Size([1416, 18])
Loading data for Keelung from 2022-03-01 00:00:00 to 2022-05-01 00:00:00
Loaded num data!!!
Loaded image data!!!
Resized image data!!!
Processed num data!!!
Generated combined features and targets!!!
Loaded data for Keelung 2022-03-01 00:00:00 2022-05-01 00:00:00
Starting training!!!
Epoch 1, Loss: 439.25462490579355
Features: torch.Size([1464, 150548]), Targets: torch.Size([1464, 18])
Loading data for Keelung from 2022-05-01 00:00:00 to 2022-07-01 00:00:00
Loaded num data!!!
Loaded image data!!!
Resized image data!!!
Processed num data!!!
Generated combined features and targets!!!
Loaded data for

KeyboardInterrupt: 

In [None]:
# Write a checkpoint to disk: model weights, optimizer state, the last epoch
# index and the running loss of the last trained segment.
# NOTE(review): the file name says "large" although the network defined in
# this notebook is built from mobilenet_v3_small - confirm the intended name.
model_path = 'multimodal_net_gru_mobilenet_large.pth'
torch.save(
    dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=running_loss,
    ),
    model_path,
)
print(f'Model saved to {model_path}')