In [1]:
import os
import pandas as pd
from PIL import Image,UnidentifiedImageError
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler

class MultimodalDataset(Dataset):
    def __init__(self, numerical_data, image_data_folder, transform=None):
        self.numerical_data = numerical_data
        self.image_data_folder = image_data_folder
        self.transform = transform
        self.image_data = self._load_image_data()

    def _load_image_data(self):
        image_data = {}
        for root, dirs, files in os.walk(self.image_data_folder):
            for dir_name in dirs:
                date_str = dir_name
                image_data[date_str] = {}
                dir_path = os.path.join(root, dir_name)
                for hour in range(24):
                    img_path = os.path.join(dir_path, f"009-{date_str}{hour:02d}00.jpg")
                    try:
                        if os.path.exists(img_path):
                            image_data[date_str][hour] = img_path
                        else:
                            image_data[date_str][hour] = None
                    except(OSError,UnidentifiedImageError):
                        image_data[date_str][hour] = None
        return image_data

    def __len__(self):
        return len(self.numerical_data)

    def __getitem__(self, idx):
        row = self.numerical_data.iloc[idx]
        date = row['date'].strftime('%Y%m%d')
        hour = row['hour']
        numerical_features = row.drop(['date', 'hour']).values.astype(np.float32)
        
        img_path = self.image_data[date][hour]
        if img_path:
            img = Image.open(img_path)
            if self.transform:
                img = self.transform(img)
        else:
            img = torch.zeros(3, 224, 224)  # Handle missing images
        
        return numerical_features, img, row['value']  # Assuming the target is the value for that hour

# Preprocess numerical data
numerical_data = pd.read_csv('./dataset/Linkou_2022.csv')
#drop the Stations column
numerical_data = numerical_data.drop(columns=['Station'])
numerical_data['date'] = pd.to_datetime(numerical_data['date'],format="%d-%m-%Y %H:%M",dayfirst=True)
numerical_data = numerical_data.melt(id_vars=['date', 'measurement'], var_name='hour', value_name='value')
numerical_data['hour'] = numerical_data['hour'].astype(int)

#clean the data
# Check for non-numeric values in 'value' column
non_numeric_values = numerical_data[~numerical_data['value'].apply(lambda x: np.issubdtype(type(x), np.number))]

# Print non-numeric values for debugging
print("Non-numeric values found in the data:")
print(non_numeric_values)

# Convert 'value' column to numeric, coercing errors
numerical_data['value'] = pd.to_numeric(numerical_data['value'], errors='coerce')

# Handle missing values if any (e.g., fill with zero or drop)
numerical_data['value'].fillna(0, inplace=True)

#Pivot the table
numerical_data = numerical_data.pivot_table(index=['date', 'hour'], columns='measurement', values='value').reset_index()

# Normalize numerical data
scaler = StandardScaler()
numerical_data.iloc[:, 2:] = scaler.fit_transform(numerical_data.iloc[:, 2:])

# Image preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create Dataset and DataLoader
image_data_folder = './dataset/009_Linkou'
# sequence_length = 24
dataset = MultimodalDataset(numerical_data, image_data_folder, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)


Non-numeric values found in the data:
             date measurement  hour value
0      2022-01-01    AMB_TEMP     0  13.5
1      2022-01-01          CO     0  0.21
2      2022-01-01          NO     0   0.4
3      2022-01-01         NO2     0   6.8
4      2022-01-01         NOx     0   7.3
...           ...         ...   ...   ...
131755 2037-01-02         SO2    23   0.6
131756 2037-01-03       WD_HR    23    60
131757 2037-01-04  WIND_DIREC    23    52
131758 2037-01-05  WIND_SPEED    23   2.1
131759 2037-01-06       WS_HR    23   2.1

[131400 rows x 4 columns]


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  numerical_data['value'].fillna(0, inplace=True)


In [2]:
import torch.nn as nn
import torch.optim as optim
import torchvision
# Define CNN model
class ImageCNN(nn.Module):
    def __init__(self):
        super(ImageCNN, self).__init__()
        self.cnn = torchvision.models.resnet50(pretrained=True)
        self.cnn = nn.Sequential(*list(self.cnn.children())[:-1])
        self.fc = nn.Linear(2048, 128)

    def forward(self, x):
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Define LSTM model for numerical data
class NumericalLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(NumericalLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 128)

    def forward(self, x):
        h_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        c_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])
        return out

# Combine CNN and LSTM outputs
class MultimodalNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(MultimodalNet, self).__init__()
        self.image_cnn = ImageCNN()
        self.numerical_lstm = NumericalLSTM(input_size, hidden_size, num_layers)
        self.fc1_combined = nn.Linear(256, 64)
        self.fc2_combined = nn.Linear(64, 1)

    def forward(self, num_data, img_data):
        img_features = self.image_cnn(img_data)
        num_features = self.numerical_lstm(num_data)
        combined_features = torch.cat((num_features, img_features), dim=1)
        x = torch.relu(self.fc1_combined(combined_features))
        x = self.fc2_combined(x)
        return x


In [3]:
# Hyperparameters
input_size = 15  # Number of numerical features
hidden_size = 128
num_layers = 2
num_epochs = 20
learning_rate = 0.001

# Instantiate model, loss function, and optimizer
print(torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultimodalNet(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    print("Epoch: ",epoch+1)
    model.train()
    print("Model set to training mode")
    running_loss = 0.0
    i=0
    for num_features, img_features, targets in dataloader:
        print(f"Batch: {i+1}")
        num_features, img_features, targets = num_features.to(device), img_features.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(num_features.unsqueeze(1), img_features)
        loss = criterion(outputs.squeeze(), targets.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        i+=1
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader)}')


True




Epoch:  1
Model set to training mode
