# In [None]:
import os
import pandas as pd
from pathlib import Path
from PIL import Image
import time
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from perceiver_pytorch import PerceiverIO

# Dataset
class CarDataset(Dataset):
    """Car offers loaded from a JSON-lines file, optionally paired with photos.

    Photos are looked up as ``<image_dir>/<id>_<i>.jpg`` for ``i`` in
    ``1..num_images``. When ``use_images`` is true, offers that do not have a
    single photo on disk are skipped at construction time.

    Each item is a tuple ``(images, tabular_data, price, url)``:

    * ``images`` -- stack of ``num_images`` transformed photos (padded by
      repeating the last available photo), or ``None`` when ``use_images`` is
      false.
    * ``tabular_data`` -- 1-D float32 tensor built from every column except
      ``url``, ``cena`` and ``id``, or ``None`` when ``use_tabular`` is false.
    * ``price`` -- 0-dim float32 tensor taken from the ``cena`` column.
    * ``url`` -- value of the ``url`` column, or ``None`` if there is no such
      column.

    Args:
        json_path: Path to the JSON-lines file with one offer per line.
        image_dir: Directory that holds the offer photos.
        transform: Optional callable applied to every loaded PIL image. It must
            return tensors if ``use_images`` is true, because the photos are
            combined with ``torch.stack``.
        num_images: Number of photo slots per offer.
        use_images: Whether photos are loaded and returned.
        use_tabular: Whether tabular features are converted and returned.
    """

    # Columns that identify or label an offer and are therefore not features.
    _NON_FEATURE_COLUMNS = ("url", "cena", "id")

    def __init__(self, json_path, image_dir, transform=None, num_images=12, use_images=True, use_tabular=True):
        self.image_dir = image_dir
        self.transform = transform
        self.num_images = num_images
        self.use_images = use_images
        self.use_tabular = use_tabular

        print(f"Tworzę CarDataset: use_images={use_images}, use_tabular={use_tabular}")

        # Load the JSON-lines data.
        self.data = pd.read_json(json_path, lines=True)

        self.samples = []
        # The id is taken from the column rather than from the row Series:
        # iterrows() upcasts an all-numeric row to float, which would turn the
        # id 17 into "17.0" and break the image file-name lookup.
        for (_, row), raw_id in zip(self.data.iterrows(), self.data["id"]):
            car_id = str(raw_id)

            # Only touch the filesystem when photos are actually requested.
            if self.use_images and not any(path.exists() for path in self._image_paths(car_id)):
                continue

            tabular_tensor = None
            if self.use_tabular:
                try:
                    # errors="ignore" keeps this consistent with the optional
                    # handling of the "url" column below.
                    features = row.drop(list(self._NON_FEATURE_COLUMNS), errors="ignore").astype(float).values
                except Exception:
                    print(f"Błąd w danych tabelarycznych: {row}")
                    raise
                tabular_tensor = torch.tensor(features, dtype=torch.float32)

            self.samples.append({
                "id": car_id,
                "price": row["cena"],  # car price (regression target)
                "tabular_data": tabular_tensor,
                "url": row["url"] if "url" in row else None,
            })

        print(f"Dataset: Wczytano {len(self.samples)} rekordów.")

    def _image_paths(self, car_id):
        """Return the candidate photo paths ``<id>_1.jpg .. <id>_<num_images>.jpg``."""
        return [Path(self.image_dir, f"{car_id}_{i}.jpg") for i in range(1, self.num_images + 1)]

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        car_id = sample["id"]
        price = torch.tensor(sample["price"], dtype=torch.float32)

        images = None
        if self.use_images:
            images = []
            for image_path in self._image_paths(car_id):
                if not image_path.exists():
                    continue
                img = Image.open(image_path).convert("RGB")
                if self.transform is not None:
                    img = self.transform(img)
                images.append(img)

            if not images:
                # Raised explicitly instead of letting images[-1] fail: an
                # IndexError escaping __getitem__ is read as "end of sequence"
                # by plain `for sample in dataset` iteration.
                raise FileNotFoundError(
                    f"No images found for offer {car_id} in {self.image_dir}"
                )

            # Pad to a fixed number of slots by repeating the last photo.
            while len(images) < self.num_images:
                images.append(images[-1])

            images = torch.stack(images)  # [NUM_IMAGES, C, H, W] for CHW tensors

        return images, sample["tabular_data"], price, sample["url"]

# Model
class MultimodalPerceiver(nn.Module):
    """Price regressor that fuses photo and tabular embeddings and feeds them to Perceiver IO.

    Every photo is embedded with a small conv + global-average-pool stack and
    the per-photo embeddings are averaged; tabular features go through a
    linear layer. The enabled modalities are fused into one ``latent_dim``
    vector, passed through ``PerceiverIO`` as a length-1 sequence, averaged
    over the returned sequence axis and mapped to a single value.

    Args:
        image_dim: ``(C, H, W)`` of the input photos; only ``C`` is used here.
        tabular_dim: Number of tabular features.
        latent_dim: Width of all embeddings and of the Perceiver latents.
        num_latents: Number of Perceiver latents.
        num_images: Number of photo slots read from the ``images`` input.
        use_images: Whether the photo branch is built and used.
        use_tabular: Whether the tabular branch is built and used.

    Raises:
        ValueError: If both ``use_images`` and ``use_tabular`` are false.
    """

    def __init__(self, image_dim=(3, 140, 100), tabular_dim=3, latent_dim=512, num_latents=256, num_images=12, use_images=True, use_tabular=True):
        super().__init__()
        if not (use_images or use_tabular):
            # Without this check forward() would fail later with an
            # UnboundLocalError because no features are ever produced.
            raise ValueError("At least one of use_images / use_tabular must be enabled.")

        self.use_images = use_images
        self.use_tabular = use_tabular
        self.num_images = num_images

        # Perceiver IO. forward() below calls it without `queries`, so
        # queries_dim / logits_dim only size decoder layers.
        # NOTE(review): per the perceiver_pytorch implementation those decoder
        # layers are then not exercised; they are kept so existing checkpoints
        # still load - confirm before removing.
        self.perceiver = PerceiverIO(
            dim=latent_dim,
            queries_dim=1,
            logits_dim=latent_dim,
            depth=6,
            num_latents=num_latents,
            latent_dim=latent_dim
        )

        # Photo embedding: conv -> ReLU -> global average pool -> [B, latent_dim]
        if self.use_images:
            self.image_embedding = nn.Sequential(
                nn.Conv2d(image_dim[0], latent_dim, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d((1, 1)),
                nn.Flatten()
            )

        # Tabular embedding: [B, tabular_dim] -> [B, latent_dim]
        if self.use_tabular:
            self.tabular_embedding = nn.Sequential(
                nn.Linear(tabular_dim, latent_dim),
                nn.ReLU(),
            )

        # Projects the concatenated features back to latent_dim.
        if self.use_images and self.use_tabular:
            self.combined_linear = nn.Linear(latent_dim * 2, latent_dim)

        # Regression head.
        self.output_layer = nn.Linear(latent_dim, 1)

    def forward(self, images=None, tabular_data=None):
        """Predict one price per batch element.

        Args:
            images: ``[B, N, C, H, W]`` tensor with ``N >= num_images``;
                required when ``use_images`` is true.
            tabular_data: ``[B, tabular_dim]`` tensor; required when
                ``use_tabular`` is true.

        Returns:
            Tensor of shape ``[B]``.

        Raises:
            ValueError: If the input of an enabled modality is ``None``.
        """
        if self.use_images and images is None:
            raise ValueError("images must be provided when use_images=True")
        if self.use_tabular and tabular_data is None:
            raise ValueError("tabular_data must be provided when use_tabular=True")

        image_features = None
        tabular_features = None

        if self.use_images:
            # Photos are embedded one slot at a time (keeps the peak activation
            # size at one slot per step), then averaged over the slots.
            per_image = [
                self.image_embedding(images[:, i, :, :, :])  # [B, latent_dim]
                for i in range(self.num_images)
            ]
            image_features = torch.stack(per_image, dim=1).mean(dim=1)  # [B, latent_dim]

        if self.use_tabular:
            tabular_features = self.tabular_embedding(tabular_data)  # [B, latent_dim]

        # Fuse the enabled modalities into a single [B, latent_dim] vector.
        if self.use_images and self.use_tabular:
            combined_features = torch.cat((image_features, tabular_features), dim=1)  # [B, 2 * latent_dim]
            combined_features = self.combined_linear(combined_features)  # [B, latent_dim]
        elif self.use_images:
            combined_features = image_features
        else:
            combined_features = tabular_features

        # Perceiver expects a sequence axis: [B, 1, latent_dim]
        combined_features = combined_features.unsqueeze(1)

        # Called without queries. NOTE(review): per perceiver_pytorch this
        # returns the latents [B, num_latents, latent_dim], which matches
        # output_layer expecting latent_dim features - confirm against the
        # installed version.
        perceiver_output = self.perceiver(combined_features)

        # Average over the sequence axis -> [B, latent_dim]
        perceiver_output = perceiver_output.mean(dim=1)

        output = self.output_layer(perceiver_output)  # [B, 1]
        return output.squeeze(1)  # [B]


# Trening
def train_model(json_path, image_dir, model, criterion, optimizer, num_epochs=10, batch_size=16, use_images=True, use_tabular=True):
    """Train ``model`` on the offers in ``json_path`` and save its weights.

    The weights are written to
    ``../models/car_price_predictor_images-<use_images>_tabular-<use_tabular>.pth``
    after the last epoch.

    Args:
        json_path: Path to the JSON-lines training data.
        image_dir: Directory with the offer photos.
        model: Model called as ``model(images=..., tabular_data=...)``.
        criterion: Loss called as ``criterion(outputs, prices)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over the data.
        batch_size: Mini-batch size.
        use_images: Whether the dataset loads photos.
        use_tabular: Whether the dataset loads tabular features.

    Raises:
        ValueError: If the dataset contains no samples.
    """

    def collate_optional(batch):
        # CarDataset returns None for a disabled modality (and for a missing
        # url); the default DataLoader collate raises TypeError on None, so
        # those fields are passed through instead of being stacked.
        images, tabular, prices, urls = zip(*batch)
        return (
            torch.stack(images) if images[0] is not None else None,
            torch.stack(tabular) if tabular[0] is not None else None,
            torch.stack(prices),
            list(urls),
        )

    transform = transforms.Compose([
        transforms.Resize((140, 100)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    dataset = CarDataset(json_path=json_path, image_dir=image_dir, transform=transform, use_images=use_images, use_tabular=use_tabular)
    if len(dataset) == 0:
        raise ValueError(f"No training samples could be loaded from {json_path}")
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_optional)

    # Move batches to wherever the model lives instead of relying on a global
    # device constant that must happen to agree with it.
    device = next(model.parameters()).device

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        start_time_epoch = time.time()

        for i, (images, tabular_data, prices, urls) in enumerate(dataloader):
            iter_start_time = time.time()

            images = images.to(device) if images is not None else None
            tabular_data = tabular_data.to(device) if tabular_data is not None else None
            prices = prices.to(device)

            optimizer.zero_grad()
            outputs = model(images=images, tabular_data=tabular_data)
            loss = criterion(outputs, prices)
            loss.backward()
            optimizer.step()

            iter_time = time.time() - iter_start_time
            epoch_loss += loss.item()

            print(f"Iteracja {i+1}/{len(dataloader)}, Loss: {loss.item():.4f}, Time: {iter_time:.2f}s")

        epoch_time = time.time() - start_time_epoch
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(dataloader):.4f}, Time: {epoch_time:.2f}s")

    # Save the trained weights; the file name encodes the input configuration.
    input_config = f"images-{use_images}_tabular-{use_tabular}"
    model_name = f"car_price_predictor_{input_config}.pth"
    model_path = os.path.join("../models", model_name)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    torch.save(model.state_dict(), model_path)
    print(f"Model zapisany w: {model_path}")

# Configuration
JSON_PATH = "../data/clean_data_train_01.json"
IMAGE_DIR = "../data/images/"
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model, loss function, optimizer.
# The tabular feature width is read off the first record of an image-free
# dataset built from the training file.
tabular_dim = CarDataset(
    json_path=JSON_PATH,
    image_dir=IMAGE_DIR,
    use_images=False,
).samples[0]["tabular_data"].shape[0]
model = MultimodalPerceiver(
    image_dim=(3, 140, 100),
    tabular_dim=tabular_dim,
    latent_dim=512,
    num_images=12,
    use_images=True,
    use_tabular=True,
).to(DEVICE)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Run training
train_model(
    json_path=JSON_PATH,
    image_dir=IMAGE_DIR,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    batch_size=16,
    use_images=True,
    use_tabular=True,
)


# In [None]:
def load_model(model_path, image_dim=(3, 140, 100), tabular_dim=3, latent_dim=512, num_latents=256, num_images=12, use_images=True, use_tabular=True):
    """
    Rebuild a MultimodalPerceiver and restore its weights from a checkpoint.

    The architecture arguments must describe the same configuration the
    checkpoint was saved with, otherwise ``load_state_dict`` will reject it.

    Args:
        model_path (str): Path to the saved state-dict file.
        image_dim (tuple): Dimensions of the input images.
        tabular_dim (int): Number of tabular features.
        latent_dim (int): Latent width of the model.
        num_latents (int): Number of latents in Perceiver IO.
        num_images (int): Maximum number of images per offer.
        use_images (bool): Whether the image branch is used.
        use_tabular (bool): Whether the tabular branch is used.

    Returns:
        torch.nn.Module: The model on ``DEVICE``, switched to eval mode.
    """
    architecture = {
        "image_dim": image_dim,
        "tabular_dim": tabular_dim,
        "latent_dim": latent_dim,
        "num_latents": num_latents,
        "num_images": num_images,
        "use_images": use_images,
        "use_tabular": use_tabular,
    }
    restored = MultimodalPerceiver(**architecture).to(DEVICE)

    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # from a trusted source.
    weights = torch.load(model_path, map_location=DEVICE)
    restored.load_state_dict(weights)
    restored.eval()

    print(f"Model załadowany z: {model_path}")
    return restored

def evaluate_model(model, test_json_path, image_dir, batch_size=16, use_images=True, use_tabular=True):
    """
    Evaluate a trained model on the test data and report MSE and MAPE.

    Both metrics are averaged per sample over the whole test set, so the
    result does not depend on ``batch_size`` or on the size of the last batch.

    Args:
        model (torch.nn.Module): Trained model, called as
            ``model(images=..., tabular_data=...)``.
        test_json_path (str): Path to the JSON-lines file with the test data.
        image_dir (str): Directory with the images.
        batch_size (int): Batch size.
        use_images (bool): Whether the dataset loads images.
        use_tabular (bool): Whether the dataset loads tabular features.

    Returns:
        tuple: Mean squared error (MSE) and mean absolute percentage error
        (MAPE, in percent) on the test data.

    Raises:
        ValueError: If the test dataset contains no samples.
    """

    def collate_optional(batch):
        # CarDataset returns None for a disabled modality (and for a missing
        # url); the default DataLoader collate raises TypeError on None, so
        # those fields are passed through instead of being stacked.
        images, tabular, prices, urls = zip(*batch)
        return (
            torch.stack(images) if images[0] is not None else None,
            torch.stack(tabular) if tabular[0] is not None else None,
            torch.stack(prices),
            list(urls),
        )

    transform = transforms.Compose([
        transforms.Resize((140, 100)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Prepare the test data.
    test_dataset = CarDataset(
        json_path=test_json_path,
        image_dir=image_dir,
        transform=transform,
        use_images=use_images,
        use_tabular=use_tabular
    )
    if len(test_dataset) == 0:
        raise ValueError(f"No test samples could be loaded from {test_json_path}")
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_optional)

    # Move batches to wherever the model lives instead of relying on a global
    # device constant that must happen to agree with it.
    device = next(model.parameters()).device

    model.eval()
    squared_error_sum = 0.0
    total_percentage_error = 0.0
    total_samples = 0

    with torch.no_grad():
        for images, tabular_data, prices, urls in test_loader:
            images = images.to(device) if images is not None else None
            tabular_data = tabular_data.to(device) if tabular_data is not None else None
            prices = prices.to(device)

            outputs = model(images=images, tabular_data=tabular_data)
            errors = outputs - prices

            # Accumulate sums, not per-batch means: averaging batch means
            # over-weights a smaller final batch.
            squared_error_sum += (errors ** 2).sum().item()

            # NOTE: a price of exactly 0 makes this ratio inf/nan.
            absolute_percentage_error = torch.abs(errors / prices) * 100
            total_percentage_error += absolute_percentage_error.sum().item()
            total_samples += len(prices)

    avg_loss = squared_error_sum / total_samples
    avg_percentage_error = total_percentage_error / total_samples
    print(f"Średni błąd kwadratowy (MSE) na danych testowych: {avg_loss:.4f}")
    print(f"Średni błąd procentowy (MAPE) na danych testowych: {avg_percentage_error:.2f}%")
    return avg_loss, avg_percentage_error


# Paths to the test data and the trained checkpoint
TEST_JSON_PATH = "../data/clean_data_test_01.json"
IMAGE_DIR = "../data/images/"
# Must match the file written by train_model, which names checkpoints
# car_price_predictor_images-<use_images>_tabular-<use_tabular>.pth
MODEL_PATH = "../models/car_price_predictor_images-True_tabular-True.pth"

# Load the model.
# tabular_dim is reused from the training section above; it has to equal the
# value the checkpoint was trained with. To recompute it from the test data:
#tabular_dim = len(CarDataset(json_path=TEST_JSON_PATH, image_dir=IMAGE_DIR, use_images=False).samples[0]["tabular_data"])
model = load_model(
    model_path=MODEL_PATH,
    image_dim=(3, 140, 100),
    tabular_dim=tabular_dim,
    latent_dim=512,
    num_latents=256,
    num_images=12,
    use_images=True,
    use_tabular=True
)

# Evaluate the model
evaluate_model(model, test_json_path=TEST_JSON_PATH, image_dir=IMAGE_DIR, batch_size=16, use_images=True, use_tabular=True)