In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image

In [5]:
df= pd.read_excel("cleaned_data.xlsx")
df.head()

Unnamed: 0.1,Unnamed: 0,price,other_details,images,accepted_currency,city,sub,no_of_bedrooms,no_of_bathrooms
0,0,125000.0,Appliances\nWater Heater\nWardrobes\nAmenities...,['https://efiewura.com/img/properties/executiv...,GHC,Accra,Other,4,4
1,5,2822500.0,Appliances\nDishwasher\nMicrowave\nAir Conditi...,['https://efiewura.com/img/properties/4_bedroo...,USD,Accra,Legon,4,5
2,6,10725500.0,Appliances\nDishwasher\nWater Heater\nMicrowav...,['https://efiewura.com/img/properties/5-bedroo...,USD,Accra,Cantonments,5,6
3,8,2929755.0,Appliances\nWater Heater\nMicrowave\nAir Condi...,['https://efiewura.com/img/properties/2-bedroo...,USD,Accra,Airport,2,2
4,9,2032200.0,\nDescription\n2 Bedroom Apartment for Sale at...,['https://efiewura.com/img/properties/2_bedroo...,USD,Accra,Other,2,2


In [6]:
# Define a custom dataset to handle the tabular and image data
class HouseDataset(Dataset):
    def __init__(self, tabular_data, image_data, targets, transform=None):
        self.tabular_data = torch.tensor(tabular_data, dtype=torch.float32)
        self.image_data = image_data
        self.targets = torch.tensor(targets, dtype=torch.float32)
        self.transform = transform

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        tabular_sample = self.tabular_data[idx]
        image_sample = self.image_data[idx]
        target = self.targets[idx]

        if self.transform:
            image_sample = self.transform(image_sample)

        return tabular_sample, image_sample, target

In [7]:
# Preprocess the images using a pre-trained CNN (ResNet50)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [8]:
# Assuming the 'images' column contains lists of image URLs for each sample
df['image_features'] = df['images'].apply(lambda urls: [transform(Image.open(url)) for url in urls])
X_images = df['image_features'].tolist()

# Create DataLoader for the custom dataset
dataset = HouseDataset(tabular_data=X_tabular, image_data=X_images, targets=y, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


FileNotFoundError: [Errno 2] No such file or directory: '['

In [None]:
# Define the combined model in PyTorch
class CombinedModel(nn.Module):
    def __init__(self):
        super(CombinedModel, self).__init__()
        self.cnn_model = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True)
        self.cnn_model.fc = nn.Identity()  # Remove the last fully connected layer
        self.fc_tabular = nn.Sequential(
            nn.Linear(X_tabular.shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU()
        )
        self.fc_combined = nn.Sequential(
            nn.Linear(2048 + 16, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, tabular_data, image_data):
        image_features = self.cnn_model(image_data)
        tabular_features = self.fc_tabular(tabular_data)
        combined_features = torch.cat((image_features, tabular_features), dim=1)
        output = self.fc_combined(combined_features)
        return output

In [None]:
# Create an instance of the combined model and move it to the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CombinedModel().to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters())

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for tabular_data, image_data, targets in train_loader:
        tabular_data, image_data, targets = tabular_data.to(device), image_data.to(device), targets.to(device)

        # Forward pass
        outputs = model(tabular_data, image_data)
        loss = criterion(outputs.squeeze(), targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [None]:
# Evaluation (same as before)
# ...

# Making Predictions (same as before)
# ...