In [1]:
import pandas as pd
import numpy as np
from numpy.linalg import lstsq, solve
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import torchvision.transforms as transforms
from PIL import Image, ImageOps

In [2]:
file_path = 'data/socal.csv'
df = pd.read_csv(file_path)

# Folder where the images are stored
image_folder = 'data/socal_pics'

# Create a new column 'image_path' by constructing the path based on 'image_id'
df['image_path'] = df['image_id'].apply(lambda x: os.path.join(image_folder, f"{x}.jpg"))

# Display the first few rows of the data
df.head()

Unnamed: 0,image_id,street,citi,n_citi,bed,bath,sqft,price,image_path
0,0,1317 Van Buren Avenue,"Salton City, CA",317,3,2.0,1560,201900,data/socal/socal_pics\0.jpg
1,1,124 C Street W,"Brawley, CA",48,3,2.0,713,228500,data/socal/socal_pics\1.jpg
2,2,2304 Clark Road,"Imperial, CA",152,3,1.0,800,273950,data/socal/socal_pics\2.jpg
3,3,755 Brawley Avenue,"Brawley, CA",48,3,1.0,1082,350000,data/socal/socal_pics\3.jpg
4,4,2207 R Carrillo Court,"Calexico, CA",55,4,3.0,2547,385100,data/socal/socal_pics\4.jpg


In [3]:
class RealEstateDataset(Dataset):
    def __init__(self, csv_file, image_folder, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_folder = image_folder
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_id = self.data.iloc[idx]['image_id']
        image_path = f"{self.image_folder}/{image_id}.jpg"

        # Check if the image file exists
        if os.path.exists(image_path):
            image = Image.open(image_path).convert('RGB')
        else:
            print(f"Warning: Image {image_path} not found. Using a blank image.")
            image = Image.new('RGB', (64, 64), color=(255, 255, 255))  # Create a blank image

        if self.transform:
            image = self.transform(image)

        sqft = torch.tensor(self.data.iloc[idx]['sqft'], dtype=torch.float32)
        price = torch.tensor(self.data.iloc[idx]['price'], dtype=torch.float32)

        return image, sqft, price


In [4]:
class PricePredictor(nn.Module):
    def __init__(self):
        super(PricePredictor, self).__init__()

        # CNN for image processing
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten()
        )

        # Get the output size of the CNN
        self.cnn_output_size = 512  

        # Fully connected layer for `sqft`
        self.fc_sqft = nn.Linear(1, 64)

        # Final layers for combined data
        self.fc_combined = nn.Sequential(
            nn.Linear(self.cnn_output_size + 64, 128),
            nn.ReLU(),
            nn.Linear(128, 1)  # Output layer for price
        )

    def forward(self, image, sqft):
        # CNN part for image
        img_features = self.cnn(image)

        # Fully connected layer for sqft
        sqft_features = self.fc_sqft(sqft.view(-1, 1))

        # Concatenate both features
        combined_features = torch.cat((img_features, sqft_features), dim=1)

        # Pass through final layers
        price = self.fc_combined(combined_features)

        return price

In [5]:
# Define transforms
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

# Initialize dataset and dataloader
csv_file = 'data/socal.csv'
image_folder = 'data/socal_pics'
dataset = RealEstateDataset(csv_file, image_folder, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the model, loss function, and optimizer
model = PricePredictor()
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for images, sqft, prices in dataloader:
        # Forward pass
        predictions = model(images, sqft)
        loss = criterion(predictions.view(-1), prices)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [1/10], Loss: 80674717696.0000
Epoch [2/10], Loss: 74430316544.0000
Epoch [3/10], Loss: 72079622144.0000
Epoch [4/10], Loss: 56669024256.0000
Epoch [5/10], Loss: 52540506112.0000
Epoch [6/10], Loss: 103437680640.0000
Epoch [7/10], Loss: 41304457216.0000
Epoch [8/10], Loss: 48655056896.0000
Epoch [9/10], Loss: 64363495424.0000
Epoch [10/10], Loss: 111521447936.0000
