In [1]:
# features
# title
# listing description - right side page
# posting body - main description
# embedding - listing title + posting body 1536 dimensions
# image link - currently this would be the first image in the listing,
#              but we could also use all images in the listing.
#              Also, possible to make a single record for each image in the listing, with the same
#              title, description, embedding, and price and other features.
# cylinders - 6 cylinders (not sure range of values)
# drive - 2wd, 4wd, rwd (not sure range of values)
# fuel - gas, diesel, electric, hybrid (not sure range of values)
# odometer - miles
# transmission - automatic or manual (not sure range of values)
# type - truck or car

In [10]:
# Hand-written sample listing used to prototype the pipeline before real data
# is available.
tacoma_listing_1 = {
    'title': "2022 Toyota Tacoma 4WD 4x4 Truck TRD Off Road Crew Cab - $37,697 (AutoNation Buick GMC Park Meadows)",
    'listing_description': "2022 Toyota Tacoma 4WD",
    'posting_body': """As America's Largest Automotive Retailer we have thousands of other vehicles, including many others that may be identical to this one.
                         Use the link below to view similar inventory from this dealership.
                        ↪ https://atmxi.com/CL-LEGe  🚗 
                        1A0F2A155056A981""",
    'image_link': "https://images.craigslist.org/01515_l8eYKPRC4TH_0ak07K_600x450.jpg",
    'cylinders': "6 cylinders",
    'drive': "4wd",
    'fuel': "gas",
    'odometer': 38132,
    'transmission': "automatic",
    'type': "truck",
}

# Second sample: same text and image as the first listing; only the fields
# overridden below differ. Unpacking keeps the key order of the first listing.
tacoma_listing_2 = {
    **tacoma_listing_1,
    'cylinders': "4 cylinders",
    'drive': "2wd",
    'odometer': 28132,
    'transmission': "manual",
}

In [11]:
# Collect the sample listings into a list of records (one dict per listing).
data = [tacoma_listing_1, tacoma_listing_2]

In [12]:
# Local import: in file order this cell comes before the notebook's import
# cell, so a top-to-bottom run would otherwise fail with NameError on `pd`.
import pandas as pd

# Build the listings DataFrame from the in-memory sample records. Ultimately
# this will instead read a .csv that is created from a db query.
df = pd.DataFrame(data)

In [None]:
# Preview the first rows of the listings DataFrame as the cell's output.
df.head()

In [3]:
import requests
from PIL import Image
from io import BytesIO
import pandas as pd
import numpy as np

In [38]:
def download_image(image_url, timeout=10):
    """Download the image at ``image_url`` and open it with PIL.

    image_url: URL of the image to fetch.
    timeout: seconds to wait for the server before giving up. Without a
        timeout, ``requests.get`` can block indefinitely on a stalled host.

    Returns the opened ``PIL.Image.Image`` in whatever mode the file uses
    (no RGB conversion is applied here).

    Raises ``requests.HTTPError`` for a 4xx/5xx response, so an error page is
    never handed to PIL as if it were image data, and the usual ``requests``
    exceptions for connection failures and timeouts.
    """
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    return img

def one_hot_encode(df, categories):
    """Return a new DataFrame with each column in ``categories`` one-hot encoded.

    Every listed column is replaced by integer 0/1 indicator columns named
    ``<column>_<value>``. All levels are kept (none is dropped), and columns
    that are not listed pass through unchanged.
    """
    dummy_options = {
        'columns': categories,
        'drop_first': False,
        'prefix_sep': '_',
        'dtype': int,
    }
    return pd.get_dummies(df, **dummy_options)

def load_data(df, categories):
    """Prepare ``df`` for modelling by one-hot encoding its categorical columns.

    df: DataFrame of listings.
    categories: names of the columns to one-hot encode.

    Returns the encoded DataFrame produced by ``one_hot_encode``.
    """
    return one_hot_encode(df, categories)

# Categorical listing columns that get expanded into one-hot indicator columns.
one_hot_categories = [
    'cylinders',
    'drive',
    'fuel',
    'transmission',
    'type',
]


In [None]:
# features_df is df with every column in one_hot_categories replaced by its
# one-hot encoded indicator columns.
features_df = load_data(df, one_hot_categories)

In [42]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader

In [81]:
class CarListingDataset(Dataset):
    """Dataset yielding one ``(image, features)`` pair per row of a listings DataFrame.

    The image is downloaded from the row's ``image_link`` URL on every access.
    The features tensor is currently an all-zero placeholder; see the TODO in
    ``__getitem__``.
    """

    # Length of the placeholder feature vector returned by __getitem__.
    EMBEDDING_DIM = 1536

    def __init__(self, dataframe, transform=None):
        """
        dataframe: A Pandas DataFrame containing the data; must have an
            'image_link' column holding image URLs.
        transform: Optional callable applied to the downloaded PIL image
            (e.g. PyTorch transforms). When None, the image is converted to a
            float32 [C, H, W] tensor without resizing or rescaling.
        """
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Number of listings (rows) in the DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, features_tensor)`` for the listing at position ``idx``."""
        # Get the data for one listing
        listing = self.dataframe.iloc[idx]

        # Download and process the image
        image = self.download_image(listing['image_link'])
        # Compare against None rather than truthiness: a valid transform may be
        # falsy (for example an empty container of steps) and must still run.
        if self.transform is not None:
            image = self.transform(image)
        else:
            # np.array on an RGB PIL image gives [H, W, C]; reorder to the
            # [C, H, W] layout that Conv2d expects. Pixel values keep their
            # original scale (cast to float32, not normalised).
            image = torch.tensor(np.array(image), dtype=torch.float32)
            image = image.permute(2, 0, 1)

        # TODO: combine and embed the listing text and the one-hot features.
        # Until then every listing gets a blank feature vector.
        features_tensor = torch.zeros(self.EMBEDDING_DIM, dtype=torch.float32)

        return image, features_tensor

    @staticmethod
    def download_image(image_url, timeout=10):
        """Fetch ``image_url`` and return it as an RGB PIL image.

        timeout: seconds to wait for the server; without it a stalled host
            would hang the DataLoader indefinitely.

        Raises ``requests.HTTPError`` on a 4xx/5xx response so an error page is
        never passed to PIL as image data.
        """
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert('RGB')
        return img

# Image preprocessing pipeline: resize every image to 224x224, then convert
# it to a PyTorch tensor.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])


In [82]:

# Wrap the listings DataFrame; no transform is applied, so images are not
# resized before being returned.
dataset = CarListingDataset(dataframe=df, transform=None)

# Serve one listing per batch, in DataFrame order.
dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)


In [83]:
# Sanity check: print the shape of each batched image tensor. Every iteration
# triggers an image download inside the dataset.
for image, features in dataloader:
    print( image.shape)

torch.Size([1, 3, 450, 600])
torch.Size([1, 3, 450, 600])


In [84]:
import torch.nn as nn
import torch.nn.functional as F

In [85]:
class CNN(nn.Module):
    """Three conv/pool stages feeding either a price head or a feature-vector head.

    Each stage is a 3x3 convolution (padding 1, so spatial size is preserved),
    a ReLU, and a 2x2 max-pool that halves height and width, flooring odd
    sizes. ``forward`` selects the head through ``output_type``.
    """

    def __init__(self, input_size=(450, 600)):
        """
        input_size: (height, width) of the images the network will be fed.
            Used only to size the fully connected layers. The default matches
            the 3 x 450 x 600 tensors produced by the notebook's dataset and
            gives the same 64 * 75 * 56 flattened size as before.

        Raises ValueError if the image is too small to survive three poolings.
        """
        super().__init__()

        # Three 2x2/stride-2 poolings: 450 -> 225 -> 112 -> 56 and
        # 600 -> 300 -> 150 -> 75 for the default input size.
        height, width = input_size
        for _ in range(3):
            height, width = height // 2, width // 2
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small for three 2x2 poolings"
            )
        self.flat_features = 64 * height * width

        # First Convolutional Layer: 3 -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        # Shared 2x2 max-pool applied after every convolution
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second Convolutional Layer: 16 -> 32 channels
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)

        # Third Convolutional Layer: 32 -> 64 channels
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        # Fully Connected Layers for Price Prediction
        self.fc1 = nn.Linear(in_features=self.flat_features, out_features=126)
        self.fc2 = nn.Linear(in_features=126, out_features=1)

        # Fully Connected Layer for 126-dimension vector
        self.fc_vector = nn.Linear(in_features=self.flat_features, out_features=126)

    def forward(self, x, output_type='price'):
        """Run the network on a batch of images.

        x: float tensor of shape [batch, 3, H, W], where (H, W) matches the
            ``input_size`` given at construction.
        output_type: 'price' returns a [batch, 1] tensor from the price head;
            'feature_vector' returns a [batch, 126] tensor.

        Raises ValueError for any other ``output_type`` instead of silently
        returning None.
        """
        # Validate before doing any convolution work.
        if output_type not in ('price', 'feature_vector'):
            raise ValueError(
                f"output_type must be 'price' or 'feature_vector', got {output_type!r}"
            )

        # Apply Convolutional layers and pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten per sample. Keeping the batch dimension fixed means a
        # wrongly sized image fails in the Linear layer with a shape error
        # instead of being silently re-batched by view(-1, N).
        x = x.flatten(start_dim=1)

        if output_type == 'price':
            # Fully connected layers for price prediction
            x = F.relu(self.fc1(x))
            return self.fc2(x)

        # Output a 126-dimensional feature vector
        return self.fc_vector(x)


In [88]:
# Build an untrained network and put it in evaluation mode (eval() returns
# the module itself, so the calls can be chained).
model = CNN().eval()

# Pull the first batch from the loader to use as a smoke-test input.
image, features = next(iter(dataloader))

In [92]:
with torch.no_grad():  # Disable gradient computation for inference
    # 'feature_vector' selects the 126-dimensional vector head; pass
    # output_type='price' instead to get the price prediction.
    output = model(image, output_type='feature_vector')

# Print the shape of the returned feature vector. The label previously read
# "Predicted Prices", which did not match what this call returns.
print("Feature vector shape:", output.shape)

Predicted Prices: torch.Size([1, 126])
