## This notebook contains the naive CNN baseline and the LSTM model.

In [None]:
!pip install tensorflow
import tensorflow as tf
!pip install tensorflow_hub
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
tf.compat.v1.enable_eager_execution()

import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import random
import requests

In [None]:

# Read every row of the CSV export into a plain dict keyed by the CSV header,
# then shuffle the rows in place so later positional splits are random.
with open('phillips2.csv') as csv_file:
    reader = csv.DictReader(csv_file, skipinitialspace=True)
    lots = [dict(row.items()) for row in reader]

random.shuffle(lots)

In [None]:
#m = 8000 #Restricted dataset for colab run
#lots = lots[:m]

In [None]:
def filter_expensive_lots(lots, min_price=40000):
  """Return the lots whose price is strictly greater than ``min_price``.

  Args:
    lots: iterable of dicts; each must have a 'price' entry parseable by float().
    min_price: exclusive lower bound on the price (default 40000).

  Returns:
    A new list with the matching lots, in their original order.

  Raises:
    KeyError / ValueError: if a lot has no 'price' or it is not numeric.
  """
  return [lot for lot in lots if float(lot['price']) > min_price]


# Set to True to restrict the dataset to expensive lots only, as Verge & Singal did.
# The original cell disabled this code with an unterminated triple-quoted string,
# which is a syntax error; an explicit switch keeps it off by default.
RESTRICT_TO_EXPENSIVE_LOTS = False

if RESTRICT_TO_EXPENSIVE_LOTS:
  lots = filter_expensive_lots(lots)
  print(len(lots))

### Preprocessing for the naive CNN

In [None]:
# Download every lot image, convert it to a 224x224 RGB float image, and store
# it next to its price so that images[i] and prices[i] describe the same sample.
m = len(lots)
#m = 5000 #for colab run

# Keeps one unresponsive image host from hanging the whole loop.
REQUEST_TIMEOUT_SECONDS = 30

# The processed images are float32 tensors, so a float32 buffer loses nothing
# and needs half the memory of the default float64.
images = np.empty((m, 224, 224, 3), dtype=np.float32)
prices = np.empty((m))

# Most recent successfully processed sample; reused when a download fails.
last_image = None
last_price = None

for i, lot in enumerate(lots[:m]):
  try:
    response = requests.get(lot["imageurl"], timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    raw_image = tf.image.decode_jpeg(response.content, channels=3)
  except Exception as error:  # not a bare except: Ctrl-C must still interrupt the loop
    if last_image is None:
      # Nothing to fall back on yet (the original hit a NameError here).
      raise RuntimeError(
          "Could not load the image of the first lot; no previous sample to reuse."
      ) from error
    # Duplicate the previous sample: previous image AND previous price, both at
    # index i. The original wrote the current price into prices[i - 1] and left
    # prices[i] uninitialised.
    image, price = last_image, last_price
  else:
    image = tf.image.convert_image_dtype(raw_image, tf.float32)
    image = tf.image.resize(image, [224, 224])
    price = float(lot["price"])
    last_image, last_price = image, price

  images[i] = image
  prices[i] = price
  if i % 200 == 0:
    print(i)


In [None]:
def _has_integer_date(lot):
  """Return True when the lot's 'day', 'month' and 'year' all parse as integers."""
  try:
    int(lot['day'])
    int(lot['month'])
    int(lot['year'])
  except (KeyError, TypeError, ValueError):
    return False
  return True


# Build new lists instead of calling lots.remove() inside the loop: removing
# from a list while iterating over it silently skips the element that follows
# each removal, so some invalid lots could survive.
# NOTE(review): `images` / `prices` built earlier are not filtered here; if any
# lot is dropped they no longer line up one-to-one with `lots`.
invalid_lots = [lot for lot in lots if not _has_integer_date(lot)]
for lot in invalid_lots:
  print(lot)

lots = [lot for lot in lots if _has_integer_date(lot)]
fails = len(invalid_lots)
print(fails)
print(len(lots))

0
23825


### Random price selection RMSE (baseline, to compare with the naive CNN)

In [None]:
def randomselect(k, n, lots):
  """Squared error of predicting one random lot's price with another random lot's price.

  Two lots are drawn independently; the first is the lot "to predict", the
  second is the random guess. Both draws stay 100 positions away from the ends
  of ``lots`` (the first draw additionally skips the first ``n`` positions).

  Args:
    k: unused; kept so existing callers keep working.
    n: extra offset added to the lower bound of the first draw.
    lots: sequence of dicts with a 'price' entry parseable by float().

  Returns:
    The squared difference between the two prices, as a float.

  Raises:
    ValueError: if ``lots`` is too short for the 100-position margins.
  """
  margin = 100
  upper = len(lots) - margin
  if upper < n + margin:
    raise ValueError(
        "lots has %d entries; need at least %d to sample with a margin of %d"
        % (len(lots), n + 2 * margin, margin))

  idx = random.randint(n + margin, upper)
  guess = random.randint(margin, upper)

  actual_price = float(lots[idx]['price'])
  guessed_price = float(lots[guess]['price'])

  # The original also computed an unused percentage error here, which divided
  # by the actual price and crashed on a zero-priced lot.
  return abs(actual_price - guessed_price) ** 2
  

In [None]:
# Monte-Carlo estimate of the RMSE obtained by guessing a lot's price with the
# price of another randomly chosen lot.
num_trials = 1000000

# Accumulate under a descriptive name: the original used `sum`, which shadows
# the builtin for every later cell in the kernel.
total_squared_error = 0
for _ in range(num_trials):
  total_squared_error += randomselect(0, 0, lots)

# Root of the mean squared error.
avgerror = np.sqrt(total_squared_error / num_trials)

print(avgerror)

1570903.203739467


In [None]:
# Echo the image array and then the price array as a quick sanity check.
for array in (images, prices):
  print(array)

In [None]:
# Positional 70 / 15 / 15 train / validation / test split of the image and price arrays.
# Size the split from the arrays themselves: `lots` may have changed length
# since `images` was built, which would shift every boundary.
m = images.shape[0]
assert prices.shape[0] == m, "images and prices must contain the same number of samples"

train_size = int(0.7 * m)
val_size = int(0.15 * m)
# The test split takes whatever is left, so the three sizes always sum to m.
test_size = m - train_size - val_size

val_end = train_size + val_size

img_train = images[:train_size]
img_val = images[train_size:val_end]
img_test = images[val_end:]

label_train = prices[:train_size]
label_val = prices[train_size:val_end]
label_test = prices[val_end:]

assert len(img_test) == len(label_test) == test_size

print(img_train.shape)
print(label_train.shape)


### Naive CNN

Pretrained variant (frozen ResNet-50 feature extractor):


In [None]:
# Naive CNN, pretrained variant: a frozen TF-Hub ResNet-50 feature extractor
# followed by a small regularised dense head that regresses a single price.
input_shape = [224, 224, 3]
# Named `image_input` rather than `input` so the builtin input() is not shadowed.
image_input = tf.keras.Input(input_shape)

restransfer = hub.KerasLayer("https://tfhub.dev/tensorflow/resnet_50/feature_vector/1", trainable=False)(image_input)

d1 = tf.keras.layers.Dense(
    128,
    activation="tanh",
    kernel_initializer=tf.keras.initializers.GlorotNormal(),
    kernel_regularizer='l2',
)(restransfer)
dr1 = tf.keras.layers.Dropout(.2)(d1)

d2 = tf.keras.layers.Dense(
    32,
    activation="tanh",
    kernel_initializer=tf.keras.initializers.GlorotNormal(),
    kernel_regularizer='l2',
)(dr1)
dr2 = tf.keras.layers.Dropout(.2)(d2)
# Head so far: input (None, 224, 224, 3) -> output (None, 32).

# NOTE(review): a ReLU on the single regression output keeps predictions
# non-negative but yields zero gradient whenever the pre-activation is
# negative; confirm this is intended.
prediction = tf.keras.layers.Dense(
    1,
    activation="relu",
    kernel_initializer=tf.keras.initializers.GlorotNormal(),
)(dr2)
naivecnn = tf.keras.Model(inputs=[image_input], outputs=prediction)

Vanilla variant (CNN trained from scratch):

In [None]:
# Naive CNN, vanilla variant: a small convolutional network trained from
# scratch, followed by a single-unit regression output.
input_shape = [224, 224, 3]
# Named `image_input` rather than `input` so the builtin input() is not shadowed.
image_input = tf.keras.Input(input_shape)

cnn = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, kernel_size=(5, 5), strides=(1, 1),
                           activation='relu',
                           input_shape=input_shape),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    tf.keras.layers.Conv2D(64, (5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1000, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
])

# 128-dimensional image feature -> one predicted price.
feature = cnn(image_input)
prediction = tf.keras.layers.Dense(1, activation="relu")(feature)
naivecnn = tf.keras.Model(inputs=[image_input], outputs=prediction)

In [None]:
# Compile and train the naive CNN on the training split.
tf.keras.backend.set_epsilon(1e-05)

naivecnn.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsolutePercentageError()],
    optimizer=keras.optimizers.Adam(learning_rate=.00001, clipvalue=1),
)
naivecnn.summary()

# NaNs in the inputs or labels would silently turn the loss into NaN.
assert not np.any(np.isnan(img_train))
assert not np.any(np.isnan(label_train))
assert not np.any(np.isnan(img_val))
assert not np.any(np.isnan(label_val))

# The validation split was built but never used; passing it reports
# validation loss / MAPE after every epoch without affecting the weights.
history = naivecnn.fit(
    [img_train],
    label_train,
    validation_data=(img_val, label_val),
    epochs=8,
)

In [None]:
# Evaluate the trained naive CNN on the held-out test split; reports the
# compiled loss (mean squared error) and metric (mean absolute percentage error).
naivecnn.evaluate([img_test], label_test)

# LSTM

In [None]:
# Build m training examples for the LSTM. Each example is one randomly chosen
# lot to predict plus the n lots that immediately precede it chronologically
# (their images and their prices).

# The original cell left both values blank, which is a syntax error.
# TODO(review): confirm these values.
n = 5     # sequence length -- assumed; SOURCE gives no value
m = 1875  # number of sequences -- assumed from the later fit call: 41 steps x default batch 32 ~= 0.7 * m

# Keeps one unresponsive image host from hanging the whole loop.
REQUEST_TIMEOUT_SECONDS = 30
# Abort instead of looping forever when every download fails (e.g. no network).
MAX_CONSECUTIVE_FAILURES = 50


def _load_lot_image(lot):
  """Download a lot's image and return it as a float32 tensor resized to 224x224x3.

  Raises whatever requests / TensorFlow raise when the download or the JPEG
  decoding fails.
  """
  response = requests.get(lot["imageurl"], timeout=REQUEST_TIMEOUT_SECONDS)
  response.raise_for_status()
  image = tf.image.decode_jpeg(response.content, channels=3)
  image = tf.image.convert_image_dtype(image, tf.float32)
  return tf.image.resize(image, [224, 224])


# Chronological order. The date fields are strings, so compare them as integers;
# as strings '10' would sort before '2'.
lots = sorted(lots, key=lambda lot: (int(lot['year']), int(lot['month']), int(lot['day'])))

# float32 matches the processed images and halves the memory of the buffers.
topred = np.zeros((m, 224, 224, 3), dtype=np.float32)
lastn = np.zeros((m, n, 224, 224, 3), dtype=np.float32)
lastnprices = np.zeros((m, n))
labels = np.zeros((m))

j = 0
consecutive_failures = 0
while j < m:

  # randrange excludes len(lots); randint(n, len(lots)) could return
  # len(lots) and index one past the end of the list.
  idx = random.randrange(n, len(lots))
  topredict = lots[idx]
  lastnlots = lots[idx - n:idx]

  try:
    predimg = _load_lot_image(topredict)
    history_images = [_load_lot_image(lot) for lot in lastnlots]
  except Exception as error:  # not a bare except: Ctrl-C must still interrupt the loop
    # Resample the whole example. The original substituted the most recently
    # decoded image for a failed history image, which could be the target image.
    consecutive_failures += 1
    if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
      raise RuntimeError(
          "%d consecutive image downloads failed; giving up" % consecutive_failures
      ) from error
    continue
  consecutive_failures = 0

  topred[j] = predimg
  labels[j] = float(topredict["price"])

  for k, (lot, lotimg) in enumerate(zip(lastnlots, history_images)):
    lastn[j, k] = lotimg
    lastnprices[j, k] = float(lot["price"])

  j += 1
  if j % 10 == 0:
    print(str(j) + ' training examples created')
    


In [None]:
# Positional 70 / 15 / 15 split of the LSTM arrays: the first 70% of the
# examples train, the next 15% validate, and the remainder is the test set.
train_size = int(0.7 * m)
val_size = int(0.15 * m)
test_size = int(0.15 * m)

# The three splits as slices along the first (example) axis.
train_part = slice(None, train_size)
val_part = slice(train_size, train_size + val_size)
test_part = slice(train_size + val_size, None)
parts = (train_part, val_part, test_part)

# Images of the lots to predict.
topred_train, topred_val, topred_test = (topred[part] for part in parts)

# Images of the n preceding lots.
lastn_train, lastn_val, lastn_test = (lastn[part] for part in parts)

# Prices of the n preceding lots.
price_train, price_val, price_test = (lastnprices[part] for part in parts)

# Target prices.
label_train, label_val, label_test = (labels[part] for part in parts)

print(lastn_train.shape)
print(label_train.shape)

## Create LSTM

In [None]:
# Model inputs: the image to price, the images of the n preceding lots, and
# the prices of those n lots.
topred_shape = [224, 224, 3]
lastn_shape = [n, 224, 224, 3]
price_shape = [n]
topred_input = tf.keras.Input(topred_shape)
lastn_input = tf.keras.Input(lastn_shape)
price_input = tf.keras.Input(price_shape)

# Shared image encoder: every image is mapped to a 128-dimensional feature.
# The first layer takes its shape from `topred_shape`, defined in this cell;
# the original used `input_shape`, which exists only if a naive-CNN cell ran
# earlier in the same kernel.
cnn = tf.keras.Sequential()
cnn.add(tf.keras.layers.Conv2D(8, kernel_size=(5, 5), strides=(1, 1),
                 activation='relu',
                 input_shape=topred_shape))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn.add(tf.keras.layers.Conv2D(16, (5, 5), activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
cnn.add(tf.keras.layers.Flatten())
cnn.add(tf.keras.layers.Dense(1000, activation='relu'))
cnn.add(tf.keras.layers.Dense(128, activation='relu'))

# Element-wise absolute difference between two encoded images.
L1_layer = tf.keras.layers.Lambda(lambda tensors:tf.math.abs(tensors[0] - tensors[1]))

In [None]:


# Assemble the LSTM network. For each of the n past lots, the timestep is the
# L1 distance between its encoding and the target's encoding, concatenated
# with that lot's price.
encoded_topred = cnn(topred_input)

timesteps = []
for i in range(n):
  encoded_pastlot = cnn(lastn_input[:,i,:,:,:])
  L1_distance = L1_layer([encoded_topred, encoded_pastlot])
  price = tf.expand_dims(price_input[:,i], axis=1)              # (batch, 1)
  priceandfeature = tf.concat([L1_distance, price], axis=1)     # (batch, features + 1)
  timesteps.append(tf.expand_dims(priceandfeature, axis=1))     # (batch, 1, features + 1)

# One concat over all timesteps instead of growing the tensor by one concat
# per iteration: (batch, n, features + 1).
lstm_input = tf.concat(timesteps, axis=1)

# Width of one timestep: the encoder's feature size plus one price column.
# Derived from the encoder output instead of hard-coding 128 + 1.
step_width = int(encoded_topred.shape[-1]) + 1

lstm = tf.keras.Sequential([
                            #tf.keras.layers.LSTM(200, activation='relu', return_sequences=True, input_shape=(n, step_width)),
                            #tf.keras.layers.LSTM(100, activation='relu', return_sequences=True),
                            #tf.keras.layers.LSTM(50, activation='relu', return_sequences=True),
                            #tf.keras.layers.LSTM(25, activation='relu'),
                            #tf.keras.layers.Dense(20, activation='relu'),
                            #tf.keras.layers.Dense(10, activation='relu'),
                            #tf.keras.layers.Dense(1)
                            tf.keras.layers.LSTM(10, activation='relu', input_shape=(n, step_width)),
                            tf.keras.layers.Dense(1)
                            #THIS VERSION IS FOR SINGLE LAYER LSTM; COMMENT UNCOMMENTED AND UNCOMMENT COMMENTED TO SWITCH
])

prediction = lstm(lstm_input)
lstm_net = tf.keras.Model(inputs=[topred_input, lastn_input, price_input], outputs=prediction)







In [None]:
# Compile and train the LSTM network (MSE loss, MAPE reported as a metric).
lstm_net.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsolutePercentageError()],
    optimizer=keras.optimizers.Adam(learning_rate=.001),
)
lstm_net.summary()

# No steps_per_epoch: Keras derives the number of batches from the size of the
# training arrays. The original hard-coded 41, which only matches one specific
# dataset size.
# The validation split was built but never used; passing it reports validation
# metrics after every epoch without affecting the weights.
history = lstm_net.fit(
    [topred_train, lastn_train, price_train],
    label_train,
    validation_data=([topred_val, lastn_val, price_val], label_val),
    epochs=8,
)

In [None]:
# Evaluate the trained LSTM network on the held-out test split; reports the
# compiled loss (mean squared error) and metric (mean absolute percentage error).
lstm_net.evaluate([topred_test, lastn_test, price_test], label_test)

## Example prediction

In [None]:
# Predict the price of the first test example. The 0:1 slices keep the leading
# batch axis, so each input is a batch of size one.
lstm_net.predict([topred_test[0:1,:,:,:], lastn_test[0:1,:,:,:,:], price_test[0:1,:]])

array([[157985.77]], dtype=float32)

In [None]:
# Label (target price) of the first test example, for comparison with the prediction.
print(label_test[0:1])

[105625.]


In [1]:
from PIL import Image

In [None]:
# Shape of the first test image.
print(topred_test[0,:,:,:].shape)

(224, 224, 3)


In [None]:
# Show the first test image.
# The stored pixels are floats (images were converted with
# tf.image.convert_image_dtype(..., tf.float32), i.e. scaled to [0, 1]), while
# PIL's 'RGB' mode expects 8-bit integers. Rescale to 0-255 and cast first;
# passing the float array directly produces a garbled image.
pixels = np.clip(topred_test[0,:,:,:] * 255.0, 0, 255).astype(np.uint8)
image = Image.fromarray(pixels, 'RGB')
# A bare last expression renders the image inline in the notebook;
# image.show() launches an external viewer, which hosted notebooks lack.
image