# ThePriceIsRight: Predicting Prices with Product Images

### Exploratory Notebook

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import datasets

#### Load the dataset

In [2]:
features, labels = datasets.load_boston(True)

#### Normalize and reshape data

In [3]:
m, n = features.shape

mu = np.mean(features, axis=0)
sigma = np.std(features, axis=0)
features = (features - mu) / sigma

# append column of ones for intercept
features = np.reshape(np.c_[np.ones(m), features], [m, n + 1])
labels = np.reshape(labels, [m, 1])

n = features.shape[1]

In [4]:
features[:4, :10]

array([[ 1.        , -0.41771335,  0.28482986, -1.2879095 , -0.27259857,
        -0.14421743,  0.41367189, -0.12001342,  0.1402136 , -0.98284286],
       [ 1.        , -0.41526932, -0.48772236, -0.59338101, -0.27259857,
        -0.74026221,  0.19427445,  0.36716642,  0.55715988, -0.8678825 ],
       [ 1.        , -0.41527165, -0.48772236, -0.59338101, -0.27259857,
        -0.74026221,  1.28271368, -0.26581176,  0.55715988, -0.8678825 ],
       [ 1.        , -0.41468015, -0.48772236, -1.30687771, -0.27259857,
        -0.83528384,  1.01630251, -0.80988851,  1.07773662, -0.75292215]])

#### Split dataset into 80% train, 20% dev

In [5]:
random_inds = np.random.rand(m) < 0.8

train_x = features[random_inds]
train_y = labels[random_inds]
test_x = features[~random_inds]
test_y = labels[~random_inds]

print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

print(m)
print(n)

(394, 14)
(394, 1)
(112, 14)
(112, 1)
506
14


In [6]:
tf.reset_default_graph()

learning_rate = 0.01
num_epochs = 500

X = tf.placeholder(tf.float32, [None, n])
Y = tf.placeholder(tf.float32, [None, 1])
W = tf.get_variable('W', shape=[n, 1], dtype=tf.float32, initializer=tf.ones_initializer())

yhat = tf.matmul(X, W)
cost = tf.reduce_mean(tf.square(tf.subtract(yhat, Y)))

init = tf.global_variables_initializer()

minimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

In [7]:
sess = tf.Session()
sess.run(init)

for epoch in range(num_epochs):
    _, curr_cost = sess.run([minimizer, cost], feed_dict={X: train_x, Y: train_y})
    print(curr_cost)


590.861
551.239
517.152
487.46
461.283
437.94
416.902
397.757
380.187
363.939
348.817
334.665
321.359
308.8
296.907
285.617
274.874
264.634
254.861
245.52
236.586
228.033
219.84
211.989
204.461
197.242
190.316
183.67
177.293
171.171
165.294
159.652
154.235
149.033
144.038
139.24
134.633
130.208
125.958
121.876
117.955
114.189
110.571
107.095
103.757
100.549
97.4679
94.5075
91.6633
88.9307
86.3052
83.7825
81.3586
79.0295
76.7915
74.641
72.5744
70.5884
68.6799
66.8457
65.083
63.3889
61.7607
60.1957
58.6915
57.2456
55.8558
54.5199
53.2356
52.0011
50.8142
49.6732
48.5762
47.5215
46.5074
45.5324
44.5948
43.6933
42.8263
41.9926
41.1908
40.4197
39.6781
38.9648
38.2787
37.6187
36.9839
36.3732
35.7857
35.2205
34.6766
34.1534
33.65
33.1655
32.6993
32.2507
31.8189
31.4033
31.0033
30.6183
30.2477
29.8909
29.5474
29.2167
28.8982
28.5916
28.2963
28.0119
27.738
27.4742
27.2201
26.9753
26.7395
26.5123
26.2934
26.0824
25.8791
25.6831
25.4943
25.3122
25.1367
24.9675
24.8043
24.647
24.4952
24.3489
24.207

In [8]:
test_yhat = sess.run(yhat, feed_dict={X: test_x})
print(test_yhat)
mse = tf.reduce_mean(tf.square(tf.subtract(test_yhat, test_y)))
print('MSE: {}'.format(sess.run(mse)))

[[ 25.20354271]
 [ 29.27909088]
 [ 21.30224991]
 [ 22.40326881]
 [ 12.45475578]
 [ 14.45730686]
 [ 22.55679703]
 [ 22.28637695]
 [ 29.64716148]
 [ 32.81518555]
 [ 22.98203087]
 [ 20.44137764]
 [ 31.06452179]
 [ 17.3126564 ]
 [ 18.07878876]
 [ 23.42749214]
 [ 25.13637924]
 [ 21.42299271]
 [ 23.83537102]
 [ 30.79145622]
 [ 26.87556839]
 [ 32.96509933]
 [ 24.57605553]
 [ 17.85645294]
 [ 20.2213974 ]
 [ 21.05271339]
 [ 20.48180389]
 [ 20.52493095]
 [ 21.14009285]
 [ 17.99607277]
 [  4.99783325]
 [ 19.26449585]
 [ 21.88262558]
 [ 25.18382072]
 [ 25.21497345]
 [ 28.25650978]
 [ 32.26791   ]
 [ 35.27894974]
 [ 36.54459   ]
 [ 32.76958084]
 [ 31.64266968]
 [ 34.62636185]
 [ 31.60948753]
 [ 17.96391678]
 [ 22.73254204]
 [ 15.91921329]
 [ 25.11879349]
 [ 28.16566467]
 [ 31.05202103]
 [ 39.02994919]
 [ 41.14938736]
 [ 32.02048111]
 [ 30.13889503]
 [ 28.25418282]
 [ 20.40881157]
 [ 26.32467461]
 [ 34.59806442]
 [ 35.39669037]
 [ 34.45422745]
 [ 32.11650467]
 [ 26.40603828]
 [ 32.58621979]
 [ 20.09