#### Looking into the `readLabels` function with IPython

In [10]:
from PIL import Image
import numpy as np
import os, math
import csv

In [11]:
# read in training images from given image directory
def readImages(imgDir, row=171, col=256):
    """Load every '.jpg' file in ``imgDir`` as a flattened greyscale vector.

    Each image is converted to RGB, its top-left ``row`` x ``col`` pixels are
    mapped to greyscale with the weights 0.2989 / 0.5870 / 0.1140, and the
    result is stored row-major (pixel (y, x) lands at index ``y * col + x``).

    Parameters
    ----------
    imgDir : str
        Directory containing the images. A trailing separator is optional.
    row, col : int, optional
        Number of pixel rows (height) and columns (width) read from each
        image. Defaults match the previously hard-coded 171 x 256.

    Returns
    -------
    imageIds : list of str
        File names without the '.jpg' extension, in ``os.listdir`` order.
    images : numpy.ndarray
        Float array of shape ``(len(imageIds), row * col)``; row ``k`` holds
        the greyscale pixels of ``imageIds[k]``.

    Raises
    ------
    ValueError
        If an image is smaller than ``row`` x ``col``.
    """
    # List the directory once and keep only JPEGs, so the matrix has exactly
    # one row per loaded image (no all-zero rows for stray non-image files)
    # and the progress total stays consistent with what is iterated.
    imageNames = [name for name in os.listdir(imgDir) if name.endswith('.jpg')]
    total = len(imageNames)
    images = np.zeros((total, row * col))
    imageIds = []
    for index, imageName in enumerate(imageNames):
        # os.path.join works with or without a trailing separator on imgDir;
        # the context manager closes the underlying file handle.
        with Image.open(os.path.join(imgDir, imageName)) as im:
            # convert('RGB') guarantees three channels even for greyscale,
            # palette or RGBA files; the array is indexed [y, x, channel].
            rgb = np.asarray(im.convert('RGB'), dtype=np.float64)
        if rgb.shape[0] < row or rgb.shape[1] < col:
            raise ValueError(
                'image ' + imageName + ' is ' + str(rgb.shape[1]) + 'x'
                + str(rgb.shape[0]) + ', smaller than the required '
                + str(col) + 'x' + str(row))
        crop = rgb[:row, :col]
        # convert rgb to greyscale
        grey = 0.2989 * crop[:, :, 0] + 0.5870 * crop[:, :, 1] + 0.1140 * crop[:, :, 2]
        images[index, :] = grey.reshape(-1)
        imageIds.append(os.path.splitext(imageName)[0])
        print(str(index + 1) + '/' + str(total))
    return imageIds, images

def isNumber(s):
    """Return True if ``float(s)`` succeeds, False otherwise.

    Values that ``float()`` cannot handle at all (``None``, lists, ...) or
    that overflow a float are reported as "not a number" instead of raising.
    Note that strings such as 'nan' and 'inf' are accepted by ``float()`` and
    therefore count as numbers here.
    """
    try:
        float(s)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

In [20]:
# read in all labels from .csv file (including the ones not in training data)
# and store them in a hashmap, where key is the image id, and value is price bin
def readLabels(labelPath, binSize=50):
    """Read an ``id,price`` CSV and map each id to its price bin.

    The bin is ``floor(price / binSize)``. Prices may carry a '$' sign and
    thousands separators ('$1,200.00'). A price that is missing, unparseable
    or non-finite is treated as 0, so it lands in bin 0 together with genuine
    prices below ``binSize``.

    Parameters
    ----------
    labelPath : str
        Path to the CSV file. Rows whose first column is 'id' (the header)
        and blank lines are skipped.
    binSize : int or float, optional
        Width of one price bin; defaults to the previously hard-coded 50.

    Returns
    -------
    dict
        ``{image id (str): price bin (int)}`` for every data row.
    """
    priceBins = {}
    # newline='' is the csv module's recommended way to open files so that
    # line breaks inside quoted fields are handled by the reader itself.
    with open(labelPath, 'r', newline='') as f:
        for record in csv.reader(f):
            # csv.reader yields [] for blank lines; indexing them would fail.
            if not record or record[0] == 'id':
                continue
            imageId = record[0]
            rawPrice = record[1] if len(record) > 1 else ''
            # '$1,200.00' -> '1200.00'
            cleaned = rawPrice.replace('$', '').replace(',', '').strip()
            try:
                price = float(cleaned)
            except ValueError:
                price = 0.0
            # float() accepts 'nan' / 'inf', which floor()/int() cannot bin.
            if not math.isfinite(price):
                price = 0.0
            priceBins[imageId] = int(math.floor(price / binSize))
    return priceBins

# locations of the input data: train/test image folders and the price label CSV
imgDirTrain, imgDirTest = 'bos100/train/', 'bos100/test/'
labelPath = 'labels/bosPrices.csv'

In [22]:
# Parse the label CSV at labelPath into a dict of image id -> price bin.
# NOTE(review): the output recorded under this cell looks like the raw CSV rows,
# which the current readLabels does not print (its debug print is commented out)
# -- presumably left over from an earlier run; re-run the cell to confirm.
labels = readLabels(labelPath)

[['id', 'price'], ['3353', '$50.00'], ['3781', '$150.00'], ['5453', '$150.00'], ['5506', '$145.00'], ['6695', '$195.00'], ['6976', '$65.00'], ['8789', '$155.00'], ['8792', '$200.00'], ['9273', '$225.00'], ['9765', '$229.00'], ['9824', '$209.00'], ['9855', '$255.00'], ['9857', '$359.00'], ['9858', '$449.00'], ['9860', '$239.00'], ['9870', '$279.00'], ['9903', '$259.00'], ['10730', '$175.00'], ['10758', '$115.00'], ['10807', '$80.00'], ['10809', '$100.00'], ['10810', '$85.00'], ['12356', '$210.00'], ['12441', '$489.00'], ['13059', '$375.00'], ['13589', '$451.00'], ['13592', '$389.00'], ['18711', '$126.00'], ['19999', '$70.00'], ['20000', '$70.00'], ['22208', '$225.00'], ['22212', '$285.00'], ['22354', '$135.00'], ['23370', '$289.00'], ['23619', ''], ['23668', '$299.00'], ['24063', '$120.00'], ['24240', '$225.00'], ['25142', '$369.00'], ['25418', '$277.00'], ['27141', '$279.00'], ['27546', '$60.00'], ['27611', '$165.00'], ['28150', '$251.00'], ['29155', '$451.00'], ['29765', '$295.00'], [

In [13]:
# read in train, test images
# readImages returns (image ids, pixel matrix) and prints one "i/N" progress
# line per image it loads
print("loading in train images...")
imageIdsTrain, imagesTrain = readImages(imgDirTrain)
print("loading in test images...")
imageIdsTest, imagesTest = readImages(imgDirTest)

loading in train images...
1/90
2/90
3/90
4/90
5/90
6/90
7/90
8/90
9/90
10/90
11/90
12/90
13/90
14/90
15/90
16/90
17/90
18/90
19/90
20/90
21/90
22/90
23/90
24/90
25/90
26/90
27/90
28/90
29/90
30/90
31/90
32/90
33/90
34/90
35/90
36/90
37/90
38/90
39/90
40/90
41/90
42/90
43/90
44/90
45/90
46/90
47/90
48/90
49/90
50/90
51/90
52/90
53/90
54/90
55/90
56/90
57/90
58/90
59/90
60/90
61/90
62/90
63/90
64/90
65/90
66/90
67/90
68/90
69/90
70/90
71/90
72/90
73/90
74/90
75/90
76/90
77/90
78/90
79/90
80/90
81/90
82/90
83/90
84/90
85/90
86/90
87/90
88/90
89/90
90/90
loading in test images...
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10


In [19]:
# ground truth (labels): dict of image id -> price bin for every data row of the CSV
priceBins = readLabels(labelPath)
# NOTE(review): this prints the entire mapping; printing len(priceBins) or a
# small sample would keep the cell output readable
print(priceBins)


{'3250739': 1, '8064306': 1, '3479660': 2, '2183058': 3, '8584081': 1, '6715093': 3, '6480967': 6, '6019641': 3, '6716295': 1, '7323567': 1, '8579799': 5, '7106162': 4, '5270703': 3, '3285585': 0, '7592616': 2, '6414143': 3, '451872': 1, '7095032': 4, '7357784': 1, '3969526': 5, '7379913': 2, '7809904': 3, '6778788': 3, '7698631': 7, '6744795': 5, '5088560': 9, '5870922': 2, '716245': 2, '8054701': 5, '1612842': 5, '6974627': 3, '5370684': 1, '7911936': 2, '8200839': 2, '3049206': 4, '7755144': 4, '7964336': 0, '6956835': 2, '4278873': 1, '2828426': 4, '3802809': 2, '6971836': 5, '195515': 4, '6391260': 1, '8450098': 3, '7986820': 1, '7880828': 1, '8129715': 3, '3903273': 4, '8469841': 1, '8536127': 3, '7005442': 1, '6851460': 3, '8371115': 4, '6887053': 3, '7342359': 5, '7825327': 9, '5970118': 3, '7841193': 1, '2611177': 7, '7082775': 2, '4348422': 3, '5932267': 5, '6121709': 2, '8339718': 3, '7868931': 5, '6936712': 3, '6677640': 4, '7367795': 3, '2395326': 4, '5049582': 1, '6831553

In [None]:
# find the label (price bin) for each data point
# in the training and test datasets, in the same order as the image ids.
# Comprehensions keep the loop variable out of the notebook namespace, so the
# builtin id() is no longer overwritten by the last image id.
# An image id without a label still raises KeyError: skipping it silently
# would misalign the labels with the image rows.
labelsTrain = [priceBins[imageId] for imageId in imageIdsTrain]
labelsTest = [priceBins[imageId] for imageId in imageIdsTest]

In [18]:
# bare expression: shows the whole image id -> price bin dict as the cell output
priceBins

{'3250739': 1,
 '8064306': 1,
 '3479660': 2,
 '2183058': 3,
 '8584081': 1,
 '6715093': 3,
 '6480967': 6,
 '6019641': 3,
 '6716295': 1,
 '7323567': 1,
 '8579799': 5,
 '7106162': 4,
 '5270703': 3,
 '3285585': 0,
 '7592616': 2,
 '6414143': 3,
 '451872': 1,
 '7095032': 4,
 '7357784': 1,
 '3969526': 5,
 '7379913': 2,
 '7809904': 3,
 '6778788': 3,
 '7698631': 7,
 '6744795': 5,
 '5088560': 9,
 '5870922': 2,
 '716245': 2,
 '8054701': 5,
 '1612842': 5,
 '6974627': 3,
 '5370684': 1,
 '7911936': 2,
 '8200839': 2,
 '3049206': 4,
 '7755144': 4,
 '7964336': 0,
 '6956835': 2,
 '4278873': 1,
 '2828426': 4,
 '3802809': 2,
 '6971836': 5,
 '195515': 4,
 '6391260': 1,
 '8450098': 3,
 '7986820': 1,
 '7880828': 1,
 '8129715': 3,
 '3903273': 4,
 '8469841': 1,
 '8536127': 3,
 '7005442': 1,
 '6851460': 3,
 '8371115': 4,
 '6887053': 3,
 '7342359': 5,
 '7825327': 9,
 '5970118': 3,
 '7841193': 1,
 '2611177': 7,
 '7082775': 2,
 '4348422': 3,
 '5932267': 5,
 '6121709': 2,
 '8339718': 3,
 '7868931': 5,
 '6936712': 3