# Building an image retrieval system with deep features
---

Exploring image retrieval with Turi Create (GraphLab Create's successor)
- Load the CIFAR-10 dataset
- CIFAR-10 is a popular benchmark dataset in computer vision.
- Only 4 categories are used here: {'cat', 'bird', 'automobile', 'dog'}.
- The dataset is already split into a training set and a test set.

In [5]:
import turicreate as tc

In [6]:
# Load the training and test SFrames from their on-disk directories.
image_train, image_test = (
    tc.SFrame('datasets/image_train_data/'),
    tc.SFrame('datasets/image_test_data/'),
)

### Computing deep features for the images

Compute deep features.  This computation takes a little while, so we have already computed them and saved the results as a column in the data you loaded.   

(Note that if you want to compute such deep features yourself and have a GPU on your machine, you should use the GPU-enabled GraphLab Create, which will be significantly faster for this task.)

In [7]:
#deep_learning_model = graphlab.load_model('http://s3.amazonaws.com/GraphLab-Datasets/deeplearning/imagenet_model_iter45')
#image_train['deep_features'] = deep_learning_model.extract_features(image_train)

In [8]:
image_train  # display the table; the .head() call is left commented out

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.242871761322, 1.09545373917, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
70,Height: 32 Width: 32,dog,"[1.12979578972, 0.0, 0.0, 0.778194487095, 0.0, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
90,Height: 32 Width: 32,bird,"[1.71786928177, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."
97,Height: 32 Width: 32,automobile,"[1.57818555832, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[33.0, 44.0, 27.0, 29.0, 44.0, 31.0, 32.0, 45.0, ..."
107,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.220677852631, 0.0, ...","[97.0, 51.0, 31.0, 104.0, 58.0, 38.0, 107.0, 61.0, ..."
121,Height: 32 Width: 32,bird,"[0.0, 0.23753464222, 0.0, 0.0, 0.0, 0.0, ...","[93.0, 96.0, 88.0, 102.0, 106.0, 97.0, 117.0, ..."
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.5737862587, 0.0, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."
138,Height: 32 Width: 32,bird,"[0.658935725689, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[205.0, 193.0, 195.0, 200.0, 187.0, 193.0, ..."


In [9]:
#image_train.show()#['label'].sketch_summary()

### Split into the 4 categories
- {'dog', 'cat', 'automobile', 'bird'}
- First the training data, then the test data

In [10]:
#gl.canvas.set_target('ipynb')

- Train Images

In [11]:
# One training subset per label, selected with a boolean mask on 'label'.
image_train_dog, image_train_cat, image_train_auto, image_train_bird = (
    image_train[image_train['label'] == category]
    for category in ('dog', 'cat', 'automobile', 'bird')
)

In [12]:
# Calls explore() on the 'image' column of each training subset.
# NOTE(review): the recorded output of this cell is a tuple of four None
# values, so explore() returns None and the print() wrapper adds nothing;
# presumably the calls are made only to open the interactive viewer.
print(image_train_dog['image'].explore(),image_train_cat['image'].explore(),
     image_train_auto['image'].explore(),image_train_bird['image'].explore())

(None, None, None, None)


In [13]:
# Explore the images of the dog training subset on their own.
image_train_dog['image'].explore()

- Test Images

In [14]:
# One test subset per label, selected with a boolean mask on 'label'.
image_test_dog, image_test_cat, image_test_auto, image_test_bird = (
    image_test[image_test['label'] == category]
    for category in ('dog', 'cat', 'automobile', 'bird')
)

In [15]:
# Calls explore() on the 'image' column of each test subset.
# NOTE(review): the recorded output of this cell is a tuple of four None
# values, so explore() returns None and the print() wrapper adds nothing;
# presumably the calls are made only to open the interactive viewer.
print(image_test_dog['image'].explore(),image_test_cat['image'].explore(),
     image_test_auto['image'].explore(),image_test_bird['image'].explore())

(None, None, None, None)


--- 
## K-Nearest Neighbors

- Training a nearest-neighbors model on every subset reviewed
- Retrieving images using deep features
- We will now build a simple image retrieval system that finds the nearest neighbors for any image.

In [16]:
# Nearest-neighbors model over the whole training set: images are compared
# on their 'deep_features' column and matches are reported by 'id'.
knn_model = tc.nearest_neighbors.create(
    image_train,
    features=['deep_features'],
    label='id',
)

In [17]:
# Nearest-neighbors model restricted to the dog training images.
dog_model = tc.nearest_neighbors.create(
    image_train_dog,
    features=['deep_features'],
    label='id',
)

In [18]:
# Nearest-neighbors model restricted to the cat training images.
cat_model = tc.nearest_neighbors.create(
    image_train_cat,
    features=['deep_features'],
    label='id',
)

In [19]:
# Nearest-neighbors model restricted to the automobile training images.
auto_model = tc.nearest_neighbors.create(
    image_train_auto,
    features=['deep_features'],
    label='id',
)

In [20]:
# Nearest-neighbors model restricted to the bird training images.
bird_model = tc.nearest_neighbors.create(
    image_train_bird,
    features=['deep_features'],
    label='id',
)

--- 
### Finding Particular Cases

- Take particular images
- Find the quality of deep features
- First results

In [21]:
# Query image: the one-row slice at position 18 of the training set.
cat = image_train[18:19]
# Look at the query image itself.
cat['image'].explore()

In [22]:
def get_images_from_ids(query_result):
    """Return the rows of ``image_train`` matched by a neighbors query.

    query_result: result of a model ``query`` call; its 'reference_label'
        column is read as the ids to keep.

    Returns ``image_train`` filtered down to the rows whose 'id' appears in
    that column.

    NOTE(review): the recorded output of this helper lists rows by ascending
    id, so the rank order of the query does not appear to be preserved --
    confirm against the filter_by documentation.
    """
    matched_ids = query_result['reference_label']
    return image_train.filter_by(matched_ids, 'id')

In [23]:
knn_model.query(cat)
# reference_label holds the id of each matching training image, so the result
# still has to be mapped back to the images themselves.

query_label,reference_label,distance,rank
0,384,0.0,1
0,6910,36.9403137951,2
0,39777,38.4634888975,3
0,36870,39.7559623119,4
0,41734,39.7866014148,5


In [24]:
# Query image from the test set: the one-row slice at position 0.
cat1 = image_test[0:1]
# Look at the query image itself.
cat1['image'].explore()

In [25]:
# Closest training cats to cat1 (the recorded output lists 5, ranked by distance).
query_cat1=cat_model.query(cat1)
query_cat1

query_label,reference_label,distance,rank
0,16289,34.623719208,1
0,45646,36.0068799284,2
0,32139,36.5200813436,3
0,25713,36.7548502521,4
0,331,36.8731228168,5


In [26]:
# Mean distance from cat1 to its 5 closest training cats.
query_cat1['distance'].mean()

36.15573070978301

In [27]:
# id 16289 is the rank-1 reference_label returned for cat1 by cat_model.
cat1_cat1 = image_train_cat[image_train_cat['id']==16289]
cat1_cat1['image'].explore()

In [28]:
# Which is the closest dog to this cat? Query the dog-only model with cat1.
query2_cat1=dog_model.query(cat1)
query2_cat1

query_label,reference_label,distance,rank
0,16976,37.4642628784,1
0,13387,37.5666832169,2
0,35867,37.6047267079,3
0,44603,37.7065585153,4
0,6094,38.5113254907,5


In [29]:
# Mean distance from cat1 to its 5 closest training dogs.
query2_cat1['distance'].mean()

37.770711361841634

In [30]:
# id 16976 is the rank-1 reference_label returned for cat1 by dog_model.
dog1=image_train_dog[image_train_dog['id']==16976]
dog1['image'].explore()
# the dog that looks most like a cat :3

In [31]:
# Closest training dog (k=1) for each row of the test set.
dog_model.query(image_test,k=1)

query_label,reference_label,distance,rank
0,16976,37.4642628784,1
1,3828,42.3025826356,1
2,40938,29.3472319585,1
3,1401,43.1386196575,1
4,49803,33.4773590373,1
5,5755,32.8458495684,1
6,20715,35.0397073189,1
7,47496,50.5112353783,1
8,13387,33.9010327697,1
9,12089,37.4849250909,1


### Distance from dogs to the other categories

In [32]:
# For every test dog, find its single nearest neighbor (k=1) in each
# per-category model, in the order: automobile, bird, cat, dog.
dog_auto_neighbors, dog_bird_neighbors, dog_cat_neighbors, dog_dog_neighbors = (
    category_model.query(image_test_dog, k=1)
    for category_model in (auto_model, bird_model, cat_model, dog_model)
)

In [33]:
# Nearest training automobile for each test dog.
dog_auto_neighbors

query_label,reference_label,distance,rank
0,33859,41.9579761457,1
1,2046,46.0021331807,1
2,19594,42.9462290692,1
3,11000,41.6866060048,1
4,19594,39.2269664935,1
5,49314,40.5845117698,1
6,40822,45.1067352961,1
7,44997,41.3221140974,1
8,33859,41.8244654995,1
9,33859,45.4976929401,1


In [34]:
# Look up the training rows whose id appears in the query result.
get_images_from_ids(dog_cat_neighbors)

id,image,label,deep_features,image_array
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
331,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.510963916779, 0.0, ...","[45.0, 65.0, 92.0, 72.0, 95.0, 110.0, 106.0, ..."
367,Height: 32 Width: 32,cat,"[1.38658058643, 0.0, 0.0, 0.0, 0.0, 0.182891070 ...","[168.0, 151.0, 143.0, 145.0, 130.0, 124.0, ..."
494,Height: 32 Width: 32,cat,"[0.0, 0.0539512038231, 1.95745122433, 0.0, 0.0, ...","[26.0, 34.0, 29.0, 24.0, 29.0, 25.0, 33.0, 43.0, ..."
597,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.0470637083054, 0.0, ...","[133.0, 153.0, 138.0, 126.0, 146.0, 136.0, ..."
882,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.156200289726, 0.0, ...","[141.0, 133.0, 112.0, 143.0, 133.0, 113.0, ..."
1252,Height: 32 Width: 32,cat,"[1.57850754261, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[217.0, 209.0, 207.0, 216.0, 208.0, 205.0, ..."
1696,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.109984338284, 0.0, ...","[12.0, 12.0, 12.0, 18.0, 18.0, 18.0, 58.0, 58.0, ..."
1846,Height: 32 Width: 32,cat,"[0.0, 0.00756752490997, 0.0, 0.523695707321, ...","[60.0, 69.0, 42.0, 54.0, 66.0, 38.0, 71.0, 99.0, ..."


We are going to create a simple function to view the nearest neighbors to save typing:

In [35]:
# Same helper as the one defined earlier in the notebook (cell In [22]);
# redefining it here is harmless.
def get_images_from_ids(query_result):
    """Look up the training images referenced by a nearest-neighbors result.

    Keeps every row of ``image_train`` whose 'id' matches one of the
    'reference_label' values in ``query_result``.
    """
    neighbor_ids = query_result['reference_label']
    return image_train.filter_by(neighbor_ids, 'id')

In [36]:
# Find the nearest neighbors of `cat` and fetch the matching training images;
# the query is the same one run earlier on knn_model.
cat_neighbors = get_images_from_ids(knn_model.query(cat))

In [37]:
# View the retrieved neighbor images.
cat_neighbors['image'].explore()

Very cool results showing similar cats.

## Finding similar images to a car

In [38]:
# Query image: the one-row slice at position 8 of the training set.
car = image_train[8:9]
car['image'].explore()
# same steps as before, now for cars

In [40]:
get_images_from_ids(knn_model.query(car))['image'].explore()
# the similar images show up here

# Just for fun, let's create a small helper to find and show nearest neighbor images

In [47]:
def show_neighbors(i):
    """Show the nearest training images to row ``i`` of ``image_train``.

    Generalizes the cat/car examples to any row: takes the one-row slice
    ``image_train[i:i+1]``, queries ``knn_model`` with it, maps the returned
    reference labels back to images with ``get_images_from_ids`` and calls
    ``explore()`` on their 'image' column.

    i: row index into ``image_train`` (position in the table, not the 'id'
        column).

    Returns whatever ``explore()`` returns.
    """
    # A def is used instead of binding a lambda to a name (PEP 8), so the
    # helper has a real __name__ and can carry this docstring.
    query_row = image_train[i:i + 1]
    neighbors = get_images_from_ids(knn_model.query(query_row))
    return neighbors['image'].explore()

In [48]:
show_neighbors(8)
# whatever number is passed, the same query is run for that row

In [49]:
show_neighbors(26)
# the number is the image's position in the table