<a href="https://colab.research.google.com/github/marceloqs/VLDL_Tarea1/blob/main/Tarea1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upstream repositories, left commented out; directories named convnet2/ and
# visual_attributes/ are instead taken from the repository cloned below.
#!git clone https://github.com/jmsaavedrar/visual_attributes.git
#!git clone https://github.com/jmsaavedrar/convnet2.git
# Clone the assignment repository into the current working directory.
!git clone https://github.com/marceloqs/VLDL_Tarea1.git
# Move convnet2/, data/ and visual_attributes/ out of the clone into /content/,
# where the following cells expect to find them.
!mv -t /content/ /content/VLDL_Tarea1/convnet2/ /content/VLDL_Tarea1/data/ /content/VLDL_Tarea1/visual_attributes/

Cloning into 'VLDL_Tarea1'...
remote: Enumerating objects: 164, done.[K
remote: Counting objects: 100% (24/24), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 164 (delta 3), reused 14 (delta 1), pack-reused 140[K
Receiving objects: 100% (164/164), 41.29 MiB | 7.97 MiB/s, done.
Resolving deltas: 100% (36/36), done.


In [None]:
# Download the dataset and unpack it.
# Create the target directory (no error if it already exists) and switch to it.
!mkdir -p /content/data/Resnet50
%cd /content/data/Resnet50

# Fetch the zip archive, extract it in place, then delete the archive.
!wget https://www.dropbox.com/s/ri743kwqh8t6a7r/dataset_atributos.zip
!unzip dataset_atributos.zip
!rm dataset_atributos.zip

In [3]:
# Generamos los catálogos con las rutas de las imágenes
import os
import pandas as pd
import sys

# Fixed seed so the shuffled catalog order is the same after a kernel restart.
SHUFFLE_SEED = 0

for set_name in ["color", "texture"]:
    print(f"Generando catálogos para {set_name} set ...")
    data_dir = os.path.join("/content/data/Resnet50/dataset_atributos/", set_name)

    # Collect the path of every file found under the set directory.
    filenames = [
        os.path.join(path, name)
        for path, _subdirs, files in os.walk(data_dir)
        for name in files
    ]
    # os.walk order depends on the filesystem; sort first so that the seeded
    # shuffle below always yields the same catalog.
    filenames.sort()
    df = pd.DataFrame({"filenames": filenames})
    df = df.sample(frac=1, random_state=SHUFFLE_SEED)  # shuffle the whole dataset

    # Every block directory receives the same catalog (same files, same order).
    for block_idx in range(1, 6):
        output_dir = f"/content/data/{set_name}/block_{block_idx}/ssearch"
        if not os.path.exists(output_dir):
            print(f"    Creando directorio para el bloque {block_idx} ...")
        os.makedirs(output_dir, exist_ok=True)

        # Write the catalog whenever it is missing, even if the directory was
        # already there; an existing catalog is left untouched so it stays
        # aligned with any features already computed from it.
        output_fn = os.path.join(output_dir, "catalog.txt")
        if not os.path.exists(output_fn):
            df.to_csv(output_fn, header=False, index=False)

Generando catálogos para color set ...
Generando catálogos para texture set ...


In [None]:
%cd /content/visual_attributes

#Generacion de features
for set_name in ["color", "texture"]:
  print(f"Generando features para {set_name} set ...")
  for block_ixd in range(1, 6):
      netParam = "RESNET_{}_{}".format(set_name, block_ixd)
      print(netParam)
      !python ssearch.py -config config/resnet50.config -name $netParam -mode compute 

In [None]:
# Install UMAP (umap-learn with its "plot" extra)
!pip install umap-learn[plot] 

In [6]:
import numpy as np
import plotly.express as px
import umap
from plotly.subplots import make_subplots

# Maps each Spanish color label to the color name used to draw it in the
# scatter plots (passed to plotly as color_discrete_map).
color_discrete_map = {
    'negro': 'black',
    'cafe': 'brown',
    'gris': 'gray',
    'amarillo': 'yellow',
    'rosado': 'pink',
    'verde': 'green',
    'morado': 'purple',
    'naranjo': 'orange',
    'azul': 'blue',
    'rojo': 'red',
}

def get_scatter(fig_df, color_col, title):
    """Build a 600x600 Plotly scatter plot of the ``x``/``y`` columns of ``fig_df``.

    Args:
        fig_df: DataFrame providing the ``x``, ``y`` and ``filenames`` columns.
        color_col: value forwarded to ``px.scatter(color=...)``; it decides the
            color group of every point.
        title: title of the figure.

    Returns:
        The Plotly figure. It is returned, not displayed.
    """
    scatter_options = dict(
        x="x",
        y="y",
        color=color_col,
        # Labels missing from the map fall back to Plotly's default colors.
        color_discrete_map=color_discrete_map,
        title=title,
        hover_data=["filenames"],
        height=600,
        width=600,
    )
    return px.scatter(fig_df, **scatter_options)

def normalize(data):
    """Scale every row of ``data`` to unit Euclidean (L2) length.

    Args:
        data: 2-D array-like with one vector per row.

    Returns:
        ndarray of the same shape whose non-zero rows have norm 1. All-zero
        rows are returned as zeros instead of NaN.
    """
    data = np.asarray(data)
    # keepdims=True leaves the norms as a column (n, 1), so the division
    # broadcasts row-wise without any transpose.
    norm = np.sqrt(np.sum(np.square(data), axis=1, keepdims=True))
    # A zero vector has no direction: divide it by 1 so it stays zero rather
    # than becoming 0/0 = NaN, which would poison every similarity computed
    # from it.
    norm[norm == 0] = 1
    return data / norm

def compute_ranking(fv):
    """Rank all vectors against each other by decreasing cosine similarity.

    Args:
        fv: 2-D array with one feature vector per row.

    Returns:
        Integer array of shape (n, n); row ``i`` lists the row indices of
        ``fv`` ordered from most to least similar to vector ``i``.
    """
    unit_fv = normalize(fv)
    # For unit-length rows, the dot product is the cosine similarity.
    cosine_sim = unit_fv @ unit_fv.T
    # Negate so that the ascending argsort yields a descending ranking.
    return np.argsort(-cosine_sim, axis=-1)

In [130]:
# Cargamos el catálogo y los feature vectors obtenidos
import numpy as np
import pandas as pd

# Index of the class folder once a catalog path is split on "/":
# "/content/data/Resnet50/dataset_atributos/<set>/<class>/<file>" -> part 6.
CLASS_PATH_PART = 6

for set_name in ["color", "texture"]:
    for block_ixd in range(1, 6):
        # Load the feature matrix: the shape is stored in one file and the
        # flat float32 values in another.
        base_dir = "/content/data/{}/block_{}/ssearch".format(set_name, block_ixd)
        features_shape = np.fromfile(f"{base_dir}/features_shape.np", dtype=np.int32)
        features = np.fromfile(f"{base_dir}/features.np", dtype=np.float32)
        features = np.reshape(features, features_shape)

        # Load the catalog (one image path per line; row i is used as the
        # source of feature vector i).
        catalog_df = pd.read_csv(f"{base_dir}/catalog.txt", names=["filenames"])

        # Reduce dimensionality to 2-D with UMAP (fixed seed for repeatable plots).
        features_2d = umap.UMAP(random_state=11).fit_transform(features)

        # Add the class label and the UMAP coordinates to the catalog.
        catalog_df["class"] = catalog_df["filenames"].str.split('/', expand=True)[CLASS_PATH_PART]
        catalog_df["x"] = features_2d[:, 0]
        catalog_df["y"] = features_2d[:, 1]

        # Rank-1 accuracy: fraction of images whose most similar *other* image
        # has the same class.
        ranking = compute_ranking(features)
        labels = catalog_df["class"].to_numpy()
        query_idx = np.arange(len(ranking))
        # The best match is normally the query itself, but ties (duplicate or
        # degenerate vectors) can put another image first. Use the row index
        # as the query and skip self only where it really is ranked first.
        top1_idx = np.where(ranking[:, 0] == query_idx, ranking[:, 1], ranking[:, 0])
        hits = int(np.sum(labels[top1_idx] == labels))
        acc = hits / len(labels)

        # Plot
        title = "Clase: {}, Bloque RN50: {}, Acc: {}".format(set_name, block_ixd, acc)
        fig = get_scatter(catalog_df, catalog_df.loc[:, "class"], title)
        fig.show()