[View in Colaboratory](https://colab.research.google.com/github/maro525/ml/blob/master/WorldImageLearning.ipynb)

In [1]:
!pip install glob2

Looking in indexes: https://pypi.org/simple, https://legacy.pypi.org/simple


In [2]:
# reference : https://qiita.com/tomo_makes/items/b3c60b10f7b25a0a5935
# reference : https://github.com/ml4a/ml4a-ofx/blob/master/scripts/tSNE-images.py

# download google fuse
# https://github.com/astrada/google-drive-ocamlfuse


!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

gpg: keybox '/tmp/tmprcmas0qy/pubring.gpg' created
gpg: /tmp/tmprcmas0qy/trustdb.gpg: trustdb created
gpg: key AD5F235DF639B041: public key "Launchpad PPA for Alessandro Strada" imported
gpg: Total number processed: 1
gpg:               imported: 1


In [0]:
# create auth token
from google.colab import auth
auth.authenticate_user()

In [9]:
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
··········
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
Please enter the verification code: Access token retrieved correctly.


In [0]:
!mkdir -p drive
!google-drive-ocamlfuse drive

In [11]:
# !mkdir drive/data/world_images
!ls drive/data

mkdir: cannot create directory ‘drive/data/world_images’: File exists
tsne  world_images


In [31]:
import os
os.chdir('/content/drive/data/world_images')
#unzip world_images.zip

Archive:  world_images.zip
^C


In [58]:
import os
os.chdir('/content')
!ls
!pwd

datalab  drive
/content


In [0]:
# refer : https://e-soroush.github.io/tsne-visualization/

import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import os, re, glob2, pickle

In [34]:
from keras.engine import Model
from keras.layers import Input
from keras.preprocessing.image import load_img

import matplotlib.pyplot as plt
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [0]:
import sys
sys.path.append('drive/python/places365')

from vgg16_places365 import VGG16_Places365

In [0]:
# parameters
image_path = 'drive/data/world_images/'
num_labels = 365

model_weights_path = '/content/drive/python/places365/models/vgg16_places365_weight.npy'

json_path = '/content/drive/data/tsne/world_image.json'

img_size = 224

# tsne parameters
tsne_dimensions = 2
tsne_learning_rate = 150
tsne_perplexity = 30

In [46]:
!ls

datalab  drive


In [57]:
# os.normpath(model_weights_path)

AttributeError: ignored

In [0]:
def analyze_image(image_path):
  model = VGG16_Places365(model_weights_path)
  feat_extractor = Model(inputs=model.input, outputs=model.get_layer("fc7").output)
  input_shape = model.input_shape[1:3]
  
  image = load_img(image_path, target_size=(img_size, img_size))
  
  if image is not None:
    print("getting activations for {}".format(image_path))
    acts = feat_extractor.predict(image)[0]
    return acts
  

In [0]:
def get_pca_feature(image_folder_path):
  image_names = glob2.glob(image_path + "/*.jpg")
  
  activations = []
  for name in image_names:
    acts = analyze_image(name)
    activations.append(acts)
  
  print("Running PCA on {} images".format(len(activations)))
  features = np.array(activations)
  pca = PCA(n_components=num_labels)
  pca.fit(features)
  pca_features = pca.transform(features)
  
  return image_names, pca_features

In [0]:
def get_reduced_transformed_tsne(pca_features):
  X = np.array(pca_features)
  tsne = TSNE(n_components=tsne_dimensions, learning_rate=tsne_learning_rate, perplexity=tsne_perplexity, verbose=2)
  reduced = tsne.fit_transform(X)
  reduced_transformed = reduced - np.min(reduced, axis=0)
  reduced_transformed /= np.max(reduced_transformed, axis=0)

def get_xindex_sorted_tsne(pca_features):
  X = np.array(pca_features)
  tsne = TSNE(n_components=tsne_dimensions, learning_rate=tsne_learning_rate, perplexity=tsne_perplexity, verbose=2)
  reduced = tsne.fit_transform(X)
  reduced_transformed = reduced - np.min(reduced, axis=0)
  reduced_transformed /= np.max(reduced_transformed, axis=0)
  image_xindex_sorted = np.argsort(np.sum(reduced_transformed, axis=1))
  
  return tsne, image_xindex_sorted

In [0]:
def get_points_data_from_tsne(tsne, image_names):
  data = []
  for i, f in enumerate(image_names):
    point = [float((tsne[i,k] - np.min(tsne[:,k]))/(np.max(tsne[:,k]) - np.min(tsne[:,k]))) for k in range(tsne_dimensions)]
    data.append({"path":os.path.abspath(os.path.join(images_path, images[i])), "point":point})
  return data

def save_tsne_to_json(tsne, image_names, output_path):
  data = []
  for i, f in enumerate(image_names):
    point = [float((tsne[i,k] - np.min(tsne[:,k]))/(np.max(tsne[:,k]) - np.min(tsne[:,k]))) for k in range(tsne_dimensions)]
    data.append({"path":os.path.abspath(os.path.join(images_path, images[i])), "point":point})
  with open(output_path, 'w') as outfile:
    json.dump(data, outfile)

In [0]:
def plt_visualize(image_names, image_xindex_sorted, reduced_transformed, im_size, ellipside=True, save_image=False):
  img_len = len(image_xindex_sorted)
  marged_width = int(np.ceil(np.sqrt(img_len))*im_size)
  merged_image = np.zeros((merged_width, merged_width, 3), dtype='uint8')
  
  for i, f in enumurate(image_xindex_sorted):
    if ellipside:
      a = np.ceil(reduced_transformed[i, 0] * (merged_width - im_size - 1) + 1)
      b = np.ceil(reduced_transformed[i, 1] * (merged_width - im_size - 1) + 1)
      a = int(a - np.mod(a-1, im_size) + 1)
      b = int(b - np.mod(b-1, im_size) + 1)
      if merged_image[a,b,0] != 0:
        continue
      image_address = image_names[i]
      img = load_img(image_address, target_size=(im_size, im_size))
      merged_image[a:a+im_size, b:b+im_size,:] = img[:,:,:3]
    else:
      b = int(np.mod(i, np.sqrt(img_len)))
      a = int(np.mod(i // np.sqrt(img_len), np.sqrt(img_len)))
      image_address = image_nmes[i]
      img = load_img(image_address, target_size=(im_size, im_size))
      merged_image[a*im_size:(a+1)*im_size, b*im_size:(b+1)*im_size,:] = img[:, :, :3]
      
  plt.imshow(merged_image)
  plt.show()
  
  if save_image:
    merged_image = Image.fromarray(merged_image)
    merged_image.save('tsne.png')

In [68]:
image_names, pca_features = get_pca_feature(image_path)

getting activations for drive/data/world_images/IMG@46.017759,18.155306_188.jpg


AttributeError: ignored