In [76]:
# This notebook take .fits images and compute the resnet50 descriptors
# After that it save the descriptors in a given folder to be used for any porpuse
import os
import PIL
import cv2
import glob
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

from collections import OrderedDict
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm, tqdm_notebook
from PIL import Image
from scipy import sparse
from astropy.io import fits

%matplotlib inline

In [77]:
import keras
import keras.backend as K
from keras.applications import nasnet

In [78]:
import tensorflow as tf
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 2601809339823777483, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 10768651888935699840
 physical_device_desc: "device: XLA_CPU device"]

In [79]:
config = tf.ConfigProto( device_count = {'CPU': 16} ) 
sess = tf.Session(config=config) 
keras.backend.set_session(sess)

In [80]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

[]

In [81]:
# Define the model
model = keras.applications.resnet50.ResNet50(include_top=True, 
                                                 weights='imagenet', 
                                                 classes=1000)

In [82]:
# Get the .fits image, imname, and get the data for the corresponding extension, ext
def get_image(imname, ext, verbose=False):
    
    hdu = fits.open(imname)
    data = hdu[ext].data
    image = np.empty((3,data.shape[0],data.shape[1]))
    image[0] = np.copy(data)
    image[1] = np.copy(data)
    image[2] = np.copy(data)
    image = np.swapaxes(image,0,1)
    image = np.swapaxes(image,1,2)

    return image

In [83]:
# Get descriptors from data for the model
def get_descriptor(model, out_layer, model_input):
    get_3rd_layer_output = K.function([model.layers[0].input],
                                      [model.get_layer(out_layer).output])
    layer_output = get_3rd_layer_output([model_input])[0]
    return layer_output

In [84]:
# Given a list of files, call to get descriptors to create the descriptors arrays 
def process_imgs(dir_list, layer, ext):

    descriptor_len = model.get_layer(layer).output.shape[-1].value
    descriptors = np.zeros((len(dir_list), descriptor_len))
    
    with tqdm(total=len(dir_list)) as pbar:
        for img_idx in range(0, len(dir_list)):
            imgname = dir_list[img_idx]

            try:
                img = get_image(imgname, ext)
                img = np.expand_dims(img, axis=0)
                desc = get_descriptor(model, layer, img)

                descriptors[img_idx, :] = desc
            except:
                descriptors[img_idx, :] = np.nan
            
            pbar.update(1)
            
    return descriptors

In [85]:
# Paths
PROJECT_DIR = "/data/notebooks/uves_jprieto/Tutorial"
# Model checkpoints
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoint")

In [86]:
# List of fits images to process
# * bias_red:
#   - UVES_RED_BIAS (ext: 1,2)
# * bias_blue:
#   - UVES_BLUE_BIAS (ext: 0)
# * blue_arc_flat:
#   - UVES_BLUE_WAVE (ext: 1,2)
#   - UVES_DIC1B_FLAT (ext: 0)
#   - UVES_DIC1B_DFLAT (ext: 0)
# * red_arc_flat:
#   - UVES_RED_WAVE (ext: 1,2)
#   - UVES_DIC1R_FLAT (ext: 1,2)
# The following lines produce the resnet50 descriptors for the images inside
# fits_folder and the corresponding extentsion 
fits_folder = 'UVES_RED_BIAS'
ext = 1

fits_data_dir = './fits_for_test'
fits_data_dir = os.path.join(fits_data_dir,fits_folder)
fits_files = [os.path.join(fits_data_dir,s) for s in os.listdir(fits_data_dir)]

# Number of images to process
n_files = 10
#n_files = len(fits_files)
print('Folder '+ fits_folder +' contains '+ str(len(fits_files)) + ' .fits files.')
print('Selecting ' + str(n_files) + ' files.')

# For random selection
#IMG_DIRECTORY_LIST = random.sample(fits_files, n_files)
#IMG_DIRECTORY_LIST[0:n_files]

IMG_DIRECTORY_LIST = fits_files[0:n_files]
IMG_DIRECTORY_LIST

Folder UVES_RED_BIAS contains 97 .fits files.
Selecting 10 files.


['./fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS050_0005.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS245_0001.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS285_0001.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS202_0002.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS275_0001.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS147_0004.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS243_0005.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS087_0003.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS248_0002.fits',
 './fits_for_test/UVES_RED_BIAS/UVES_RED_BIAS235_0004.fits']

In [87]:
# The new descriptors are saved in CHECK_POINT_DIR/full_unflat_desc_...
descs_resNet50 = process_imgs(IMG_DIRECTORY_LIST, 'avg_pool', ext) #, save_resized=True) # Already resized in Gotham
np_descs = np.array(descs_resNet50)
np.save(os.path.join(CHECKPOINT_DIR, 'full_unflat_desc_'+str(fits_folder)+'_ext'+str(ext)+'_resNet50_N'+str(n_files)), descs_resNet50) 

100%|██████████| 10/10 [01:07<00:00,  6.89s/it]


In [75]:
print(np_descs)

[[4.67186347e-02 2.17468981e-02 4.45539691e-02 ... 2.38590837e+00
  9.31940507e-03 2.65751703e-04]
 [7.47947618e-02 2.63851229e-02 2.76189893e-02 ... 2.75831056e+00
  9.24374908e-03 1.78269739e-03]
 [5.60873523e-02 8.93160421e-03 7.20004439e-02 ... 1.40532506e+00
  5.12590446e-03 0.00000000e+00]
 ...
 [6.76995441e-02 3.24297957e-02 7.74829388e-02 ... 1.47372866e+00
  8.81641917e-03 7.49989471e-04]
 [2.65275687e-02 1.46869395e-03 6.37227893e-02 ... 1.69652164e+00
  1.62558630e-03 0.00000000e+00]
 [4.35822271e-02 1.70284994e-02 3.22689004e-02 ... 2.48224235e+00
  7.52069335e-03 3.18373175e-04]]
