In [68]:
import numpy as np
import os
import pandas as pd
import h5py

from astrometry.util.fits import fits_table, merge_tables

# To plot pretty figures
%matplotlib inline
#%matplotlib notebook

# to make this notebook's output stable across runs
def reset_graph(seed=7):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)


import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

import tensorflow as tf

%load_ext autoreload
%autoreload 2

  return f(*args, **kwds)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [69]:
from obiwan.qa.visual import plotImage

In [76]:
f_real= h5py.File(os.path.join(os.environ['HOME'],'DOWNLOADS',
                    'dr5_hdf5/hdf5/116/1165p107/img_ivar_grz.hdf5'),
                    'r')
f_fake= h5py.File(os.path.join(os.environ['HOME'],'DOWNLOADS',
                    'elg_dr5_coadds/hdf5/121/1211p060/img_ivar_grz.hdf5'),
                    'r')

print(len(f_real.keys()),len(f_fake.keys()))

876 2547


In [87]:
def get_data(f,num=128):
    """Returns numpy array (num,64,64,6)"""
    return np.array([np.stack([f[key+'/img'],f[key+'/ivar']],axis=-1).reshape((64,64,6))
                     for key in list(f.keys())[:num]])

def get_data_imgonly(f,num=128):
    """Returns numpy array (num,64,64,3)"""
    return np.array([np.reshape(f[key+'/img'],(64,64,3))
                     for key in list(f.keys())[:num]])

images_real= get_data(f_real)
images_real.shape


(128, 64, 64, 6)

In [85]:
np.array(images_real).shape

(128, 64, 64, 6)

In [72]:
images_real[0].size,images_real[0].shape,64**2*6

(24576, (64, 64, 6), 24576)

# Adapted from https://github.com/ageron/handson-ml

In [129]:
# Design:
# input, 3x(conv + avg pool), 2x(fc)

n_inputs, height,width,channels = images_real.shape

reset_graph()

conv_kwargs= dict(strides=1,
                  padding='SAME',
                  activation=tf.nn.relu)
pool_kwargs= dict(ksize= [1,2,2,1],
                  strides=[1,2,2,1],
                  padding='VALID')

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None,height,width,channels], name="X") #training data shape
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None,2], name="y") #2 classes

# 64x64
with tf.name_scope("layer1"):
    conv1 = tf.layers.conv2d(X_reshaped, filters=32, kernel_size=7,
                             **conv_kwargs)
    pool1 = tf.nn.avg_pool(conv1, **pool_kwargs)

# 32x32
with tf.name_scope("layer2"):
    conv2 = tf.layers.conv2d(pool1, filters=64, kernel_size=7,
                             **conv_kwargs)
    pool2 = tf.nn.avg_pool(conv2, **pool_kwargs)

# 16x16
with tf.name_scope("layer3"):
    conv3 = tf.layers.conv2d(pool2, filters=128, kernel_size=7,
                             **conv_kwargs)
    pool3 = tf.nn.avg_pool(conv3, **pool_kwargs)
    # next is fc
    pool3_flat = tf.reshape(pool3, shape=[-1, 128, 16, 16])


with tf.name_scope("fc"):
    fc = tf.layers.dense(pool3_flat, 128, activation=tf.nn.relu, name="fc")

with tf.name_scope("output"):
    logits = tf.layers.dense(fc, 2, name="output") # classes
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

ValueError: Rank mismatch: Rank of labels (received 2) should equal rank of logits minus 1 (received 4).

In [113]:
img_real= get_data_imgonly(f_real,num=64)
img_fake= get_data_imgonly(f_fake,num=64)
Xtrain= np.vstack([img_real,img_fake])
# fake == 1
Ytrain= np.zeros(Xtrain.shape[0])
Ytrain[Ytrain.shape[0] // 2:]= 1
ind= np.arange(Xtrain.shape[0])
np.random.shuffle(ind)
Xtrain= Xtrain[ind,...]
Ytrain= Ytrain[ind]
Xtrain.shape,Ytrain.shape

((128, 64, 64, 3), (128,))

In [120]:
def BatchGen(Xtrain,Ytrain,batch_size):
    # if not perfect divide, will drop extra training instances
    N= Xtrain.shape[0]
    ind= np.array_split(np.arange(N),N // batch_size)
    for i in ind:
        yield Xtrain[i,...],Ytrain[i].reshape(-1,1).astype(np.int32)
    
a=BatchGen(Xtrain,Ytrain,32)
for x,y in a:
    print(x.shape,y.shape)

(32, 64, 64, 3) (32, 1)
(32, 64, 64, 3) (32, 1)
(32, 64, 64, 3) (32, 1)
(32, 64, 64, 3) (32, 1)


In [127]:
n_epochs = 8
batch_size = 32

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        data_gen= BatchGen(Xtrain,Ytrain,batch_size)
        for X_batch,y_batch in data_gen:
            print(X_batch.dtype,y_batch.dtype)
            print(X_batch.shape,y_batch.shape)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        print(epoch, "Train accuracy:", acc_train)

        save_path = saver.save(sess, "./my_mnist_model")

float32 int32
(32, 64, 64, 3) (32, 1)


TypeError: unhashable type: 'numpy.ndarray'

In [119]:
Xtrain.dtype

dtype('float32')