In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from imageio import imread

  from ._conv import register_converters as _register_converters


In [2]:
# Open an interactive TF session. An InteractiveSession installs itself as the
# default session, which is what lets later cells call `.eval()` / `.run()` on
# tensors and ops without passing a session explicitly.
sess = tf.InteractiveSession()

### Initial parameters

In [3]:
# Input geometry: every image is expected to be width x height pixels.
width, height = 512, 512
# Pixel count of a single image (per channel).
flat = width * height
# Number of target classes; this is also the width of the label placeholder.
class_output = 38

# Number of iterations run by the training loop.
COUNT = 1000

### Input and output

In [4]:
# Graph inputs. The leading `None` dimension leaves the batch size open.
#   x  : float32 image batch, shape (batch, width, height, 3)
#   y_ : float32 target batch, shape (batch, class_output)
x = tf.placeholder(dtype=tf.float32, shape=(None, width, height, 3))
y_ = tf.placeholder(dtype=tf.float32, shape=(None, class_output))

#### Converting images of the data set to tensors

In [6]:
# Give the convolution layers an explicitly shaped
# (batch, width, height, channels) tensor. The target shape is built from
# `width` / `height` instead of repeating the literal 512, so it cannot drift
# from the placeholder definition; `-1` lets TF infer the batch dimension.
x_image = tf.reshape(x, [-1, width, height, 3])
x_image

<tf.Tensor 'Reshape_2:0' shape=(?, 512, 512, 3) dtype=float32>

### Convolutional Layer 1

#### Defining kernel weight and bias

In [7]:
# First convolution layer parameters.
# Kernel: 5x5 window, 3 input channels -> 32 feature maps, sampled from a
# truncated normal distribution with standard deviation 0.1.
W_conv1 = tf.Variable(tf.truncated_normal(shape=[5, 5, 3, 32], stddev=0.1))
# One bias per output feature map (32 in total), each initialised to 0.1.
b_conv1 = tf.Variable(tf.constant(value=0.1, shape=[32]))

#### Convolve with weight tensor and add biases.

In [8]:
# Stride-1, SAME-padded convolution of the input with the layer-1 kernel,
# followed by the per-feature-map bias.
convolve1 = (
    tf.nn.conv2d(input=x_image, filter=W_conv1, strides=[1, 1, 1, 1], padding='SAME')
    + b_conv1
)

#### Apply the ReLU activation Function
Wherever a negative number occurs, we swap it out for a 0.

In [9]:
# Rectify: negative pre-activations become 0, positive ones pass through.
h_conv1 = tf.nn.relu(features=convolve1)

#### Apply the max pooling

In [10]:
# 2x2 max pooling with stride 2: halves both spatial dimensions
# (512x512 -> 256x256) and leaves the 32 channels untouched.
conv1 = tf.nn.max_pool(
    value=h_conv1,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding='SAME',
)
conv1

<tf.Tensor 'MaxPool:0' shape=(?, 256, 256, 32) dtype=float32>

### Convolutional Layer 2

In [11]:
# Second convolution block parameters: 5x5 kernel, 32 input channels -> 64
# feature maps, with one bias (initialised to 0.1) per output map.
W_conv2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 64], stddev=0.1))
b_conv2 = tf.Variable(tf.constant(value=0.1, shape=[64]))

# Same pipeline as layer 1:
# conv (stride 1, SAME) + bias -> ReLU -> 2x2 / stride-2 max pool.
convolve2 = (
    tf.nn.conv2d(input=conv1, filter=W_conv2, strides=[1, 1, 1, 1], padding='SAME')
    + b_conv2
)
h_conv2 = tf.nn.relu(features=convolve2)
conv2 = tf.nn.max_pool(
    value=h_conv2,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding='SAME',
)
conv2

<tf.Tensor 'MaxPool_1:0' shape=(?, 128, 128, 64) dtype=float32>

### Fully Connected Layer

#### Flattening Second Layer

In [12]:
# Flatten each example's feature maps into a single row for the dense layer.
# The row length is taken from conv2's static shape (all non-batch dimensions
# multiplied together) instead of a hand-computed literal: a stale literal may
# not raise here at all, because the mismatch can be absorbed by the inferred
# batch dimension (-1).
layer2_matrix = tf.reshape(conv2, [-1, int(np.prod(conv2.get_shape().as_list()[1:]))])
layer2_matrix

<tf.Tensor 'Reshape_3:0' shape=(?, 1048576) dtype=float32>

#### Weights and Biases between layer 2 and 3

In [13]:
# Dense layer parameters: flattened conv features -> 1024 hidden units.
# The input width is read from layer2_matrix's static shape so the weight
# matrix always matches the flatten step instead of duplicating its literal.
# NOTE(review): with 128 * 128 * 64 = 1,048,576 inputs this matrix holds about
# 1.07e9 float32 values (~4 GiB) before any optimizer state is allocated;
# consider more pooling/striding upstream if memory becomes a problem.
W_fc1 = tf.Variable(
    tf.truncated_normal([layer2_matrix.get_shape().as_list()[1], 1024], stddev=0.1)
)
# One bias per hidden unit, initialised to 0.1.
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))

#### Matrix Multiplication (applying weights and biases)

In [14]:
# Affine transform of the flattened features: (batch, inputs) x (inputs, 1024) + bias.
fcl = tf.matmul(a=layer2_matrix, b=W_fc1) + b_fc1

#### Apply the ReLU activation Function

In [15]:
# Hidden-layer activations: ReLU applied to the dense pre-activations.
h_fc1 = tf.nn.relu(features=fcl)
h_fc1

<tf.Tensor 'Relu_2:0' shape=(?, 1024) dtype=float32>

#### Dropout Layer, Optional phase for reducing overfitting

In [16]:
# Fed at run time: the probability of keeping each hidden activation.
keep_prob = tf.placeholder(dtype=tf.float32)
# Dropout over the hidden layer, controlled by the fed keep probability.
layer_drop = tf.nn.dropout(x=h_fc1, keep_prob=keep_prob)
layer_drop

<tf.Tensor 'dropout/mul:0' shape=(?, 1024) dtype=float32>

###  Readout Layer (Softmax Layer)

#### Weights and Biases

In [17]:
# Readout layer parameters: 1024 hidden units -> one score per class.
W_fc2 = tf.Variable(tf.truncated_normal(shape=[1024, class_output], stddev=0.1))
# One bias per class, initialised to 0.1.
b_fc2 = tf.Variable(tf.constant(value=0.1, shape=[class_output]))

#### Matrix Multiplication (applying weights and biases)

In [18]:
# Unnormalised class scores (logits) computed from the dropout output.
fc = tf.matmul(a=layer_drop, b=W_fc2) + b_fc2

#### Apply the Softmax activation Function

In [19]:
# Turn the logits into a probability distribution over the classes.
y_CNN = tf.nn.softmax(logits=fc)
y_CNN

<tf.Tensor 'Softmax:0' shape=(?, 38) dtype=float32>

# Define functions and train the model

#### Define the loss function

In [20]:
# Mean softmax cross-entropy over the batch.
# Computed from the raw logits `fc` with TensorFlow's fused op instead of
# -sum(y_ * log(softmax(fc))): once the softmax saturates a probability to
# exactly 0, log() returns -inf and 0 * -inf turns the loss (and therefore the
# gradients) into NaN. The fused op evaluates the same quantity stably.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=fc)
)

#### Define the optimizer

In [21]:
# Op that applies one Adam update (learning rate 1e-4) to reduce the loss.
train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss=cross_entropy)

#### Define prediction

In [22]:
# Per-example boolean: does the most probable predicted class equal the class
# with the largest entry in the target row?
correct_prediction = tf.equal(
    tf.argmax(y_CNN, axis=1),
    tf.argmax(y_, axis=1),
)

#### Define accuracy

In [23]:
# Fraction of correct predictions: booleans cast to 0.0 / 1.0, then averaged.
accuracy = tf.reduce_mean(tf.cast(x=correct_prediction, dtype=tf.float32))

#### Run session, train

In [24]:
# Assign every tf.Variable its initial value; must run before any training
# or evaluation step touches the variables.
sess.run(fetches=tf.global_variables_initializer())

#### read data

In [25]:
# Load a small labelled subset of the training data into memory.
# Each row of train.csv supplies an `id` (the PNG file stem under ./train/)
# and a `landmark_id` (the label).
df = pd.read_csv("train.csv")

# Cap the subset at the number of rows actually present and slice by position,
# so a CSV with fewer than 50 rows yields a smaller batch instead of failing
# on a missing row label.
n_samples = min(50, len(df))

imgs = [
    imread("./train/{}.png".format(str(image_id)))
    for image_id in df.id.iloc[:n_samples]
]
# Each label is wrapped in its own one-element list, i.e. one label set per
# sample, which is the input form the label binarizer consumes.
landmarks = [[landmark_id] for landmark_id in df.landmark_id.iloc[:n_samples]]

# np.stack raises immediately if the images do not all share one shape,
# rather than producing an object array that fails later in astype/feed.
imgs = np.stack(imgs).astype(np.float32)

In [27]:
from sklearn.preprocessing import MultiLabelBinarizer

# One-hot encode the labels: one column per distinct landmark id present in
# `landmarks`, converted to float32 to match the label placeholder's dtype.
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(landmarks).astype(np.float32)

# The encoder derives its column count from the data, whereas the graph's
# label placeholder is fixed at `class_output` columns. Fail here with an
# explicit message rather than with an opaque feed_dict shape error once
# training starts.
if Y.shape[1] != class_output:
    raise ValueError(
        "one-hot labels have {} classes but the model expects class_output={}".format(
            Y.shape[1], class_output
        )
    )

In [None]:
# NOTE: disabled draft of a tf.data input pipeline, kept commented out for reference.
# def _parse_function(filename, label):
#   image_string = tf.read_file(filename)
#   image_decoded = tf.image.decode_image(image_string)
#   return image_decoded, label

# files = tf.constant(filenames)
# labels = tf.constant(landmarks)

# dataset = tf.data.Dataset.from_tensor_slices((files, labels))
# dataset = dataset.map(_parse_function)
# dataset

#### Train data

In [None]:
# Full-batch training: every step feeds all loaded images and labels at once.
# On every 10th step the accuracy on that same training batch is reported
# (keep_prob=1.0, so dropout is disabled) before the step's parameter update,
# which runs with keep_prob=0.5.
for step in range(1, COUNT + 1):
    if step % 10 == 0:
        train_accuracy = sess.run(accuracy, feed_dict={x: imgs, y_: Y, keep_prob: 1.0})
        print("step %d, training accuracy %g" % (step, float(train_accuracy)))
    sess.run(train_step, feed_dict={x: imgs, y_: Y, keep_prob: 0.5})

In [None]:
# Close the session created at the top of the notebook and release the
# resources it holds.
sess.close()