In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import tensorflow as tf
from PIL import Image

In [2]:
random.seed(3457)

In [3]:
def splitFilename(filename):
    return [word.replace('-', ' ') for word in filename.split(separator)[:-1]]

def random_batch(x, y):
    n_data = len(x)

    # Create a random index.
    idx = np.random.choice(n_data,
                           size=batch_size,
                           replace=False)

    # Use the random index to select random images and labels.
    x_batch = x[idx, :, :, :]
    y_batch = y[idx, :]

    yield (x_batch, y_batch)

def getbatch(x, y, batch_size=256):
    n_batches = n_data//batch_size
    for batch_number in range(n_batches):
        rand_index = np.array([random.randrange(0, n_data) for i in range(batch_size)])
#         print(rand_index)
        batch_x = x[rand_index, :, :, :]
#         print("x.shape:", batch_x.shape)
#         batch_x = (batch_x-batch_x.min())/(batch_x.max()-batch_x.min())
        batch_y = y[rand_index]
#         print("y.shape:", batch_y.shape)
        yield (batch_x, batch_y)

In [4]:
# Global constants
images_directory = './images_3/'
separator = "_"
image_width = 136
image_height = 180
image_channel = 3
image_shape = (image_height, image_width, image_channel)
image_size = image_height * image_width * image_channel
continents = {'africa': 0, 'asia': 1, 'australia': 2, 'europe': 3, 'north america': 4, 'south america': 5}
country_continent_dict = {'morocco': 'africa', 'kosovo': 'europe', 'guinea bissau': 'africa', 'australia': 'australia', 'kazakhstan': 'asia', 'montenegro': 'europe', 'sao tome and principe': 'africa', 'madagascar': 'africa', 'latvia': 'europe', 'tunisia': 'africa', 'spain': 'europe', 'japan': 'asia', 'italy': 'europe', 'mali': 'africa', 'peru': 'south america', 'brazil': 'south america', 'martinique': 'north america', 'togo': 'africa', 'bolivia': 'south america', 'macedonia': 'europe', 'lithuania': 'europe', 'comoros': 'africa', 'senegal': 'africa', 'turkey': 'europe', 'venezuela': 'south america', 'neukaledonien': 'australia', 'sierra leone': 'africa', 'kyrgyzstan': 'asia', 'northern ireland': 'europe', 'dominican republic': 'north america', 'korea, north': 'asia', 'panama': 'north america', 'canada': 'north america', 'mauritania': 'africa', 'central african republic': 'africa', 'poland': 'europe', 'french guiana': 'africa', 'afghanistan': 'asia', 'armenia': 'asia', 'eritrea': 'africa', 'cyprus': 'europe', 'switzerland': 'europe', 'kenya': 'africa', 'faroe island': 'europe', 'georgia': 'asia', 'iran': 'asia', 'korea, south': 'asia', 'saudi arabia': 'asia', 'finland': 'europe', 'guadeloupe': 'north america', 'estonia': 'europe', 'serbia': 'europe', 'denmark': 'europe', 'greece': 'europe', 'chile': 'south america', 'liechtenstein': 'europe', "cote d'ivoire": 'africa', 'cape verde': 'africa', 'portugal': 'africa', 'england': 'europe', 'azerbaijan': 'asia', 'dr congo': 'africa', 'sweden': 'europe', 'rwanda': 'africa', 'zambia': 'africa', 'iraq': 'asia', 'egypt': 'africa', 'hungary': 'europe', 'honduras': 'north america', 'equatorial guinea': 'africa', 'mexico': 'north america', 'slovenia': 'europe', 'france': 'europe', 'jamaica': 'north america', 'argentina': 'south america', 'uruguay': 'south america', 'mauritius': 'africa', 'chad': 'africa', 'benin': 'africa', 'nigeria': 'africa', 'uzbekistan': 'asia', 'trinidad and tobago': 'africa', 'ukraine': 'europe', 'ireland': 'europe', 'algeria': 'africa', 'czech republic': 'europe', 'paraguay': 'south america', 'united states': 'north america', 'romania': 'europe', 'iceland': 'europe', 'austria': 'europe', 'bulgaria': 'europe', 'gabon': 'africa', 'ecuador': 'south america', 'russia': 'europe', 'uganda': 'africa', 'china': 'asia', 'albania': 'europe', 'cameroon': 'africa', 'palästina': 'asia', 'guinea': 'africa', 'scotland': 'europe', 'slovakia': 'europe', 'belarus': 'europe', 'luxembourg': 'europe', 'belgium': 'europe', 'germany': 'europe', 'israel': 'asia', 'lebanon': 'asia', 'indonesia': 'asia', 'malta': 'europe', 'mozambique': 'africa', 'south africa': 'africa', 'congo': 'africa', 'new zealand': 'australia', 'wales': 'europe', 'tanzania': 'africa', 'curacao': 'south america', 'costa rica': 'north america', 'haiti': 'north america', 'jordan': 'asia', 'libya': 'africa', 'philippines': 'asia', 'croatia': 'europe', 'angola': 'africa', 'bosnia herzegovina': 'europe', 'tajikistan': 'asia', 'chinese taipei (taiwan)': 'asia', 'colombia': 'south america', 'netherlands': 'europe', 'burundi': 'africa', 'liberia': 'africa', 'ghana': 'africa', 'norway': 'europe', 'moldova': 'europe', 'syria': 'asia', 'zimbabwe': 'africa', 'the gambia': 'africa', 'burkina faso': 'africa'}

n_data = 0
for filename in os.listdir(images_directory):
    if os.path.isfile(os.path.join(images_directory, filename)) and filename.endswith(".jpg"):
        image = Image.open(images_directory + filename)
        im2arr = np.array(image) # im2arr.shape: height x width x channel
        if im2arr.shape[0] != image_height or im2arr.shape[1] != image_width or im2arr.shape[2] != image_channel:
            continue
        n_data += 1

### Setting Hyper-parameters

In [5]:
# Hyper-parameters
learningrate = 0.001

# n_data = len([name for name in os.listdir(images_directory)
#               if os.path.isfile(os.path.join(images_directory, name))
#               and name.endswith(".jpg")])

n_epochs = 10
batch_size = 128
# n_batches = 10 # n_data//batch_size
n_input = image_size
n_classes = 6
dropout = 0.75

### Reading Data

In [6]:
x = np.ndarray(shape=(n_data, image_height, image_width, image_channel), dtype=np.float32)
y = []

index = 0
for filename in os.listdir(images_directory):
    if os.path.isfile(os.path.join(images_directory, filename)) and filename.endswith(".jpg"):
        # print(filename)
        image = Image.open(images_directory + filename)
        im2arr = np.array(image) # im2arr.shape: height x width x channel
        if im2arr.shape[0] != image_height or im2arr.shape[1] != image_width or im2arr.shape[2] != image_channel:
            continue
        x[index] = Image.fromarray(np.array(image))
        y.append(continents[country_continent_dict[splitFilename(filename)[3]]])
        index += 1

# One-hot encode labels for all data
y = np.array(y, dtype=np.int)
y = np.eye(n_classes)[y]
print(y[25])

print("Data Size:", n_data)
print("Index:", index)
print(x.shape)
print(y.shape)
print(y[25])

[ 1.  0.  0.  0.  0.  0.]
Data Size: 7493
Index: 7493
(7493, 180, 136, 3)
(7493, 6)
[ 1.  0.  0.  0.  0.  0.]


### Defining X and Y as placeholders

In [7]:
X = tf.placeholder(tf.float32, [None, image_height, image_width, image_channel])
Y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)
print(X.shape, Y.shape)

(?, 180, 136, 3) (?, 6)


### Defining Model Using conv2d, ReLu and Maxpooling

In [8]:
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

In [9]:
def maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

In [10]:
def conv_net(x, weights, biases, dropout):
    # Convolution layer 1
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max pooling
    conv1 = maxpool2d(conv1, k=2)

    # Convolution layer 2
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max pooling
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)

    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

### Initializing Weights and Biases

In [11]:
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'wd1': tf.Variable(tf.random_normal([34*45*64, 1024])),
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

### Define Cost, Optimizer and Accuracy

In [12]:
# Create the model
model = conv_net(X, weights, biases, keep_prob)
print(model)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
train_min = tf.train.AdamOptimizer(learning_rate=learningrate).minimize(loss)

# Evaluate model
correct_model = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_model, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

Tensor("Add_1:0", shape=(?, 6), dtype=float32)


### Running the Session 

In [13]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_x, batch_y in getbatch(x, y):
            sess.run(train_min, feed_dict={X:batch_x, Y:batch_y, keep_prob: dropout})
        for batch_x, batch_y in getbatch(x, y):
            losscalc, accuracycalc = sess.run([loss, accuracy], 
                                              feed_dict={X:batch_x, Y:batch_y, keep_prob: 1.0})
            print("Epoch: %d, Loss: %0.4f, Accuracy: %0.4f" % (epoch, losscalc, accuracycalc))
            break;
            
#         for _ in range(n_batches):
#             got = getbatch(x, y)
#             print("got:", got)
#             batch_x, batch_y = getbatch(x, y)
#             #batch_y = batch_y.astype(np.float32)
#             # Use training data for optimization
#             sess.run(train_min, feed_dict={X:batch_x, Y:batch_y, keep_prob: dropout})
        # Validate after every epoch
#         batch_x, batch_y = getbatch(x, y)
#         losscalc, accuracycalc = sess.run([loss, accuracy], 
#                                           feed_dict={X:batch_x, Y:batch_y, keep_prob: 1.0})
#         print("Epoch: %d, Loss: %0.4f, Accuracy: %0.4f"%(epoch, losscalc, accuracycalc))
            
    # When the training is complete and you are happy with the result
    accuracycalc = sess.run(accuracy,
                            feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0})
    print("Testing accuracy: %0.4f"%(accuracycalc))

Epoch: 0, Loss: 27244652.0000, Accuracy: 0.7344
Epoch: 1, Loss: 16264012.0000, Accuracy: 0.7227
Epoch: 2, Loss: 7355523.0000, Accuracy: 0.6914
Epoch: 3, Loss: 6907926.0000, Accuracy: 0.7383
Epoch: 4, Loss: 3421578.5000, Accuracy: 0.7266
Epoch: 5, Loss: 2291593.7500, Accuracy: 0.7578
Epoch: 6, Loss: 1243703.8750, Accuracy: 0.8008
Epoch: 7, Loss: 888712.0000, Accuracy: 0.7891
Epoch: 8, Loss: 463129.4375, Accuracy: 0.7773
Epoch: 9, Loss: 255529.4375, Accuracy: 0.7383


NameError: name 'mnist' is not defined

In [15]:
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_x, batch_y in getbatch(x, y):
            sess.run(train_min, feed_dict={X:batch_x, Y:batch_y, keep_prob: dropout})
        for batch_x, batch_y in getbatch(x, y):
            losscalc, accuracycalc = sess.run([loss, accuracy], 
                                              feed_dict={X:batch_x, Y:batch_y, keep_prob: 1.0})
            print("Epoch: %d, Loss: %0.4f, Accuracy: %0.4f" % (epoch, losscalc, accuracycalc))
            break;

Epoch: 0, Loss: 26114432.0000, Accuracy: 0.7070
Epoch: 1, Loss: 13132954.0000, Accuracy: 0.7383
Epoch: 2, Loss: 8819888.0000, Accuracy: 0.6992


KeyboardInterrupt: 