# Logo Impressions


__First Impression:__

To understand if there is any relation of logo's(120, 120, 4) and their impression with people. So, I have taken few logo's from following website. Source: http://www.logosdatabase.com/top-500-logos


__Goal:__

To build a Conv net classifier that, that can seperate images based on their logo popularity.

I took top 0-100 logo's and 400-500 logo's as class A and class B and try then try to make a classificaiton model to predict if a logo is given where will my model fit it.

__Future Scope:__

* Logo impression ranking.
* Help users to create better attractive designs.




In [1]:
%matplotlib inline

# os libs
import glob

# plotting libs
import matplotlib.pyplot as plt
import seaborn as sns

# machine learning libs
import tensorflow as tf
import numpy as np
import pandas as pd

In [2]:
def get_image_id(full_filename):
    full_filename =  full_filename.split('/')[-1].split('.')[0]
    if '_' in full_filename:
        image_id = full_filename.split('_')[-1]
        if image_id.isdigit():
            return int(image_id)
    return np.NAN

def get_image_data(filename):
    if filename in ['data/Top 500 Logos_files/interrelated_logo_3652.gif',
                   'data/Top 500 Logos_files/veo_logo_3654.gif']:
        return
    return plt.imread(filename)


def get_image_shape(filename):
    try:
        # Reading Image
        x = get_image_data(filename)
        return x.shape
    except:
        return np.NAN
    
def get_reshaped_image_data(filename):
    # Reading Image
    x = get_image_data(filename)

    # Reshaping
    #    Let the Neural Netword do that optimisations stuff
    a, b, c = x.shape
    y = np.zeros([120, 120, 4])
    y[:a, :b, :c] = x
    return y

In [3]:
# Read local file names
files_B = glob.glob("data/Top 500 Logos 400_files/*")
files_A = glob.glob("data/Top 500 Logos_files/*")

df = pd.DataFrame({'filenames':files_A + files_B})

# drop all the image we can't id
#   as only Logo's have Images ID's
df['image_rank'] = df.filenames.map(get_image_id)
df = df.dropna()
# df = df.drop('image_rank', axis=1)

#### Category of Logo's ####
df['image_class'] = df.filenames.map(lambda x: 'A' if x in files_A else 'B')

df['image_shape'] = df.filenames.map(get_image_shape)
# dropping all images we couldn't read
df= df[~df.image_shape.isnull()]
df = df.reset_index(drop=True)

# df['image_shape_0'] = df['image_shape'].map(lambda x: min(x[:2]))
# df['image_shape_1'] = df['image_shape'].map(lambda x: max(x[:2]))
df['image_shape_2'] = df['image_shape'].map(lambda x: x[2])

# Taking only 4 layered Images
df = df[df.image_shape_2 == 4]
df = df.reset_index(drop=True)

df.head()

Unnamed: 0,filenames,image_rank,image_class,image_shape,image_shape_2
0,data/Top 500 Logos_files/abba_logo_2980.gif,2980.0,A,"(32, 120, 4)",4
1,data/Top 500 Logos_files/aegon_logo_2619.gif,2619.0,A,"(44, 120, 4)",4
2,data/Top 500 Logos_files/after_eight_logo_2933...,2933.0,A,"(100, 120, 4)",4
3,data/Top 500 Logos_files/applebees_logo_3276.gif,3276.0,A,"(100, 84, 4)",4
4,data/Top 500 Logos_files/atandt_logo_3180.gif,3180.0,A,"(100, 100, 4)",4


In [4]:
df = df[['filenames', 'image_class']]

df_index = df.index.tolist()
class_a_index = df[df.image_class == 'A'].index.tolist()
class_b_index = df[df.image_class == 'B'].index.tolist()
np.random.shuffle(class_a_index)
np.random.shuffle(class_b_index)

print('[info] Images that belong to class A', len(class_a_index))
print('[info] Images that belong to class B', len(class_b_index))

[info] Images that belong to class A 96
[info] Images that belong to class B 98


In [5]:
# Test - Train data split
test_train_records_split_count = 70
train_data = class_a_index[:test_train_records_split_count] + class_b_index[:test_train_records_split_count]
test_data = class_a_index[test_train_records_split_count:] + class_b_index[test_train_records_split_count:]

df = df.reset_index(drop=True)
df['train_data'] = df.index.map(lambda x: x in train_data).tolist()
train_df = df[df.train_data][['filenames', 'image_class']].reset_index(drop=True)
test_df = df[~df.train_data][['filenames', 'image_class']].reset_index(drop=True)

# Model building

In [6]:
trX = train_df.filenames.map(get_reshaped_image_data).values
trY = train_df.image_class.map({'A': 0, 'B': 1}).values

teX = test_df.filenames.map(get_reshaped_image_data).values
teY = test_df.image_class.map({'A': 0, 'B': 1}).values

trX = trX.tolist()
teX = teX.tolist()
trY = trY.tolist()
teY = teY.tolist()

print('Shape of Each Image', trX[1].shape)

trY = np.array(trY).reshape([len(trY), 1])
teY = np.array(teY).reshape([len(teY), 1])

Shape of Each Image (120, 120, 4)


In [7]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, p_keep_conv, p_keep_hidden):
    # X shape=(?, 120, 120, 4)
    w = init_weights([3, 3, 4, 32])       # 3x3x4 conv, 32 outputs
    w2 = init_weights([3, 3, 32, 64])     # 3x3x32 conv, 64 outputs
    w3 = init_weights([3, 3, 64, 128])    # 3x3x32 conv, 128 outputs
    w4 = init_weights([128 * 4 * 4, 625]) # FC 128 * 4 * 4 inputs, 625 outputs
    w_o = init_weights([625, 1])          # FC 625 inputs, 10 outputs (labels)

    ############################################################### layer 1
    # l1a shape=(?, 120, 120, 32)
    l1a = tf.nn.relu(tf.nn.conv2d(X,
                                  w,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME'))
    # l1 shape=(?, 30, 30, 32)
    l1 = tf.nn.max_pool(l1a,
                        ksize=[1, 4, 4, 1],
                        strides=[1, 4, 4, 1],
                        padding='SAME')
    l1 = tf.nn.dropout(l1, p_keep_conv)

    ############################################################### layer 2
    # l2a shape=(?, 30, 30, 64)
    l2a = tf.nn.relu(tf.nn.conv2d(l1,
                                  w2,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME'))
    # l2 shape=(?, 15, 15, 64)
    l2 = tf.nn.max_pool(l2a,
                        ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1],
                        padding='SAME')
    l2 = tf.nn.dropout(l2, p_keep_conv)

    ############################################################### layer 3
    # l3a shape=(?, 15, 15, 128)
    l3a = tf.nn.relu(tf.nn.conv2d(l2,
                                  w3,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME'))
    # l3 shape=(?, 4, 4, 128)
    l3 = tf.nn.max_pool(l3a,
                        ksize=[1, 4, 4, 1],
                        strides=[1, 4, 4, 1],
                        padding='SAME')
    # reshape to (?, 2048)
    l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]])
    l3 = tf.nn.dropout(l3, p_keep_conv)
    
    ############################################################### layer 4 - output
    l4 = tf.nn.relu(tf.matmul(l3, w4))
    l4 = tf.nn.dropout(l4, p_keep_hidden)

    pyx = tf.matmul(l4, w_o)
    return pyx

X = tf.placeholder("float", [None, 120, 120, 4])
Y = tf.placeholder("float", [None, 1])

p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, p_keep_conv, p_keep_hidden)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)

In [8]:
batch_size = 5
test_size = 54

trX = np.array(trX)
trY = np.array(trY)

teX = np.array(teX)
teY = np.array(teY)

# Launch the graph in a session
with tf.Session() as sess:
    # you need to initialize all variables
    tf.global_variables_initializer().run()

    for i in range(2):
        training_batch = zip(range(0, len(trX), batch_size),
                             range(batch_size, len(trX)+1, batch_size))
        for start, end in training_batch:
            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end],
                                          p_keep_conv: 0.5, p_keep_hidden: 0.5})

        test_indices = np.arange(len(teX)) # Get A Test Batch
        np.random.shuffle(test_indices)
        test_indices = test_indices[0:test_size]

        print('Iter:', i, '\tScore:', np.mean(np.argmax(teY[test_indices], axis=1) ==
                         sess.run(predict_op, feed_dict={X: teX[test_indices],
                                                         Y: teY[test_indices],
                                                         p_keep_conv: 1.0,
                                                         p_keep_hidden: 1.0})))

Iter: 0 	Score: 1.0
Iter: 1 	Score: 1.0
