In [1]:
import keras
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import json
import os
import sklearn as skl
from sklearn import tree
from keras_preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img
from keras import Sequential
from keras import layers, regularizers, optimizers

Using TensorFlow backend.


In [2]:
TRAIN_PATH = 'data/attributes/forced_tweaked/train.csv'
TEST_PATH = 'data/attributes/forced_tweaked/valid.csv'
FULL_PATH = 'data/attributes/flags.csv'
IMG_PATH = 'data/flags_png'
cols = ['name', 'red', 'green', 'blue', 'gold', 'white', 'black', 'orange', 'crescent', 'triangle', 'icon', 'animate', 'text']

In [None]:
train_df = pd.read_csv(
    TRAIN_PATH, 
    header=None, 
    usecols=[0, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27],
    names=cols)
test_df = pd.read_csv(
    TEST_PATH, 
    header=None, 
    usecols=[0, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27],
    names=cols)
train_df.append(test_df)
countries_json_path = 'data/countries.json'
countries = open(countries_json_path, 'r')
countries_str = countries.read()
countries_map = json.loads(countries_str)
filename_map = {}
reverse_map = {}
for k, v in countries_map.items():
    fname = k.lower() + '.png'
    filename_map[v] = fname
    reverse_map[fname] = v
names = train_df['name']
flags = os.listdir(IMG_PATH)
for f in names:
    if f not in filename_map:
        print('Not in map:', f)
    elif filename_map[f] not in flags:
        print('Not in files:', filename_map[f])
        print(f)

In [3]:
train_df = pd.read_csv(
    TRAIN_PATH, 
    header=None, 
    usecols=[0, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27],
    names=cols)
test_df = pd.read_csv(
    TEST_PATH, 
    header=None, 
    usecols=[0, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27],
    names=cols)
countries_json_path = 'data/countries.json'
countries = open(countries_json_path, 'r')
countries_str = countries.read()
countries_map = json.loads(countries_str)
filename_map = {}
reverse_map = {}
for k, v in countries_map.items():
    fname = k.lower() + '.png'
    filename_map[v] = fname
    reverse_map[fname] = v
train_df['name'] = train_df['name'].apply(lambda x: filename_map[x])
test_df['name'] = test_df['name'].apply(lambda x: filename_map[x])



In [4]:
datagen=ImageDataGenerator(rescale=1./255.)
test_datagen=ImageDataGenerator(rescale=1./255.)

train_ds = datagen.flow_from_dataframe(
    train_df,
    directory=IMG_PATH,
    x_col='name',
    y_col=cols[1:],
    batch_size=16,
    seed=803,
    shuffle=True,
    class_mode='raw',
    target_size=(125, 250),
)
test_ds = test_datagen.flow_from_dataframe(
    test_df,
    directory=IMG_PATH,
    x_col='name',
    y_col=cols[1:],
    batch_size=16,
    seed=803,
    shuffle=True,
    class_mode='raw',
    target_size=(125, 250),
)

Found 144 validated image filenames.
Found 49 validated image filenames.


In [None]:
model_v1 = Sequential([
    layers.Conv2D(32, 3, padding='same', input_shape=(125, 250, 3), activation='relu'),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(2),

    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(2),

    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(2),

    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(12, activation='sigmoid')
])
model_v1.summary()

In [None]:
keras.utils.plot_model(model_v1, to_file='CNN_Model.png', show_shapes=True, show_layer_names=False,)

In [None]:
model_v1.compile(optimizers.SGD(learning_rate=0.01), loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
model_v1.fit_generator(generator=train_ds,
                    validation_data=test_ds,
                    epochs=30
)

In [None]:
test_ds.reset()
pred = model_v1.predict_generator(test_ds, verbose=1)

pred_bool = (pred > 0.5)
predictions = pred_bool.astype(int)

results = pd.DataFrame(predictions, columns=cols[1:])
results["name"] = test_ds.filenames
ordered_cols = ["name"] + cols[1:]
results = results[ordered_cols]
results['name'] = results['name'].apply(lambda x: reverse_map[x])
results.to_csv("results.csv",index=False)

In [None]:
keras.utils.plot_model(model_v1, to_file='cnn_model_v1.png', show_shapes=True,)

In [None]:
fig, ax = plt.subplots(1, 1)

ax.plot(model_v1.history.history['accuracy'])
ax.plot(model_v1.history.history["val_accuracy"])
ax.set_title("Model {}".format("Accuracy"))
ax.set_xlabel("Epochs")
ax.set_ylabel('Accuracy')
ax.legend(["train", "val"])

fig.savefig('NN Accuracy Graph')

In [5]:

names_train = train_df['name']
labels_train = train_df[cols[1:]]
img_train = []
for n in names_train:
    img = load_img(IMG_PATH + '/' + n)
    img_train.append(np.array(img))
img_train_flat = np.array(list(map( lambda e: e.flatten(), img_train)))
labels_train_proper = labels_train.to_numpy()
print(len(img_train_flat))
print(len(img_train_flat[0]))
print(len(labels_train_proper[0]))
print(len(labels_train_proper))


144
125250
12
144


In [7]:
svm_clf = tree.DecisionTreeClassifier()
svm_clf.fit(img_train_flat.tolist(), labels_train_proper.tolist())

ValueError: setting an array element with a sequence.