In [None]:
import os
import pickle
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from sklearn.cluster import KMeans
import re
import unicodedata
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.callbacks import ModelCheckpoint


Part 1: CNNs for Image Colorization

In [None]:
def unpickle(file):
    """Deserialize one pickle file and return the object stored in it.

    Parameters
    ----------
    file : str or path-like
        Location of the pickle file to read.

    Returns
    -------
    object
        The unpickled object. Because ``encoding='bytes'`` is used, 8-bit
        strings pickled by Python 2 (dictionary keys included) come back as
        ``bytes``.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [None]:
def load_data(path):
    """Load every pickled batch file found directly inside ``path``.

    Only files whose name contains ``'batch'`` are read. Among those, a name
    that also contains ``'meta'`` is loaded as the label metadata; every
    other one is loaded as a data batch.

    Parameters
    ----------
    path : str
        Directory holding the batch files.

    Returns
    -------
    data : list
        The unpickled data batches, ordered by file name.
    label : object or None
        The unpickled metadata file, or ``None`` if the directory has none.
    """
    data = []
    # Start from None so a directory without a metadata file does not raise
    # UnboundLocalError at the return statement.
    label = None

    # Full paths are built instead of calling os.chdir(): the working
    # directory is never changed, so a failure while loading cannot leave the
    # kernel stranded inside `path`. Sorting makes the batch order independent
    # of the order the file system happens to report.
    for f in sorted(os.listdir(path)):
        if 'batch' not in f:
            continue
        loaded = unpickle(os.path.join(path, f))
        if 'meta' in f:
            label = loaded
        else:
            data.append(loaded)
    return data, label


In [None]:
def save_df(file):
    """Convert one unpickled batch dictionary into a DataFrame.

    Parameters
    ----------
    file : dict
        Batch dictionary. Its ``b'data'`` entry supplies the frame's contents
        and its ``b'labels'`` entry supplies the ``'label'`` column.

    Returns
    -------
    pandas.DataFrame
        The ``b'data'`` values with one extra column named ``'label'``.
    """
    pixels = file.get(b'data')
    targets = file.get(b'labels')

    frame = pd.DataFrame(pixels)
    frame['label'] = targets
    return frame


In [None]:
def get_class(name, meta_dir=None):
    """Return the integer class id of a label name.

    Parameters
    ----------
    name : bytes
        Label to look up, compared against the entries stored under
        ``b'label_names'`` in ``batches.meta``.
    meta_dir : str, optional
        Directory containing ``batches.meta``. When omitted, the module-level
        ``path`` is used. Pass it explicitly whenever ``path`` may have been
        rebound to a different directory since the dataset was loaded.

    Returns
    -------
    int
        Position of ``name`` in the list of label names.

    Raises
    ------
    ValueError
        If ``name`` is not one of the stored label names.
    """
    if meta_dir is None:
        meta_dir = path
    meta = unpickle(os.path.join(meta_dir, 'batches.meta'))
    label_names = list(meta.get(b'label_names'))
    try:
        return label_names.index(name)
    except ValueError:
        # Same exception type as list.index, but say what was available.
        raise ValueError(
            "{!r} is not a known label; available labels: {!r}".format(name, label_names)
        ) from None


In [None]:
def flatten_data(data):
    """Collapse the first three axes of a 4-D array into a single axis.

    Parameters
    ----------
    data : numpy.ndarray
        Array with at least four axes; the product of the first three axis
        lengths becomes the row count and the fourth axis length becomes the
        column count.

    Returns
    -------
    numpy.ndarray
        ``data`` reshaped to ``(shape[0] * shape[1] * shape[2], shape[3])``.
    """
    dims = data.shape
    n_rows = dims[0] * dims[1] * dims[2]
    return data.reshape(n_rows, dims[3])

In [None]:
home = os.getcwd()
folder_name = 'cifar-10-batches-py'
path = os.path.join(home, folder_name)
dataset, label = load_data(path)
bird_label = get_class(b'bird')


def _class_images(frame, class_id):
    """Return the rows of `frame` labelled `class_id` as an (n, 32, 32, 3) array."""
    pixels = frame.loc[frame['label'] == class_id, :].drop('label', axis = 1)
    # Each row is reshaped to (3, 32, 32); the transpose then moves the
    # size-3 axis to the end.
    return np.array(pixels).reshape(len(pixels), 3, 32, 32).transpose(0, 2, 3, 1)


# Collect the per-batch frames and concatenate once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every iteration.
train_frames = []
test = None
for d in dataset:
    if(b'test' in d.get(b'batch_label')):
        test = save_df(d)
    else:
        train_frames.append(save_df(d))

# Fail with a readable message instead of a NameError/KeyError further down.
if test is None or not train_frames:
    raise ValueError("Expected both training and test batches in {!r}".format(path))

train = _class_images(pd.concat(train_frames), bird_label)
test = _class_images(test, bird_label)
# Training images first, test images last; later cells split on len(train).
data = np.concatenate((train, test))

In [None]:
# Number of palette colors the images are quantized to.
k = 4
# One row per pixel, one column per color channel.
data_flat = flatten_data(data)
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4, so relying on the default makes the fitted palette
# depend on the installed version.
kmeans = KMeans(n_clusters = k, n_init = 10, random_state = 78)
kmeans_model = kmeans.fit(data_flat)
# The k cluster centers are the palette colors.
main_colors = kmeans_model.cluster_centers_

In [None]:
# Cluster index assigned to every pixel row of data_flat.
cluster_labels = kmeans_model.predict(data_flat)

# Indexing the palette with the whole label array performs the per-pixel
# lookup in one vectorized step instead of a Python loop over every pixel.
k_colored = main_colors[cluster_labels]
k_colored = np.reshape(k_colored, (data.shape[0], 32, 32, 3))

# Row i of the k x k identity matrix is the one-hot vector for cluster i.
# The last axis uses k rather than a literal so it follows the cluster count.
main_color = np.eye(k)[cluster_labels]
main_color = np.reshape(main_color, (data.shape[0], 32, 32, k))

In [None]:
# Grayscale copies of both image sets, reshaped so each image carries an
# explicit trailing single-channel axis: (n, 32, 32, 1).
train_grayscale, test_grayscale = (
    rgb2gray(images).reshape((images.shape[0], 32, 32, 1))
    for images in (train, test)
)

In [None]:
# Options shared by both convolution layers and by both pooling layers.
# Everything uses stride 1 with 'same' padding, so no layer shrinks the
# image and the network emits one prediction per input pixel.
conv_options = dict(kernel_size = (5, 5), strides = (1, 1), padding = 'same', activation = 'relu')
pool_options = dict(pool_size = (2, 2), strides = (1, 1), padding = 'same')

CNN = Sequential([
    Conv2D(64, input_shape = train_grayscale.shape[1:], **conv_options),
    MaxPooling2D(**pool_options),
    Conv2D(64, **conv_options),
    MaxPooling2D(**pool_options),
    # NOTE(review): softmax on a hidden layer is unusual; kept as-is here
    # because changing it would alter the trained model -- confirm intent.
    Dense(32, activation = 'softmax'),
    # Four outputs per pixel, matching the four-way one-hot color targets.
    Dense(4, activation = 'softmax'),
])
CNN.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics = ['accuracy'])

In [None]:
folder = "/hw7_part2"
# NOTE: this rebinds `path`; from here on it names the checkpoint directory,
# not the dataset directory.
path = home + folder
# Create the checkpoint directory under `home` if it is missing. exist_ok
# keeps the cell safe to re-run. A test written as
# `folder in os.listdir() == False` is a chained comparison that always
# evaluates to False, so it must not be used to guard the creation.
os.makedirs(path, exist_ok = True)
os.chdir(path)
# One weights file per epoch; the epoch number and training loss are
# embedded in the file name.
file_name_cnn = "cnnmodel-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint_cnn = ModelCheckpoint(file_name_cnn, monitor = 'loss', mode = 'min')

In [None]:
epochs = 60
# Keep the History object that fit() returns: the plotting and
# model-selection cells read the per-epoch losses from `cnn.history`.
# The targets are the one-hot color maps of the training images, which
# occupy the first len(train) entries of main_color.
cnn = CNN.fit(train_grayscale, main_color[:len(train)],
              epochs = epochs, shuffle = True, validation_split = .1, callbacks = [checkpoint_cnn])


In [None]:
# Score every saved checkpoint on the test images, in file-name order.
# The test targets are the entries of main_color after the training ones.
test_targets = main_color[len(train):]
test_errors = []
for checkpoint_file in sorted(os.listdir()):
    if not checkpoint_file.startswith("cnnmodel"):
        continue
    CNN.load_weights(checkpoint_file)
    test_errors.append(CNN.evaluate(test_grayscale, test_targets))

In [None]:
# Tabulate the per-checkpoint evaluate() results. The two columns correspond
# to the loss and the single 'accuracy' metric the model was compiled with.
test_errors = pd.DataFrame(test_errors, columns = ['loss', 'accuracy'])

In [None]:
# `cnn` is the History object returned by CNN.fit. Each curve carries its own
# label so the legend cannot drift out of step with the plotting order.
plt.plot(cnn.history['loss'], 'g', label = 'Train')
plt.plot(cnn.history['val_loss'], label = 'Validation')
plt.plot(test_errors['loss'], 'r', label = 'Test')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc = 'upper right')

In [None]:
# Index of the epoch with the lowest validation loss.
best_epoch = np.argmin(cnn.history['val_loss'])
# Only checkpoint files take part in the lookup; any other file in the
# working directory would otherwise shift the index. The zero-padded epoch
# number in the file names keeps lexicographic order equal to epoch order.
checkpoint_files = sorted(f for f in os.listdir() if f.startswith("cnnmodel"))
best_model = checkpoint_files[best_epoch]
CNN.load_weights(best_model)

In [None]:
# Predict the test images, then collapse the first three axes so that each
# row of `pred` holds the class scores of a single pixel.
pred = flatten_data(CNN.predict(test_grayscale))

In [None]:
# Highest-scoring cluster for every pixel, looked up in the palette in one
# vectorized step rather than a Python loop over every pixel.
predicted_clusters = pred.argmax(axis = 1)
test_tetra = main_colors[predicted_clusters]
test_tetra = np.reshape(test_tetra, (test.shape[0], 32, 32, 3))


In [None]:
# First ten training images in a 2 x 5 grid with the axis ticks hidden.
fig = plt.figure(figsize = (10, 5))
fig.subplots_adjust(hspace = 0, wspace = .1)
for i in range(10):
    panel = fig.add_subplot(2, 5, i + 1, xticks = [], yticks = [])
    panel.imshow(train[i])

In [None]:
# First ten original test images in a 5 x 2 grid with the axis ticks hidden.
fig_test = plt.figure(figsize = (5, 10))
fig_test.subplots_adjust(hspace = 0.1, wspace = -.35)
for i in range(10):
    panel = fig_test.add_subplot(5, 2, i + 1, xticks = [], yticks = [])
    panel.imshow(test[i])

In [None]:
# First ten test images as colored by the CNN's predicted palette entries.
# The float palette values are cast to uint8 for display.
fig_tetra = plt.figure(figsize = (5, 10))
fig_tetra.subplots_adjust(hspace = 0.1, wspace = -.35)
for i in range(10):
    panel = fig_tetra.add_subplot(5, 2, i + 1, xticks = [], yticks = [])
    panel.imshow(test_tetra[i].astype(np.uint8))

In [None]:
# First ten test images in grayscale, i.e. what the network receives as input.
fig_gray = plt.figure(figsize = (5, 10))
fig_gray.subplots_adjust(hspace = 0.1, wspace = -.35)
# Convert once, and only the images that are shown, instead of converting
# the entire test set again on every loop iteration.
gray_preview = rgb2gray(test[:10])
for i in range(0, 10):
    fig_gray.add_subplot(5, 2, i+1, xticks = [], yticks = [])
    plt.imshow(gray_preview[i], cmap = 'gray')

In [None]:
# First ten test images quantized directly by KMeans. The test images sit
# after the training images in k_colored, hence the len(train) offset.
fig_k = plt.figure(figsize = (5, 10))
fig_k.subplots_adjust(hspace = 0.1, wspace = -.35)
for i in range(10):
    panel = fig_k.add_subplot(5, 2, i + 1, xticks = [], yticks = [])
    panel.imshow(k_colored[len(train) + i].astype(np.uint8))
