# Importing data

In [None]:
# Getting the files ready
# WARNING: `rm -rf *` deletes everything in the current working directory;
# only run this cell inside a throwaway workspace.
!rm -rf *
# NOTE(review): wget is given a bare file name rather than a URL - presumably
# the real download link was removed; confirm before running.
!wget ai-unibuc.zip
!unzip -q ai-unibuc.zip

In [None]:
import numpy as np
import cv2
import os

from skimage import io
from sklearn import preprocessing, svm, metrics

# The dimension of the images (in case you don't resize them, keep it at (50, 50)).
# load_data and load_test allocate their image arrays with this shape, so every
# image returned by preprocess has to match it.
dim = (50, 50)

# Denoise a single image with OpenCV's non-local means filter
def denoise_image(image):
    """Return the output of ``cv2.fastNlMeansDenoising`` for ``image``.

    No destination buffer is supplied and OpenCV's default filter settings
    are used.
    """
    denoised = cv2.fastNlMeansDenoising(image, None)
    return denoised

# Resize a single image to the notebook-wide `dim`
def resize_image(image):
    """Return ``image`` resized to ``dim`` using area interpolation."""
    target_size = dim
    return cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)

# Scale the pixel intensities of an image (alpha = gain, beta = offset)
def adjust_image(image, alpha=1.95, beta=0):
    """Return ``image`` passed through ``cv2.convertScaleAbs``.

    Args:
        image: Image to adjust.
        alpha: Multiplicative factor forwarded to OpenCV.
        beta: Additive offset forwarded to OpenCV.
    """
    adjusted = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return adjusted

# Preprocessing pipeline for one image; uncomment the steps you want to run
def preprocess(image):
    """Apply the enabled preprocessing steps to ``image`` and return the result.

    Every step is currently commented out, so the image is returned unchanged.
    """
    # Optional steps (uncomment to enable):
    # image = denoise_image(image)
    # image = resize_image(image)
    # image = adjust_image(image)
    processed = image
    return processed

# Load the train and validation data
def load_data(type):
    """Load every image listed in ``{type}.txt`` together with its label.

    Each non-empty line of the index file is expected to look like
    ``filename,label``. The image is read in grayscale from the ``type``
    directory and passed through ``preprocess``.

    Args:
        type: Name of the split; used both as the stem of the index file and
            as the image directory (the notebook passes ``'train'`` and
            ``'validation'``).

    Returns:
        A tuple ``(images, labels, filenames)``: ``images`` is a float64 array
        of shape ``(n, dim[0], dim[1])``, ``labels`` an int64 array and
        ``filenames`` a numpy string array, all in index-file order.

    Raises:
        FileNotFoundError: If an image named in the index cannot be read.
    """
    # Read the index inside a context manager so the handle is always closed,
    # and drop blank lines (e.g. a trailing empty line) instead of crashing.
    with open(f'{type}.txt') as index_file:
        entries = [line.strip() for line in index_file if line.strip()]

    images = np.empty((len(entries), dim[0], dim[1]), 'float64')
    labels = np.empty((len(entries)), dtype="int64")
    names = []

    for index, entry in enumerate(entries):
        filename, label = entry.split(',')
        path = os.path.join(type, filename)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read; fail loudly here rather than storing a bad value.
            raise FileNotFoundError(f'Could not read image: {path}')
        images[index] = preprocess(image)
        labels[index] = int(label)
        names.append(filename)

    # Let numpy size the string dtype from the data; a fixed "<U16" dtype
    # would silently truncate any file name longer than 16 characters.
    filenames = np.array(names, dtype=str)

    return images, labels, filenames
    # return labels, filenames

# Load the test data
def load_test():
    """Load every image listed in ``test.txt`` from the ``test`` directory.

    Each non-empty line of ``test.txt`` is treated as one file name. Images
    are read in grayscale and passed through ``preprocess``.

    Returns:
        A tuple ``(images, filenames)``: ``images`` is a float32 array of
        shape ``(n, dim[0], dim[1])`` and ``filenames`` a list of str in
        index-file order.

    Raises:
        FileNotFoundError: If an image named in the index cannot be read.
    """
    # Strip each line instead of unpacking ``line.split('\n')``: the unpack
    # raised ValueError for a final line without a trailing newline.
    with open('test.txt') as index_file:
        filenames = [line.strip() for line in index_file if line.strip()]

    images = np.empty((len(filenames), dim[0], dim[1]), 'float32')

    for index, filename in enumerate(filenames):
        path = os.path.join('test', filename)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None instead of raising on a missing or
            # unreadable file.
            raise FileNotFoundError(f'Could not read image: {path}')
        images[index] = preprocess(image)

    return images, filenames
    # return filenames

In [None]:
# Loading the data
# Reads all three splits fully into memory as numpy arrays.
train_images, train_labels, train_files = load_data('train')
validation_images, validation_labels, validation_files = load_data('validation')
test_images, test_files = load_test()
# Alternative when the loaders are switched to return only labels/filenames:
# train_labels, train_files = load_data('train')
# validation_labels, validation_files = load_data('validation')
# test_files = load_test()

# print (train_images)
# print (train_labels)

# If we loaded it into the memory we make sure everything is ok

# First show the first training image as read straight from disk ...
io.imshow(cv2.imread(os.path.join('./train', train_files[0])))
io.show()

# ... then the same image as stored in the array (after preprocess), cast back
# to uint8 for display.
io.imshow(train_images[0].astype(np.uint8))
io.show()

# For everything except transfer learning: scale the pixel values in place by
# 1/255, then flatten each image into a single feature vector.
train_images /= 255.0
validation_images /= 255.0
test_images /= 255.0

# One row per image, one column per pixel.
train_images_reshaped = train_images.reshape(
    (len(train_images), train_images.shape[1] * train_images.shape[2])
)
validation_images_reshaped = validation_images.reshape(
    (len(validation_images), validation_images.shape[1] * validation_images.shape[2])
)
test_images_reshaped = test_images.reshape(
    (len(test_images), test_images.shape[1] * test_images.shape[2])
)

In [None]:
# Optional: if we don't load the data in memory, we move it into per-class
# folders so it can be consumed by directory-based data generators/loaders.
# The cell is safe to re-run: folders are created only if missing and files
# that were already moved are skipped.

cwd = os.getcwd()


def _move_into_subfolder(split, subfolder, filename):
    """Move ``split/filename`` to ``split/subfolder/filename`` if not moved yet.

    Raises:
        FileNotFoundError: If the file is neither at its original location
            nor already inside the target subfolder.
    """
    source = os.path.join(cwd, split, filename)
    target = os.path.join(cwd, split, subfolder, filename)
    if os.path.exists(source):
        os.rename(source, target)
    elif not os.path.exists(target):
        raise FileNotFoundError(f'Missing file: {source}')


# One folder per class label for the labelled splits ...
for label in range(0, 3):
    os.makedirs(os.path.join(cwd, f'train/class_{label}'), exist_ok=True)
    os.makedirs(os.path.join(cwd, f'validation/class_{label}'), exist_ok=True)
# ... and a single dummy folder for the unlabelled test images.
os.makedirs(os.path.join(cwd, 'test/test_folder'), exist_ok=True)

for label, filename in zip(train_labels, train_files):
    _move_into_subfolder('train', f'class_{label}', filename)

for label, filename in zip(validation_labels, validation_files):
    _move_into_subfolder('validation', f'class_{label}', filename)

for filename in test_files:
    _move_into_subfolder('test', 'test_folder', filename)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Directory-based generators; every generator rescales pixel values by 1/255
# and resizes the images to `dim`.
train_generator = ImageDataGenerator(rescale=1.0 / 255).flow_from_directory(
    './train/',
    target_size=dim,
    color_mode='rgb',
    batch_size=32,
)

validation_generator = ImageDataGenerator(rescale=1.0 / 255).flow_from_directory(
    './validation/',
    target_size=dim,
    color_mode='rgb',
    batch_size=32,
)

# The test generator must not shuffle, so predictions keep a stable file order.
test_generator = ImageDataGenerator(rescale=1.0 / 255).flow_from_directory(
    './test/',
    target_size=dim,
    color_mode='rgb',
    classes=None,
    shuffle=False,
)

# Naive Bayes (NB)

In [None]:
from sklearn.naive_bayes import MultinomialNB

# These two functions are taken from the second laboratory
def interval_calculator(num_bins):
    """Return ``num_bins`` evenly spaced bin edges covering ``[0, 1]``."""
    edges = np.linspace(0, 1, num_bins)
    return edges

def value_to_bins(x, bins):
    """Return, for each value in ``x``, the index of the bin it falls into.

    Bins are closed on the right (``np.digitize`` with ``right=True``).
    """
    return np.digitize(x, bins=bins, right=True)

In [None]:
# Discretise the normalised pixel values into bin indices, because
# MultinomialNB is fitted on those indices rather than on the raw floats.
num_bins = 9
bins = interval_calculator(num_bins)
train_images_bins = value_to_bins(train_images_reshaped, bins)

naive_bayes_model = MultinomialNB()
naive_bayes_model.fit(train_images_bins, train_labels)

# score() predicts internally, so no separate predict call is needed here.
validation_images_bins = value_to_bins(validation_images_reshaped, bins)
print (f'Validation score = {naive_bayes_model.score(validation_images_bins, validation_labels)}')

test_images_bins = value_to_bins(test_images_reshaped, bins)
predictions = naive_bayes_model.predict(test_images_bins)

# Write the submission: a header row followed by one "id,label" row per image.
with open('submission.txt', 'w') as file:
    file.write('id,label\n')
    for filename, label in zip(test_files, predictions):
        file.write(f'{filename},{label}\n')

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest neighbours on the flattened, normalised pixels.
knn_classifier = KNeighborsClassifier(n_neighbors = 11, metric = 'l2', n_jobs = -1)
# knn_classifier = KNeighborsClassifier(n_neighbors = 9, metric = 'l1', n_jobs = -1)
knn_classifier.fit(train_images_reshaped, train_labels)

# score() predicts internally; a separate predict call whose result is thrown
# away would only repeat the neighbour search.
print (f'Validation score = {knn_classifier.score(validation_images_reshaped, validation_labels)}')

predictions = knn_classifier.predict(test_images_reshaped)
# Write the submission: a header row followed by one "id,label" row per image.
with open('submission.txt', 'w') as file:
    file.write('id,label\n')
    for filename, label in zip(test_files, predictions):
        file.write(f'{filename},{label}\n')

# SVM

In [None]:
from sklearn import svm, metrics

# RBF-kernel support vector classifier on the flattened, normalised pixels.
clf = svm.SVC(kernel='rbf', C=3)
clf.fit(train_images_reshaped, train_labels)

validation_accuracy = metrics.accuracy_score(
    validation_labels, clf.predict(validation_images_reshaped)
)
print (f'Validation score = {validation_accuracy}')

predictions = clf.predict(test_images_reshaped)
# Write the submission: a header row followed by one "id,label" row per image.
with open('submission.txt', 'w') as submission:
    submission.write('id,label\n')
    submission.writelines(
        f'{name},{label}\n' for name, label in zip(test_files, predictions)
    )

# MLP

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with a single hidden layer of 100 units.
mlp = MLPClassifier(hidden_layer_sizes=(100,),
                    activation='relu',
                    solver='adam',
                    alpha=0.05,
                    max_iter=2000)
mlp.fit(train_images_reshaped, train_labels)
# The classifier is bound to `mlp`; the previous reference to `mlp_classifier`
# raised NameError before any score was printed.
print (f'Validation score = {mlp.score(validation_images_reshaped, validation_labels)}')

predictions = mlp.predict(test_images_reshaped)
# Write the submission: a header row followed by one "id,label" row per image.
with open('submission.txt', 'w') as file:
    file.write('id,label\n')
    for filename, label in zip(test_files, predictions):
        file.write(f'{filename},{label}\n')

# CNN

In [None]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models, losses, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Add a trailing channel axis, (n, height, width) -> (n, height, width, 1),
# as expected by Conv2D. Using expand_dims instead of a hard-coded 50x50
# reshape keeps this correct if `dim` is changed.
# NOTE: this rebinds the *_reshaped names that the classical models above use
# for the flattened data; re-run the normalise/flatten cell before going back.
train_images_reshaped = np.expand_dims(train_images, axis=-1)
validation_images_reshaped = np.expand_dims(validation_images, axis=-1)
test_images_reshaped = np.expand_dims(test_images, axis=-1)

In [None]:
# Small CNN: four convolution layers (the first three each followed by max
# pooling), dropout, then a dense head ending in a 3-way softmax.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 1)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.Dropout(0.5),
    layers.Flatten(),
    layers.Dense(16),
    layers.Dense(3, activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train on the in-memory arrays, reporting validation metrics after each epoch.
history = model.fit(train_images_reshaped, train_labels, batch_size=32, epochs=100,
                    validation_data=(validation_images_reshaped, validation_labels))

# Sequential.predict_classes was removed from recent TensorFlow releases; take
# the arg-max over the softmax outputs instead, which gives the same class ids.
predictions = np.argmax(model.predict(test_images_reshaped), axis=-1)
# Write the submission: a header row followed by one "id,label" row per image.
with open('submission.txt', 'w') as file:
    file.write('id,label\n')
    for filename, label in zip(test_files, predictions):
        file.write(f'{filename},{label}\n')

# Transfer learning - Tensorflow


In [None]:
import tensorflow as tf

from keras import applications, layers, regularizers, optimizers, models
from keras.applications.vgg16 import VGG16

In [None]:
# Frozen VGG16 feature extractor with a small trainable softmax head.
# The input shape has to match what the directory generators produce
# (target_size=dim, color_mode='rgb'); a fixed 224x224 input would be rejected
# by Keras as soon as the first batch arrives.
base_model = VGG16(include_top=False, input_shape=(dim[0], dim[1], 3), pooling='avg')

for layer in base_model.layers:
    layer.trainable = False

x = layers.Flatten()(base_model.layers[-1].output)
x = layers.Dense(3, kernel_regularizer=regularizers.l2(0.01),
                 activity_regularizer=regularizers.l1(0.01), activation='softmax')(x)
model = models.Model(inputs=base_model.input, outputs=x)

# The generators yield one-hot labels, hence the (non-sparse) categorical loss.
model.compile(optimizer=optimizers.SGD(learning_rate=1e-3),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator)

# Predict from the test generator: the in-memory `test_images` array is
# single-channel and cannot be fed to this 3-channel model.
test_generator.reset()
probabilities = model.predict(test_generator)
# NOTE: the class index is the position of the class folder in sorted order
# (class_0, class_1, class_2), so it equals the numeric label.
predictions = probabilities.argmax(axis=-1)

# Take the file names from the generator itself so every prediction is paired
# with the file it was actually computed for, whatever order test.txt uses.
predicted_files = [os.path.basename(path) for path in test_generator.filenames]

with open('submission.txt', 'w') as file:
    file.write('id,label\n')
    for filename, label in zip(predicted_files, predictions):
        file.write(f'{filename},{label}\n')

# Transfer learning - PyTorch

In [None]:
# Install the PyTorch packages used by the cells below.
!pip install torch
!pip install torchvision
# NOTE(review): nothing in the visible cells imports torchdata - confirm it is needed.
!pip install torchdata

In [None]:
import torch

from torch.optim import SGD
from torch.nn import CrossEntropyLoss, Linear

from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

from torchvision.models import resnet50, alexnet, vgg16

from torchvision.transforms import Resize, ToTensor, Normalize, Compose

In [None]:
def score(loader):
    """Return the accuracy of the global ``model`` over all batches of ``loader``.

    The model is switched to evaluation mode for the duration of the call
    (layers such as dropout and batch normalisation behave differently while
    training) and its previous mode is restored afterwards, so this is safe
    to call in the middle of training.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``correct / total``; a 0-dim tensor when at least one batch was seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    correct = 0
    total = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in loader:
                x_input = images.to(device)
                y_true = labels.to(device)
                y_pred = torch.argmax(model(x_input), dim=1)

                total += y_pred.size(0)
                correct += (y_pred == y_true).sum()
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)

    return correct / total

def predict_classes(loader):
    """Return the predicted class index for every sample in ``loader``.

    Runs the global ``model`` in evaluation mode without gradient tracking
    and restores the model's previous mode afterwards.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches; the labels
            are ignored.

    Returns:
        A flat list of int class indices, in the order the loader yields them.
    """
    y_preds = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, _ in loader:
                x_input = images.to(device)
                # One forward pass per batch (the model used to be called
                # twice, with the first result thrown away).
                y_pred = torch.argmax(model(x_input), dim=1)
                y_preds.extend(y_pred.tolist())
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)

    return y_preds

def fit(train_loader, validation_loader, epochs):
    """Train the global ``model`` and print validation accuracy after each epoch.

    Uses the global ``optimizer``, ``loss_function`` and ``device``.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        validation_loader: Loader passed to ``score`` after every epoch.
        epochs: Number of passes over ``train_loader``.
    """
    for epoch in range(1, epochs + 1):
        # Make sure training-only behaviour is enabled, even if something put
        # the model into eval mode since the last epoch.
        model.train()

        for images, labels in train_loader:
            x_train = images.to(device)
            y_true = labels.to(device)

            optimizer.zero_grad()
            loss = loss_function(model(x_train), y_true)
            loss.backward()
            optimizer.step()

        print (f'Epoch {epoch} | val_acc: {score(validation_loader)}')

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
loss_function = CrossEntropyLoss()

# Pretrained backbone with a fresh 3-class head. The head is swapped in and
# the model moved to the device BEFORE the optimizer is built; otherwise the
# optimizer holds the parameters of the discarded layer and the new head is
# never updated.
model = resnet50(pretrained=True)
# model.classifier[-1] = Linear(model.classifier[-1].in_features, 3)  # alexnet / vgg16
model.fc = Linear(model.fc.in_features, 3)
model.to(device)

# Resize and normalise the inputs. The list is passed inline (not bound to a
# global called `layers`) so it does not shadow the Keras `layers` module used
# by earlier cells.
transform = Compose([
    Resize(224),
    ToTensor(),
    Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

optimizer = SGD(model.parameters(),
                lr=1e-3,
                momentum=0.9)

test_dataset = ImageFolder(root='test/', transform=transform)

train_loader = DataLoader(ImageFolder(root='train/', transform=transform), batch_size=32, shuffle=True)
validation_loader = DataLoader(ImageFolder(root='validation/', transform=transform), batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

fit(train_loader, validation_loader, 2)

y_pred = predict_classes(test_loader)

# Take the file names from the dataset itself: with shuffle=False the loader
# yields samples in exactly this order, so each prediction is paired with the
# file it was computed for.
predicted_files = [os.path.basename(path) for path, _ in test_dataset.samples]

with open('submission.txt', 'w') as file:
    file.write('id,label\n')
    for filename, label in zip(predicted_files, y_pred):
        file.write(f'{filename},{label}\n')