In [None]:
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, MaxPool2D, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.losses import CategoricalCrossentropy, MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError, mean_absolute_error, mean_squared_error
from tensorflow.keras import activations, regularizers
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.applications import VGG16, ResNet50, MobileNet

In [None]:
# Root folder of the PSF dataset; every CSV file and image directory below sits under it.
DATASET_ROOT = "../input/psf-dataset"

# Per-split CSV files (passed to load_coefs in a later cell).
train_data_path = f"{DATASET_ROOT}/train.csv"
validation_data_path = f"{DATASET_ROOT}/validation.csv"
test_data_path = f"{DATASET_ROOT}/test.csv"

# Per-split image directories.
train_image_path = f"{DATASET_ROOT}/train"
test_image_path = f"{DATASET_ROOT}/test"
validation_image_path = f"{DATASET_ROOT}/validation"

In [None]:
def load_coefs(path):
    """Read a split's CSV and return its coefficient columns as a NumPy array.

    Rows are ordered by the ``img`` column compared as strings (so "10" sorts
    before "2"), and only columns whose name contains "coef" are kept.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or buffer).

    Returns:
        A 2-D ``numpy.ndarray`` with one row per CSV row and one column per
        retained "coef" column.
    """
    table = pd.read_csv(path)
    # Sort on the string form of "img".
    # NOTE(review): presumably this mirrors the file order produced by the
    # image loader so rows line up with images - confirm against the dataset.
    table = table.assign(img=table["img"].astype(str))
    ordered = table.sort_values(by="img")
    return ordered.filter(like="coef").to_numpy()

In [None]:
# Notebook-wide configuration.
IMAGE_SIZE = (128, 128)          # size every image is resized to when loaded
IMAGE_SHAPE = (*IMAGE_SIZE, 3)   # model input shape: IMAGE_SIZE plus 3 channels
BATCH_SIZE = 32                  # batch size of the training/validation pipelines
RANDOM_SEED = 42                 # seed for the global RNGs and for Dropout
LEARNING_RATE = 5E-6             # learning rate handed to the Adam optimizer
N_COEFFICIENTS = 10              # number of regression targets per image

# Seed the global NumPy and TensorFlow generators so that weight
# initialisation and dataset shuffling are repeatable from run to run;
# without this RANDOM_SEED only affected a single Dropout layer.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [None]:
# One dataset of target rows per split, built from the CSVs in the fixed
# order train -> validation -> test. Each element is one row returned by
# load_coefs.
train_coefs, validation_coefs, test_coefs = (
    tf.data.Dataset.from_tensor_slices(load_coefs(csv_path))
    for csv_path in (train_data_path, validation_data_path, test_data_path)
)

In [None]:
def _unbatched_images(directory):
    """Load the images under `directory`, unlabeled and unshuffled, one image per element."""
    batched = tf.keras.utils.image_dataset_from_directory(
        directory,
        labels=None,
        image_size=IMAGE_SIZE,
        batch_size=1,
        shuffle=False,
    )
    # Loaded with batch_size=1 and immediately unbatched, so downstream code
    # sees individual images rather than batches of one.
    return batched.unbatch()


train_img_dataset = _unbatched_images(train_image_path)

validation_img_dataset = _unbatched_images(validation_image_path)

test_img_dataset = _unbatched_images(test_image_path)

In [None]:
# Pair every image with its target row, split by split. The pairing is purely
# positional: the i-th image is zipped with the i-th row of the matching
# coefficient dataset.
train_ds, validation_ds, test_ds = (
    tf.data.Dataset.zip(pair)
    for pair in (
        (train_img_dataset, train_coefs),
        (validation_img_dataset, validation_coefs),
        (test_img_dataset, test_coefs),
    )
)

In [None]:
def preprocess_image(img):
    """Return `img` divided by 255.

    NOTE(review): presumably maps 8-bit pixel intensities from [0, 255] to
    [0, 1] - confirm the value range of the incoming images.
    """
    return img / 255

In [None]:
# Final input pipelines.
# NOTE: the three names are rebound in place, so re-running this cell on its
# own applies the scaling (and batching) a second time; re-run the zip cell
# first.

# Training: scale, shuffle within a 640-element buffer, then form full batches.
train_ds = (
    train_ds
    .map(lambda x, y: (preprocess_image(x), y))
    .shuffle(640)
    .batch(batch_size=BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation: keep the final partial batch so that every validation sample
# contributes to the reported metrics (drop_remainder=True silently discarded
# up to BATCH_SIZE - 1 samples).
validation_ds = (
    validation_ds
    .map(lambda x, y: (preprocess_image(x), y))
    .batch(batch_size=BATCH_SIZE, drop_remainder=False)
    .prefetch(tf.data.AUTOTUNE)
)

# Test: scaled only; left unbatched because it is materialised sample by
# sample in a later cell.
test_ds = test_ds.map(lambda x, y: (preprocess_image(x), y))

In [None]:
def create_custom_model(input_shape=IMAGE_SHAPE, n_coefs=N_COEFFICIENTS):
    """Build a small convolutional regressor from scratch.

    Architecture: four 3x3 ReLU convolutions (32, 60, 100, 100 filters) with
    2x2 max-pooling after the first, second and fourth, followed by a
    1000-unit ReLU dense layer, 25% dropout and a linear output layer.

    Args:
        input_shape: Shape of a single input image (without the batch axis).
        n_coefs: Number of linear output units. Defaults to N_COEFFICIENTS so
            the output width matches create_model instead of being fixed
            at 10.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    return tf.keras.Sequential([
        tf.keras.Input(shape=input_shape),
        Conv2D(32, 3, activation='relu'),
        MaxPool2D(2),
        Conv2D(60, 3, activation='relu'),
        MaxPool2D(2),
        Conv2D(100, 3, activation='relu'),
        Conv2D(100, 3, activation='relu'),
        MaxPool2D(2),
        Flatten(),
        Dense(1000, activation='relu'),
        Dropout(0.25),
        Dense(n_coefs, activation='linear'),
    ])

def create_model(n_coefs=N_COEFFICIENTS):
    """Build an EfficientNetB0-based regressor with ImageNet weights.

    The notebook's input pipeline divides images by 255 (see
    preprocess_image), but the Keras EfficientNet applications embed their own
    input normalisation and expect pixel values in the [0, 255] range. The
    model therefore multiplies its input by 255 before the backbone so the
    pretrained weights see the value range they were trained on.

    Args:
        n_coefs: Number of linear output units (regression targets).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an IMAGE_SHAPE image scaled
        to [0, 1] to ``n_coefs`` values.
    """
    inputs = tf.keras.Input(shape=IMAGE_SHAPE)
    # Undo the pipeline's 1/255 scaling: EfficientNet rescales internally.
    backbone_input = tf.keras.layers.Rescaling(255.0)(inputs)

    base_model = tf.keras.applications.EfficientNetB0(
        include_top=False,
        weights="imagenet",
        input_shape=IMAGE_SHAPE,
        pooling='max',
    )
    x = base_model(backbone_input)

    # Regression head on top of the globally max-pooled backbone features.
    x = BatchNormalization(momentum=0.99, epsilon=0.001, axis=-1)(x)
    x = Dense(
        256,
        # Factor passed positionally: the legacy `l=` keyword is not accepted
        # by every Keras version.
        kernel_regularizer=regularizers.l2(0.016),
        activity_regularizer=regularizers.l1(0.006),
        bias_regularizer=regularizers.l1(0.006),
        activation='relu',
    )(x)
    x = Dropout(rate=0.1, seed=RANDOM_SEED)(x)
    output = Dense(n_coefs, activation='linear')(x)

    return Model(inputs=inputs, outputs=output)

In [None]:
# Instantiate the network returned by create_model and configure training:
# mean squared error as the objective, mean absolute error as the monitored
# metric, Adam with the notebook-wide learning rate.
model = create_model()

loss = MeanSquaredError(name="loss")
metrics = [MeanAbsoluteError(name="mae")]
optimizer = Adam(learning_rate=LEARNING_RATE)

model.compile(
    loss=loss,
    metrics=metrics,
    optimizer=optimizer,
)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
%%time
EPOCHS = 200
history = model.fit(train_ds, validation_data=validation_ds, epochs=EPOCHS, batch_size=BATCH_SIZE)

In [None]:
# Materialise the unbatched test split as NumPy arrays in a single pass.
# Iterating the dataset once (instead of once for the images and once more
# for the targets) avoids loading and decoding every test image twice, and
# as_numpy_iterator() hands back NumPy values directly.
# NOTE: this rebinds `test_coefs` from a tf.data.Dataset to an ndarray.
test_image_list, test_target_list = [], []
for image, target in test_ds.as_numpy_iterator():
    test_image_list.append(image)
    test_target_list.append(target)

test_imgs = np.array(test_image_list)
test_coefs = np.array(test_target_list)

In [None]:
# Run the trained model on the materialised test images; the predictions are
# reused by the metric and plotting cells below.
pred_coefs = model.predict(test_imgs)

In [None]:
# Evaluate the predictions with the Keras functional metrics, then collapse
# whatever they return to a single scalar with np.mean before printing.
mae_values = mean_absolute_error(test_coefs, pred_coefs).numpy()
mse_values = mean_squared_error(test_coefs, pred_coefs).numpy()

test_mae = np.mean(mae_values)
test_mse = np.mean(mse_values)

print("Средняя MAE на тесте:", test_mae)
print("Средняя MSE на тесте:", test_mse)

In [None]:
# Bar charts of true vs. predicted coefficients for three pseudo-randomly
# chosen test samples; each chart is also saved as "<index>.pdf".
# (`random` is imported in the notebook's import cell.)
random.seed(20)  # fixed seed: the same samples are picked on every run

# Candidate indices are multiples of 3 below 3000, capped at the number of
# test samples so an index can never run past the arrays.
index_limit = min(3000, len(test_coefs))
sample_indices = [random.randrange(0, index_limit, 3) for _ in range(3)]

bar_offset = 0.1  # half the bar width: true/predicted bars sit side by side

for i in sample_indices:
    y_true = test_coefs[i]
    y_pred = pred_coefs[i]
    # Coefficient numbers on the x axis start at 4, one per plotted value.
    coef_numbers = np.arange(4, 4 + len(y_true))

    print(i)
    print(y_true)
    print(y_pred)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.bar(coef_numbers - bar_offset, y_true, width=2 * bar_offset, label='Истинное')
    ax.bar(coef_numbers + bar_offset, y_pred, width=2 * bar_offset, label='Предсказанное')
    ax.set_xlabel('Номер коэффициента Цернике')
    ax.set_ylabel('Значение коэффициента')
    ax.set_xticks(coef_numbers)
    ax.legend(loc=0)

    # Save before showing so the file is written from the intact figure, then
    # close it so figures do not accumulate across loop iterations.
    fig.savefig(f"{i}.pdf", bbox_inches='tight')
    plt.show()
    plt.close(fig)