In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import albumentations as A

from sklearn.metrics import f1_score, classification_report
import pickle
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from numpy import array
from random import shuffle, seed

In [None]:
# Collect every file below the input directory. os.walk yields entries in an
# arbitrary, filesystem-dependent order, so the paths are sorted first;
# otherwise the seeded shuffle below would not be reproducible between runs
# or machines.
list_paths = sorted(
    os.path.join(subdir, file)
    for subdir, dirs, files in os.walk("../input")
    for file in files
)

# Training files are the ones whose path contains a "train/" component.
list_train = [filepath for filepath in list_paths if "train/" in filepath]
seed(420)  # fixed seed -> same permutation on every run
shuffle(list_train)
list_test = [filepath for filepath in list_paths if "test/" in filepath]

In [None]:
# Preview the first five training paths.
list_train[:5]

In [None]:
def get_class_from_path(filepath):
    """Return the name of the directory that directly contains ``filepath``.

    The training images are stored one folder per class, so the parent
    folder name is used as the class label.
    """
    parent_dir = os.path.dirname(filepath)
    # Keep only the last path component of the parent directory.
    return parent_dir.rsplit(os.sep, 1)[-1]

In [None]:
# Derive one class label per training path, in the same order as list_train.
labels = list(map(get_class_from_path, list_train))

In [None]:
# Preview the first five labels (aligned with the paths in list_train).
labels[:5]

In [None]:
# Build the training table in one step: one row per image with its class
# label and its file path.
train_data = pd.DataFrame({"class": labels, "path": list_train})
train_data.head()

In [None]:
# def open_image(img_path):
#     image = cv2.imread(img_path)
# #     array_img = np.array(image)
#     return image

# def crop(img):
#     width, height = img.size  # Get dimensions

#     left = (width - 112) / 2
#     top = (height - 112) / 2
#     right = (width + 112) / 2
#     bottom = (height + 112) / 2

#     return img.crop((left, top, right, bottom))


# def gamma_correction(array_img, gamma):
#     invGamma = 1.0 / gamma
#     table = np.array([((i / 255.0) ** invGamma) * 255 for i in np.arange(0, 256)]).astype("uint8")

#     return cv2.LUT(array_img, table)


# def jpg_compression(array, quality):
#     img = Image.fromarray(array)
#     img.save('img.jpg', "JPEG", quality=quality)
#     return cv2.cvtColor(cv2.imread('img.jpg'), cv2.COLOR_BGR2RGB)


# def resizing(array_img, factor):
#     h, w, ch = array_img.shape
#     return cv2.resize(array_img, (int(factor * w), int(factor * h)), interpolation=cv2.INTER_CUBIC)

In [None]:
# Bar chart of the number of training images per class, sorted ascending.
train_data['class'].value_counts().sort_values().plot(kind='bar')

In [None]:
# Features are the image paths; targets are the class labels.
X, y = train_data['path'], train_data['class']

In [None]:
# Pre-compute five shuffled, stratified folds as (train_idx, val_idx) pairs.
splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
splits = list(splitter.split(X, y))

In [None]:
# Check the shape of the targets.
y.shape

In [None]:
# One-hot encode the class labels (one indicator column per class).
# NOTE(review): this rebinds `y`, so cells that expect the raw label Series
# (e.g. the StratifiedKFold split) presumably have to run before this one.
y = pd.get_dummies(y)
y.head()

In [None]:
# Lookup table from a class index (as a string) to the camera model name.
# NOTE(review): assumes this order matches the column order of the one-hot
# targets used for training -- confirm.
dict_map = {
    str(index): camera
    for index, camera in enumerate(
        [
            "HTC-1-M7",
            "LG-Nexus-5x",
            "Motorola-Droid-Maxx",
            "Motorola-Nexus-6",
            "Motorola-X",
            "Samsung-Galaxy-Note3",
            "Samsung-Galaxy-S4",
            "Sony-NEX-7",
            "iPhone-4s",
            "iPhone-6",
        ]
    )
}

In [None]:
def open_image(img_path):
    """Read an image from disk and return it as an RGB array.

    ``cv2.imread`` decodes images in BGR channel order and returns ``None``
    instead of raising when the file is missing or cannot be decoded. The
    rest of this notebook (PIL-based test loading, matplotlib display,
    ImageNet preprocessing) works in RGB, so the channels are reordered here
    and an unreadable file is reported immediately.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        The decoded image in RGB channel order.

    Raises
    ------
    OSError
        If OpenCV could not read the file.
    """
    image = cv2.imread(img_path)
    if image is None:
        raise OSError(f"cv2.imread could not read image: {img_path!r}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [None]:
def preprocess(images):
    """Linearly rescale pixel values so that 0 maps to -1 and 255 maps to 1."""
    half_range = 127.5
    scaled = images / half_range
    return scaled - 1.0

In [None]:
# Side length, in pixels, of the square crops fed to the network.
SHAPE = 512

# Training pipeline: random crop followed by random gamma, JPEG
# re-compression and grid distortion.
train_augmentations = A.Compose(
    [
        A.RandomCrop(height=SHAPE, width=SHAPE),
        A.RandomGamma(gamma_limit=(80, 120), p=0.8),
        A.JpegCompression(quality_lower=70, quality_upper=90, p=0.9),
        A.GridDistortion(interpolation=cv2.INTER_CUBIC, p=0.9),
    ]
)

# Evaluation pipeline: centre crop only, no random transforms.
teste_augmentations = A.Compose(
    [
        A.CenterCrop(height=SHAPE, width=SHAPE),
    ]
)

In [None]:
class CameraDataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads and augments images one batch at a time.

    Parameters
    ----------
    X_set : sliceable collection of str
        Image paths (a pandas Series in this notebook).
    y_set : sliceable collection or None
        Targets aligned with ``X_set`` (a one-hot DataFrame in this
        notebook). Not read when ``test`` is True, so it may be ``None``.
    batch_size : int
        Number of samples per batch.
    augmenter : callable or None, optional
        Called as ``augmenter(image=img)`` and expected to return a mapping
        with an ``"image"`` entry. When ``None`` the images are used exactly
        as loaded.
    test : bool, optional
        When True, ``__getitem__`` returns only the image batch.
    *args, **kwargs
        Accepted for backward compatibility and ignored.
    """

    def __init__(self, X_set, y_set, batch_size, augmenter=None, test=False, *args, **kwargs):
        # Let the base class initialise its own state.
        super().__init__()

        self.batch_size = batch_size
        self.x_set = X_set
        self.y_set = y_set
        self.test = test
        self.augmenter = augmenter

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: flooring silently dropped the last
        # len(x_set) % batch_size samples from evaluation and prediction.
        return (len(self.x_set) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch ``index``: images only in test mode, else (images, targets)."""
        start = index * self.batch_size
        stop = start + self.batch_size

        paths = self.x_set[start:stop]
        if self.augmenter is None:
            images = [open_image(path) for path in paths]
        else:
            images = [self.augmenter(image=open_image(path))['image'] for path in paths]
        batch = np.array(images)

        if self.test:
            return batch

        # Only touch the targets when they are actually returned.
        targets = self.y_set[start:stop]
        return batch, np.asarray(targets)

In [None]:
# Dataset over all of X / y with the training augmentations, used by the
# next cells to inspect a sample batch.
train_dataset = CameraDataset(X, y, batch_size=8, augmenter=train_augmentations)

In [None]:
# Fetch batch number 50 through the regular indexing protocol.
x_set, y_set = train_dataset[50]

In [None]:
# Shape of the first image in the sample batch.
x_set[0].shape

In [None]:
# Target of the first sample in the batch.
y_set[0]

In [None]:
# Display the fourth image of the sample batch as a visual sanity check.
plt.imshow(x_set[3])

In [None]:
def build_model():
    """Build and compile the DenseNet201-based 10-class classifier.

    Architecture: DenseNet preprocessing -> ImageNet-initialised DenseNet201
    backbone (fully trainable) -> global average pooling -> Dense(64) ->
    Dropout(0.3) -> Dense(32) -> Dropout(0.3) -> 10-way softmax.

    The model is compiled with Adam (learning rate 1e-4), categorical
    cross-entropy and accuracy, and its summary is printed.

    Returns
    -------
    tf.keras.Model
        The compiled model, taking ``(SHAPE, SHAPE, 3)`` images.
    """
    input_shape = (SHAPE, SHAPE, 3)

    base_model = tf.keras.applications.DenseNet201(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = True

    inputs = tf.keras.layers.Input(shape=input_shape)
    x = tf.keras.applications.densenet.preprocess_input(inputs)
    # Do not hard-code training=True here: that would bake training mode into
    # the graph, so BatchNormalization would keep using batch statistics
    # during evaluate()/predict(). Leaving it unset lets Keras pick the mode
    # (training in fit, inference otherwise).
    x = base_model(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

    model.summary()
    return model

In [None]:
# model = build_model()

In [None]:
# def get_callbacks_list(fold):
    
#     path = f"output/fold{fold}/"
    
#     checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=path + "weights.h5", monitor="val_accuracy", save_best_only=True, mode='max')
    
#     reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", factor=0.9, patience=2, min_lr=1e-6, mode="max", verbose=True)
    
#     early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5, mode="max", verbose=True)
    
    
#     return [checkpoint, reduce_lr, early_stopping]

In [None]:
# val_f1 = []
# import gc


# for n_fold, (t_idx, v_idx) in enumerate(splits):
#     #if n_fold < 1: train for only one epoch
# #     X_train = [list_train[x] for x in t_idx]
# #     y_train = [y[x] for x in t_idx]
# #     X_val = [list_train[x] for x in v_idx]
# #     y_val = [y[x] for x in v_idx]
    
#     X_train = X[X.index.isin(t_idx)]
#     y_train = y[y.index.isin(t_idx)]
#     X_val = X[X.index.isin(v_idx)]
#     y_val = y[y.index.isin(v_idx)]
    

#     train_dataset = CameraDataset(X_train, y_train, batch_size=32,augmenter=train_augmentations)
#     val_dataset = CameraDataset(X_val, y_val, batch_size=32,augmenter=teste_augmentations)
    
#     callbacks_list = get_callbacks_list(n_fold)
    
#     model = build_model()
    
#     history = model.fit(train_dataset, validation_data=val_dataset, epochs=1, callbacks=callbacks_list)
    
#     model.load_weights(f"output/fold{n_fold}/weights.h5")
    
#     x_test = CameraDataset(X_val, y_val, batch_size=32, test=True, augmenter=teste_augmentations)
    
#     y_pred = model.predict(x_test)
    
#     del model
#     gc.collect()
#     f1score = f1_score(y_val.values.argmax(axis=1), y_pred.argmax(axis=1), average='micro')
#     print(classification_report(y_val.values.argmax(axis=1), y_pred.argmax(axis=1), digits=3))
    
#     val_f1.append(f1score)
    
# print(f"Final f1 score: {np.mean(val_f1)}")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples, stratified on the targets, for validation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=42,
)

# Training batches get the random augmentations; validation batches only the
# centre crop.
train_dataset = CameraDataset(X_train, y_train, batch_size=10, augmenter=train_augmentations)
val_dataset = CameraDataset(X_test, y_test, batch_size=10, augmenter=teste_augmentations)

In [None]:
# Instantiate and compile the network (also prints its summary).
model = build_model()

In [None]:
file_path = "output/weights.best.hdf5"
n = 0

# Create the checkpoint directory up front so saving cannot fail on a
# missing folder.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Keep only the model from the epoch with the best validation accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(file_path, monitor="val_accuracy", save_best_only=True, mode='max')

# Multiply the learning rate by 0.9 after 2 epochs without improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", factor=0.9, patience=2, min_lr=1e-6, mode="max", verbose=True)

# Stop a run after 5 epochs without improvement.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5, mode="max", verbose=True)

callbacks_list = [checkpoint, reduce_lr, early_stopping]

# model = tf.keras.models.load_model(file_path)

history = model.fit(train_dataset, validation_data=val_dataset, epochs=10, callbacks=callbacks_list)

# Two further runs of up to 10 epochs; the second one restarts from the best
# checkpoint saved so far.
while n < 2:
    print(n)
    if n > 0:
        model = tf.keras.models.load_model(file_path)

    n += 1

    # No `batch_size` here: the Sequence already yields complete batches and
    # Keras does not accept that argument for Sequence/generator inputs.
    history = model.fit(train_dataset, validation_data=val_dataset, epochs=10, callbacks=callbacks_list)

In [None]:
# Load the competition's sample submission and preview it.
sample_submission = pd.read_csv('../input/sp-society-camera-model-identification/sample_submission.csv')
sample_submission.head()

In [None]:
# Prefix every file name in the `fname` column with the test-image folder,
# giving one path per submission row.
X_test = '../input/sp-society-camera-model-identification/test/test/' + sample_submission['fname']

In [None]:
# Preview the first five entries of X_test.
X_test[:5]

In [None]:
def read_and_resize(filepath):
    """Load the image at ``filepath`` as an RGB ``uint8`` array.

    Despite its name, this function does not resize anything: the image is
    returned at its native resolution.

    Parameters
    ----------
    filepath : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width, 3)``.
    """
    # The context manager closes the underlying file handle promptly;
    # convert("RGB") guarantees three channels whatever the stored mode.
    with Image.open(filepath) as pil_im:
        return np.array(pil_im.convert("RGB"), dtype="uint8")

In [None]:
def crop(img, size=224):
    """Return the centred ``size`` x ``size`` crop of an image.

    Parameters
    ----------
    img : PIL.Image.Image
        Image exposing ``size`` as ``(width, height)`` and ``crop(box)``.
    size : int, optional
        Side length of the square crop in pixels. Defaults to 224, the value
        that used to be hard-coded.

    Returns
    -------
    The result of ``img.crop`` for the centred box.
    """
    width, height = img.size  # PIL reports (width, height)

    left = (width - size) / 2
    top = (height - size) / 2
    right = (width + size) / 2
    bottom = (height + size) / 2

    return img.crop((left, top, right, bottom))

In [None]:
file_path = "output/weights.best.hdf5"
# Reload the best checkpoint written during training.
model = tf.keras.models.load_model(file_path)
# Keep the paths in X_test and store the pixel data under a separate name,
# so this cell can be re-run without first rebuilding X_test.
X_test_images = np.array([read_and_resize(filepath) for filepath in X_test])
pred_mean = model.predict(X_test_images)

In [None]:
# Translate each predicted class index into its camera-model name.
labels_test = [dict_map[str(item)] for item in pred_mean.argmax(axis=1)]

In [None]:
# First ten predicted class indices.
pred_mean.argmax(axis=1)[:10]

In [None]:
# The corresponding first ten predicted camera names.
labels_test[:10]

In [None]:
# Write the predicted camera names into the `camera` column and preview.
sample_submission['camera'] = labels_test
sample_submission.head()

In [None]:
  # Number of test images assigned to each camera model.
  sample_submission['camera'].value_counts()

In [None]:
# Save the submission file without the DataFrame index column.
sample_submission.to_csv("submission.csv", index=False)

In [None]:
# Bar chart of the predicted class distribution, sorted ascending.
sample_submission['camera'].value_counts().sort_values().plot(kind='bar')