## Mount the google drive and then change the current directory

In [1]:
! ls

data  drive  nima  sample_data


In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


Create a data folder and then copy data to it
Copy the project code 

In [2]:
! mkdir data
! ls data
! cp -r /content/drive/MyDrive/Colab\ Notebooks/img-quality-assesment/data/AVA /content/data
! cp -r /content/drive/MyDrive/Colab\ Notebooks/img-quality-assesment/nima /content

^C


### Import the libraries
create some path variables

In [1]:
! pip install livelossplot



In [None]:
! pip install modin[dask]

In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
# import pandas as pd
import modin.pandas as pd
import seaborn as sns
import random
import sys

# PROJECT_PATH = '/content/' # for google colab
PROJECT_PATH = '/home/jovyan/work' # for local run
AVA_DIR = os.path.join(PROJECT_PATH,'DATA', 'AVA')
AVA_DATASET_DIR = os.path.join(AVA_DIR, 'images')
print(f'Adding project module {PROJECT_PATH}')
sys.path.append(PROJECT_PATH)



Adding project module /home/jovyan/work


In [3]:
from nima.utils.ava_preprocess import get_ava_csv_df, get_tags_df, get_rating_columns
from nima.model.model_builder import NIMA
from nima.model.loss import earth_movers_distance

In [4]:
ava_csv_df = get_ava_csv_df()
tags_df = get_tags_df()
ratings_column = get_rating_columns()

To request implementation, send an email to feature_requests@modin.org.


In [21]:
df = ava_csv_df[['image_id']]
df['ratings'] = ava_csv_df[ratings_column].apply(lambda x : x.values.tolist(), axis=1)
df.head()



Unnamed: 0,image_id
count,15084.0
mean,108618.653076
std,103054.764075
min,101.0
25%,102985.75
50%,117016.5
75%,129112.25
max,954229.0


In [30]:
df['batch_id']  = np.ceil(df['image_id']/10000)
df['batch_id'] = df['batch_id'].astype(int)
df.groupby(['batch_id']).count()



Unnamed: 0_level_0,image_id,ratings
batch_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,246,246
2,2910,2910
11,2541,2541
12,2813,2813
13,3071,3071
14,2757,2757
15,546,546
78,59,59
96,141,141


In [37]:
df[df['batch_id'] == 11]['image_id']

601      102266
602      103678
603      103758
604      103538
605      103908
          ...  
14149    109748
14150    109547
14151    108583
14152    109552
14153    109760
Name: image_id, Length: 2541, dtype: int64

In [11]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=True)
train_df, valid_df = train_test_split(train_df, test_size=0.1)

weight_filepath = 'vgg19_weights_best.hdf5'

In [13]:
import tensorflow.keras as keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dropout, Dense, MaxPool2D, Conv2D, Flatten
from keras.models import Model, Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from livelossplot.inputs.keras import PlotLossesCallback
from keras.optimizers import Adam

from keras.applications.vgg19 import VGG19, preprocess_input, decode_predictions

In [14]:
# create a vgg model 
vgg_model = VGG19(include_top=False, input_shape=(256,256,3), pooling='avg')
# set all layers to false trainable
for layer in vgg_model.layers:
  layer.trainable=False


x = Flatten()(vgg_model.output)
x = Dense(256, activation='relu', kernel_initializer='he_uniform')(x)
x = Dropout(0.2)(x)
x = Dense(256, activation='relu')(x)
x = Dense(10, activation='softmax')(x)

model = Model(inputs=vgg_model.input, outputs=x)
model.summary()

vgg_model.compile(loss=earth_movers_distance, optimizer=Adam(), metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)    

In [16]:
# define callbacks
earlystopping = EarlyStopping(monitor="val_loss", patience=5)

weight_filepath = 'vgg_model_weights_best_unfreezelast4.hdf5'
checkpoint= ModelCheckpoint(
    filepath=weight_filepath,
    save_weights_only=True,
    monitor="val_loss",
    mode="max",
    save_best_only=True,
)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5, min_lr=0.001)

plot_loss = PlotLossesCallback()

train_datagen = ImageDataGenerator(horizontal_flip=True)
valid_datagen = ImageDataGenerator(horizontal_flip=True)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory=AVA_DATASET_DIR,
    x_col="image_id", 
    y_col="ratings",
    batch_size=64,
    class_mode="categorical", 
    target_size=(256, 256), 
    preprocessing_function=preprocess_input
    )

valid_generator = train_datagen.flow_from_dataframe(
    valid_df, directory=AVA_DATASET_DIR,
    x_col="image_id", y_col="ratings",batch_size=64,
    class_mode="categorical", target_size=(256, 256), 
    preprocessing_function=preprocess_input
    )

# valid_generator = valid_datagen.flow_from_dataframe(
#     valid_df,
#     directory=AVA_DATASET_DIR,
#     x_col="image_id",
#     y_col="ratings",
#     batch_size=64,
#     class_mode="binary",
#     target_size=(256, 256),
#     preprocessing_function=preprocess_input
#     )


TypeError: All values in column x_col=image_id must be strings.

In [None]:
model.fit()